020a83e4d3
evolution-data-server. Bump PORTREVISION. Patch 4.1.25.1: applications using Berkeley DB's Concurrent Data Store product with the DB_CDB_ALLDB flag set that open databases while also holding open cursors could hang. Patch 4.1.25.2: fix transaction abort hang. Obtained from: patches from Sleepycat; patch changelog from the Debian db 4.1 package changelog.
480 lines
14 KiB
Text
480 lines
14 KiB
Text
--- libdb/fileops/fop_util.c.orig Thu Nov 20 23:13:30 2003
|
|
+++ libdb/fileops/fop_util.c Fri Mar 18 20:31:10 2005
|
|
@@ -40,7 +40,7 @@
|
|
u_int32_t __lockval; \
|
|
\
|
|
if (LOCKING_ON((ENV))) { \
|
|
- __lockval = 0; \
|
|
+ __lockval = 1; \
|
|
__dbt.data = &__lockval; \
|
|
__dbt.size = sizeof(__lockval); \
|
|
if ((ret = (ENV)->lock_get((ENV), (ID), \
|
|
--- libdb/dbinc/mp.h.orig Thu Nov 20 23:13:17 2003
|
|
+++ libdb/dbinc/mp.h Fri Mar 18 20:31:14 2005
|
|
@@ -149,6 +149,13 @@
|
|
* region lock).
|
|
*/
|
|
DB_MPOOL_STAT stat; /* Per-cache mpool statistics. */
|
|
+
|
|
+ /*
|
|
+ * We track page puts so that we can decide when allocation is never
|
|
+ * going to succeed. We don't lock the field, all we care about is
|
|
+ * if it changes.
|
|
+ */
|
|
+ u_int32_t put_counter; /* Count of page put calls. */
|
|
};
|
|
|
|
struct __db_mpool_hash {
|
|
--- libdb/mp/mp_fput.c.orig Thu Nov 20 23:13:36 2003
|
|
+++ libdb/mp/mp_fput.c Fri Mar 18 20:31:14 2005
|
|
@@ -19,6 +19,8 @@
|
|
#include "dbinc/db_shash.h"
|
|
#include "dbinc/mp.h"
|
|
|
|
+static void __memp_reset_lru __P((DB_ENV *, REGINFO *));
|
|
+
|
|
/*
|
|
* __memp_fput --
|
|
* Mpool file put function.
|
|
@@ -198,5 +200,56 @@
|
|
|
|
MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
|
|
|
|
+ /*
|
|
+ * On every buffer put we update the buffer generation number and check
|
|
+ * for wraparound.
|
|
+ */
|
|
+ if (++c_mp->lru_count == UINT32_T_MAX)
|
|
+ __memp_reset_lru(dbenv, dbmp->reginfo);
|
|
+
|
|
return (0);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * __memp_reset_lru --
|
|
+ * Reset the cache LRU counter.
|
|
+ */
|
|
+static void
|
|
+__memp_reset_lru(dbenv, memreg)
|
|
+ DB_ENV *dbenv;
|
|
+ REGINFO *memreg;
|
|
+{
|
|
+ BH *bhp;
|
|
+ DB_MPOOL_HASH *hp;
|
|
+ MPOOL *c_mp;
|
|
+ int bucket;
|
|
+
|
|
+ c_mp = memreg->primary;
|
|
+
|
|
+ /*
|
|
+ * Update the counter so all future allocations will start at the
|
|
+ * bottom.
|
|
+ */
|
|
+ c_mp->lru_count -= MPOOL_BASE_DECREMENT;
|
|
+
|
|
+ /* Adjust the priority of every buffer in the system. */
|
|
+ for (hp = R_ADDR(memreg, c_mp->htab),
|
|
+ bucket = 0; bucket < c_mp->htab_buckets; ++hp, ++bucket) {
|
|
+ /*
|
|
+ * Skip empty buckets.
|
|
+ *
|
|
+ * We can check for empty buckets before locking as we
|
|
+ * only care if the pointer is zero or non-zero.
|
|
+ */
|
|
+ if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
|
|
+ continue;
|
|
+
|
|
+ MUTEX_LOCK(dbenv, &hp->hash_mutex);
|
|
+ for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
|
|
+ bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
|
|
+ if (bhp->priority != UINT32_T_MAX &&
|
|
+ bhp->priority > MPOOL_BASE_DECREMENT)
|
|
+ bhp->priority -= MPOOL_BASE_DECREMENT;
|
|
+ MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
|
|
+ }
|
|
}
|
|
--- libdb/mp/mp_alloc.c.orig Thu Nov 20 23:13:36 2003
|
|
+++ libdb/mp/mp_alloc.c Fri Mar 18 20:31:14 2005
|
|
@@ -25,7 +25,6 @@
|
|
} HS;
|
|
|
|
static void __memp_bad_buffer __P((DB_MPOOL_HASH *));
|
|
-static void __memp_reset_lru __P((DB_ENV *, REGINFO *, MPOOL *));
|
|
|
|
/*
|
|
* __memp_alloc --
|
|
@@ -50,8 +49,9 @@
|
|
MPOOL *c_mp;
|
|
MPOOLFILE *bh_mfp;
|
|
size_t freed_space;
|
|
- u_int32_t buckets, buffers, high_priority, max_na, priority;
|
|
- int aggressive, ret;
|
|
+ u_int32_t buckets, buffers, high_priority, priority, put_counter;
|
|
+ u_int32_t total_buckets;
|
|
+ int aggressive, giveup, ret;
|
|
void *p;
|
|
|
|
dbenv = dbmp->dbenv;
|
|
@@ -59,18 +59,13 @@
|
|
dbht = R_ADDR(memreg, c_mp->htab);
|
|
hp_end = &dbht[c_mp->htab_buckets];
|
|
|
|
- buckets = buffers = 0;
|
|
- aggressive = 0;
|
|
+ buckets = buffers = put_counter = total_buckets = 0;
|
|
+ aggressive = giveup = 0;
|
|
+ hp_tmp = NULL;
|
|
|
|
c_mp->stat.st_alloc++;
|
|
|
|
/*
|
|
- * Get aggressive if we've tried to flush the number of pages as are
|
|
- * in the system without finding space.
|
|
- */
|
|
- max_na = 5 * c_mp->htab_buckets;
|
|
-
|
|
- /*
|
|
* If we're allocating a buffer, and the one we're discarding is the
|
|
* same size, we don't want to waste the time to re-integrate it into
|
|
* the shared memory free list. If the DB_MPOOLFILE argument isn't
|
|
@@ -81,19 +76,10 @@
|
|
len = (sizeof(BH) - sizeof(u_int8_t)) + mfp->stat.st_pagesize;
|
|
|
|
R_LOCK(dbenv, memreg);
|
|
-
|
|
- /*
|
|
- * On every buffer allocation we update the buffer generation number
|
|
- * and check for wraparound.
|
|
- */
|
|
- if (++c_mp->lru_count == UINT32_T_MAX)
|
|
- __memp_reset_lru(dbenv, memreg, c_mp);
|
|
-
|
|
/*
|
|
* Anything newer than 1/10th of the buffer pool is ignored during
|
|
* allocation (unless allocation starts failing).
|
|
*/
|
|
- DB_ASSERT(c_mp->lru_count > c_mp->stat.st_pages / 10);
|
|
high_priority = c_mp->lru_count - c_mp->stat.st_pages / 10;
|
|
|
|
/*
|
|
@@ -120,10 +106,11 @@
|
|
* We're not holding the region locked here, these statistics
|
|
* can't be trusted.
|
|
*/
|
|
- if (buckets != 0) {
|
|
- if (buckets > c_mp->stat.st_alloc_max_buckets)
|
|
- c_mp->stat.st_alloc_max_buckets = buckets;
|
|
- c_mp->stat.st_alloc_buckets += buckets;
|
|
+ total_buckets += buckets;
|
|
+ if (total_buckets != 0) {
|
|
+ if (total_buckets > c_mp->stat.st_alloc_max_buckets)
|
|
+ c_mp->stat.st_alloc_max_buckets = total_buckets;
|
|
+ c_mp->stat.st_alloc_buckets += total_buckets;
|
|
}
|
|
if (buffers != 0) {
|
|
if (buffers > c_mp->stat.st_alloc_max_pages)
|
|
@@ -131,6 +118,12 @@
|
|
c_mp->stat.st_alloc_pages += buffers;
|
|
}
|
|
return (0);
|
|
+ } else if (giveup || c_mp->stat.st_pages == 0) {
|
|
+ R_UNLOCK(dbenv, memreg);
|
|
+
|
|
+ __db_err(dbenv,
|
|
+ "unable to allocate space from the buffer cache");
|
|
+ return (ret);
|
|
}
|
|
|
|
/*
|
|
@@ -138,26 +131,24 @@
|
|
* we need. Reset our free-space counter.
|
|
*/
|
|
freed_space = 0;
|
|
+ total_buckets += buckets;
|
|
+ buckets = 0;
|
|
|
|
/*
|
|
* Walk the hash buckets and find the next two with potentially useful
|
|
* buffers. Free the buffer with the lowest priority from the buckets'
|
|
* chains.
|
|
*/
|
|
- for (hp_tmp = NULL;;) {
|
|
+ for (;;) {
|
|
+ /* All pages have been freed, make one last try */
|
|
+ if (c_mp->stat.st_pages == 0)
|
|
+ goto alloc;
|
|
+
|
|
/* Check for wrap around. */
|
|
hp = &dbht[c_mp->last_checked++];
|
|
if (hp >= hp_end) {
|
|
c_mp->last_checked = 0;
|
|
-
|
|
- /*
|
|
- * If we've gone through all of the hash buckets, try
|
|
- * an allocation. If the cache is small, the old page
|
|
- * size is small, and the new page size is large, we
|
|
- * might have freed enough memory (but not 3 times the
|
|
- * memory).
|
|
- */
|
|
- goto alloc;
|
|
+ hp = &dbht[c_mp->last_checked++];
|
|
}
|
|
|
|
/*
|
|
@@ -172,39 +163,59 @@
|
|
/*
|
|
* The failure mode is when there are too many buffers we can't
|
|
* write or there's not enough memory in the system. We don't
|
|
- * have a metric for deciding if allocation has no possible way
|
|
- * to succeed, so we don't ever fail, we assume memory will be
|
|
- * available if we wait long enough.
|
|
+ * have a way to know that allocation has no way to succeed.
|
|
+ * We fail if there were no pages returned to the cache after
|
|
+ * we've been trying for a relatively long time.
|
|
*
|
|
- * Get aggressive if we've tried to flush 5 times the number of
|
|
- * hash buckets as are in the system -- it's possible we have
|
|
- * been repeatedly trying to flush the same buffers, although
|
|
- * it's unlikely. Aggressive means:
|
|
+ * Get aggressive if we've tried to flush the number of hash
|
|
+ * buckets as are in the system and have not found any more
|
|
+ * space. Aggressive means:
|
|
*
|
|
* a: set a flag to attempt to flush high priority buffers as
|
|
* well as other buffers.
|
|
* b: sync the mpool to force out queue extent pages. While we
|
|
* might not have enough space for what we want and flushing
|
|
* is expensive, why not?
|
|
- * c: sleep for a second -- hopefully someone else will run and
|
|
- * free up some memory. Try to allocate memory too, in case
|
|
- * the other thread returns its memory to the region.
|
|
- * d: look at a buffer in every hash bucket rather than choose
|
|
+ * c: look at a buffer in every hash bucket rather than choose
|
|
* the more preferable of two.
|
|
+ * d: start to think about giving up.
|
|
+ *
|
|
+ * If we get here twice, sleep for a second, hopefully someone
|
|
+ * else will run and free up some memory.
|
|
+ *
|
|
+ * Always try to allocate memory too, in case some other thread
|
|
+ * returns its memory to the region.
|
|
*
|
|
* !!!
|
|
* This test ignores pathological cases like no buffers in the
|
|
* system -- that shouldn't be possible.
|
|
*/
|
|
- if ((++buckets % max_na) == 0) {
|
|
- aggressive = 1;
|
|
-
|
|
+ if ((++buckets % c_mp->htab_buckets) == 0) {
|
|
+ if (freed_space > 0)
|
|
+ goto alloc;
|
|
R_UNLOCK(dbenv, memreg);
|
|
|
|
- (void)__memp_sync_int(
|
|
- dbenv, NULL, 0, DB_SYNC_ALLOC, NULL);
|
|
-
|
|
- (void)__os_sleep(dbenv, 1, 0);
|
|
+ switch (++aggressive) {
|
|
+ case 1:
|
|
+ break;
|
|
+ case 2:
|
|
+ put_counter = c_mp->put_counter;
|
|
+ /* FALLTHROUGH */
|
|
+ case 3:
|
|
+ case 4:
|
|
+ case 5:
|
|
+ case 6:
|
|
+ (void)__memp_sync_int(
|
|
+ dbenv, NULL, 0, DB_SYNC_ALLOC, NULL);
|
|
+
|
|
+ (void)__os_sleep(dbenv, 1, 0);
|
|
+ break;
|
|
+ default:
|
|
+ aggressive = 1;
|
|
+ if (put_counter == c_mp->put_counter)
|
|
+ giveup = 1;
|
|
+ break;
|
|
+ }
|
|
|
|
R_LOCK(dbenv, memreg);
|
|
goto alloc;
|
|
@@ -277,7 +288,8 @@
|
|
* thread may have acquired this buffer and incremented the ref
|
|
* count after we wrote it, in which case we can't have it.
|
|
*
|
|
- * If there's a write error, avoid selecting this buffer again
|
|
+ * If there's a write error and we're having problems finding
|
|
+ * something to allocate, avoid selecting this buffer again
|
|
* by making it the bucket's least-desirable buffer.
|
|
*/
|
|
if (ret != 0 || bhp->ref != 0) {
|
|
@@ -301,6 +313,8 @@
|
|
|
|
freed_space += __db_shsizeof(bhp);
|
|
__memp_bhfree(dbmp, hp, bhp, 1);
|
|
+ if (aggressive > 1)
|
|
+ aggressive = 1;
|
|
|
|
/*
|
|
* Unlock this hash bucket and re-acquire the region lock. If
|
|
@@ -360,54 +374,6 @@
|
|
|
|
/* Reset the hash bucket's priority. */
|
|
hp->hash_priority = SH_TAILQ_FIRST(&hp->hash_bucket, __bh)->priority;
|
|
-}
|
|
-
|
|
-/*
|
|
- * __memp_reset_lru --
|
|
- * Reset the cache LRU counter.
|
|
- */
|
|
-static void
|
|
-__memp_reset_lru(dbenv, memreg, c_mp)
|
|
- DB_ENV *dbenv;
|
|
- REGINFO *memreg;
|
|
- MPOOL *c_mp;
|
|
-{
|
|
- BH *bhp;
|
|
- DB_MPOOL_HASH *hp;
|
|
- int bucket;
|
|
-
|
|
- /*
|
|
- * Update the counter so all future allocations will start at the
|
|
- * bottom.
|
|
- */
|
|
- c_mp->lru_count -= MPOOL_BASE_DECREMENT;
|
|
-
|
|
- /* Release the region lock. */
|
|
- R_UNLOCK(dbenv, memreg);
|
|
-
|
|
- /* Adjust the priority of every buffer in the system. */
|
|
- for (hp = R_ADDR(memreg, c_mp->htab),
|
|
- bucket = 0; bucket < c_mp->htab_buckets; ++hp, ++bucket) {
|
|
- /*
|
|
- * Skip empty buckets.
|
|
- *
|
|
- * We can check for empty buckets before locking as we
|
|
- * only care if the pointer is zero or non-zero.
|
|
- */
|
|
- if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
|
|
- continue;
|
|
-
|
|
- MUTEX_LOCK(dbenv, &hp->hash_mutex);
|
|
- for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
|
|
- bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
|
|
- if (bhp->priority != UINT32_T_MAX &&
|
|
- bhp->priority > MPOOL_BASE_DECREMENT)
|
|
- bhp->priority -= MPOOL_BASE_DECREMENT;
|
|
- MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
|
|
- }
|
|
-
|
|
- /* Reacquire the region lock. */
|
|
- R_LOCK(dbenv, memreg);
|
|
}
|
|
|
|
#ifdef DIAGNOSTIC
|
|
--- libdb/dbreg/dbreg_rec.c.orig Thu Nov 20 23:13:19 2003
|
|
+++ libdb/dbreg/dbreg_rec.c Fri Mar 18 20:31:14 2005
|
|
@@ -174,19 +174,20 @@
|
|
* Typically, closes should match an open which means
|
|
* that if this is a close, there should be a valid
|
|
* entry in the dbentry table when we get here,
|
|
- * however there is an exception. If this is an
|
|
+ * however there are exceptions. 1. If this is an
|
|
* OPENFILES pass, then we may have started from
|
|
* a log file other than the first, and the
|
|
* corresponding open appears in an earlier file.
|
|
- * We can ignore that case, but all others are errors.
|
|
+ * 2. If we are undoing an open on an abort or
|
|
+ * recovery, it's possible that we failed after
|
|
+ * the log record, but before we actually entered
|
|
+ * a handle here.
|
|
*/
|
|
dbe = &dblp->dbentry[argp->fileid];
|
|
if (dbe->dbp == NULL && !dbe->deleted) {
|
|
/* No valid entry here. */
|
|
- if ((argp->opcode != LOG_CLOSE &&
|
|
- argp->opcode != LOG_RCLOSE) ||
|
|
- (op != DB_TXN_OPENFILES &&
|
|
- op !=DB_TXN_POPENFILES)) {
|
|
+ if (DB_REDO(op) ||
|
|
+ argp->opcode == LOG_CHECKPOINT) {
|
|
__db_err(dbenv,
|
|
"Improper file close at %lu/%lu",
|
|
(u_long)lsnp->file,
|
|
--- libdb/env/env_recover.c.orig Thu Nov 20 23:13:20 2003
|
|
+++ libdb/env/env_recover.c Fri Mar 18 20:31:14 2005
|
|
@@ -232,12 +232,9 @@
|
|
* we'll still need to do a vtruncate based on information we haven't
|
|
* yet collected.
|
|
*/
|
|
- if (ret == DB_NOTFOUND) {
|
|
+ if (ret == DB_NOTFOUND)
|
|
ret = 0;
|
|
- if (max_lsn == NULL)
|
|
- goto done;
|
|
- }
|
|
- if (ret != 0)
|
|
+ else if (ret != 0)
|
|
goto err;
|
|
|
|
hi_txn = txnid;
|
|
@@ -331,7 +328,7 @@
|
|
|
|
/* Find a low txnid. */
|
|
ret = 0;
|
|
- do {
|
|
+ if (hi_txn != 0) do {
|
|
/* txnid is after rectype, which is a u_int32. */
|
|
memcpy(&txnid,
|
|
(u_int8_t *)data.data + sizeof(u_int32_t), sizeof(txnid));
|
|
@@ -344,11 +341,8 @@
|
|
* There are no transactions and we're not recovering to an LSN (see
|
|
* above), so there is nothing to do.
|
|
*/
|
|
- if (ret == DB_NOTFOUND) {
|
|
+ if (ret == DB_NOTFOUND)
|
|
ret = 0;
|
|
- if (max_lsn == NULL)
|
|
- goto done;
|
|
- }
|
|
|
|
/* Reset to the first lsn. */
|
|
if (ret != 0 || (ret = logc->get(logc, &first_lsn, &data, DB_SET)) != 0)
|
|
@@ -367,6 +361,10 @@
|
|
txninfo, &data, &first_lsn, &last_lsn, nfiles, 1)) != 0)
|
|
goto err;
|
|
|
|
+ /* If there were no transactions, then we can bail out early. */
|
|
+ if (hi_txn == 0 && max_lsn == NULL)
|
|
+ goto done;
|
|
+
|
|
/*
|
|
* Pass #2.
|
|
*
|
|
@@ -483,6 +481,7 @@
|
|
if ((ret = __dbreg_close_files(dbenv)) != 0)
|
|
goto err;
|
|
|
|
+done:
|
|
if (max_lsn != NULL) {
|
|
region->last_ckp = ((DB_TXNHEAD *)txninfo)->ckplsn;
|
|
|
|
@@ -538,7 +537,8 @@
|
|
__db_err(dbenv, "Recovery complete at %.24s", ctime(&now));
|
|
__db_err(dbenv, "%s %lx %s [%lu][%lu]",
|
|
"Maximum transaction ID",
|
|
- ((DB_TXNHEAD *)txninfo)->maxid,
|
|
+ txninfo == NULL ? TXN_MINIMUM :
|
|
+ ((DB_TXNHEAD *)txninfo)->maxid,
|
|
"Recovery checkpoint",
|
|
(u_long)region->last_ckp.file,
|
|
(u_long)region->last_ckp.offset);
|
|
@@ -550,7 +550,6 @@
|
|
(u_long)lsn.file, (u_long)lsn.offset, pass);
|
|
}
|
|
|
|
-done:
|
|
err: if (lockid != DB_LOCK_INVALIDID) {
|
|
if ((t_ret = __rep_unlockpages(dbenv, lockid)) != 0 && ret == 0)
|
|
ret = t_ret;
|