path: root/db-4.8.30/dbinc
author	Jesse Morgan <jesse@jesterpm.net>	2016-12-17 21:28:53 -0800
committer	Jesse Morgan <jesse@jesterpm.net>	2016-12-17 21:28:53 -0800
commit	54df2afaa61c6a03cbb4a33c9b90fa572b6d07b8 (patch)
tree	18147b92b969d25ffbe61935fb63035cac820dd0 /db-4.8.30/dbinc
Berkeley DB 4.8 with rust build script for linux.
Diffstat (limited to 'db-4.8.30/dbinc')
-rw-r--r--  db-4.8.30/dbinc/atomic.h       220
-rw-r--r--  db-4.8.30/dbinc/btree.h        480
-rw-r--r--  db-4.8.30/dbinc/clock.h        127
-rw-r--r--  db-4.8.30/dbinc/crypto.h        85
-rw-r--r--  db-4.8.30/dbinc/cxx_int.h       75
-rw-r--r--  db-4.8.30/dbinc/db.in         2441
-rw-r--r--  db-4.8.30/dbinc/db_185.in      176
-rw-r--r--  db-4.8.30/dbinc/db_am.h        311
-rw-r--r--  db-4.8.30/dbinc/db_cxx.in     1365
-rw-r--r--  db-4.8.30/dbinc/db_dispatch.h   97
-rw-r--r--  db-4.8.30/dbinc/db_int.in      933
-rw-r--r--  db-4.8.30/dbinc/db_join.h       37
-rw-r--r--  db-4.8.30/dbinc/db_page.h      672
-rw-r--r--  db-4.8.30/dbinc/db_swap.h      262
-rw-r--r--  db-4.8.30/dbinc/db_upgrade.h   248
-rw-r--r--  db-4.8.30/dbinc/db_verify.h    204
-rw-r--r--  db-4.8.30/dbinc/debug.h        277
-rw-r--r--  db-4.8.30/dbinc/fop.h           32
-rw-r--r--  db-4.8.30/dbinc/globals.h      123
-rw-r--r--  db-4.8.30/dbinc/hash.h         169
-rw-r--r--  db-4.8.30/dbinc/hmac.h          39
-rw-r--r--  db-4.8.30/dbinc/lock.h         310
-rw-r--r--  db-4.8.30/dbinc/log.h          448
-rw-r--r--  db-4.8.30/dbinc/mp.h           647
-rw-r--r--  db-4.8.30/dbinc/mutex.h        277
-rw-r--r--  db-4.8.30/dbinc/mutex_int.h   1073
-rw-r--r--  db-4.8.30/dbinc/os.h           176
-rw-r--r--  db-4.8.30/dbinc/partition.h     54
-rw-r--r--  db-4.8.30/dbinc/qam.h          180
-rw-r--r--  db-4.8.30/dbinc/queue.h        563
-rw-r--r--  db-4.8.30/dbinc/region.h       285
-rw-r--r--  db-4.8.30/dbinc/rep.h          831
-rw-r--r--  db-4.8.30/dbinc/repmgr.h       548
-rw-r--r--  db-4.8.30/dbinc/shqueue.h      406
-rw-r--r--  db-4.8.30/dbinc/tcl_db.h       278
-rw-r--r--  db-4.8.30/dbinc/txn.h          227
-rw-r--r--  db-4.8.30/dbinc/win_db.h       144
37 files changed, 14820 insertions, 0 deletions
diff --git a/db-4.8.30/dbinc/atomic.h b/db-4.8.30/dbinc/atomic.h
new file mode 100644
index 0000000..0034dcc
--- /dev/null
+++ b/db-4.8.30/dbinc/atomic.h
@@ -0,0 +1,220 @@
+/*
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_ATOMIC_H_
+#define _DB_ATOMIC_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * Atomic operation support for Oracle Berkeley DB
+ *
+ * HAVE_ATOMIC_SUPPORT configures whether to use the assembly language
+ * or system calls to perform:
+ *
+ * atomic_inc(env, valueptr)
+ * Adds 1 to the db_atomic_t value, returning the new value.
+ *
+ * atomic_dec(env, valueptr)
+ * Subtracts 1 from the db_atomic_t value, returning the new value.
+ *
+ * atomic_compare_exchange(env, valueptr, oldval, newval)
+ * If the db_atomic_t's value is still oldval, set it to newval.
+ * It returns 1 for success or 0 for failure.
+ *
+ * The ENV * parameter is used only when HAVE_ATOMIC_SUPPORT is undefined.
+ *
+ * If the platform does not natively support any one of these operations,
+ * then atomic operations will be emulated with this sequence:
+ * MUTEX_LOCK()
+ * <op>
+ * MUTEX_UNLOCK();
+ * Uses where mutexes are not available (e.g. the environment has not yet
+ * attached to the mutex region) must be avoided.
+ */
+#if defined(DB_WIN32)
+typedef DWORD atomic_value_t;
+#else
+typedef int32_t atomic_value_t;
+#endif
+
+/*
+ * Windows CE has strange issues using the Interlocked APIs with variables
+ * stored in shared memory. It seems like the page needs to have been written
+ * prior to the API working as expected. Work around this by allocating an
+ * additional 32-bit value that can be harmlessly written for each value
+ * used in Interlocked instructions.
+ */
+#if defined(DB_WINCE)
+typedef struct {
+ volatile atomic_value_t value;
+ volatile atomic_value_t dummy;
+} db_atomic_t;
+#else
+typedef struct {
+ volatile atomic_value_t value;
+} db_atomic_t;
+#endif
+
+/*
+ * These macros hide the db_atomic_t structure layout and help detect
+ * non-atomic_t actual arguments to the atomic_xxx() calls. DB requires
+ * aligned 32-bit reads to be atomic even outside of explicit 'atomic' calls.
+ * These have no memory barriers; the caller must include them when necessary.
+ */
+#define atomic_read(p) ((p)->value)
+#define atomic_init(p, val) ((p)->value = (val))
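+
+/*
+ * A minimal usage sketch (hypothetical caller: "env" is an ENV * and
+ * "ref" is a db_atomic_t embedded in some shared structure):
+ *
+ *	atomic_init(&ref, 0);
+ *	if (atomic_compare_exchange(env, &ref, 0, 1))
+ *		-- the value atomically moved from 0 to 1
+ *	(void)atomic_inc(env, &ref);		-- returns the new value
+ *	if (atomic_dec(env, &ref) == 0)
+ *		-- last reference released
+ */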
+
+#ifdef HAVE_ATOMIC_SUPPORT
+
+#if defined(DB_WIN32)
+#if defined(DB_WINCE)
+#define WINCE_ATOMIC_MAGIC(p) \
+ /* \
+ * Memory mapped regions on Windows CE cause problems with \
+ * InterlockedXXX calls. Each page in a mapped region needs to \
+ * have been written to prior to an InterlockedXXX call, or the \
+ * InterlockedXXX call hangs. This does not seem to be \
+ * documented anywhere. For now, read/write a non-critical \
+	 * piece of memory from the shared region prior to attempting	\
+	 * an InterlockedXXX operation.					\
+ */ \
+ (p)->dummy = 0
+#else
+#define WINCE_ATOMIC_MAGIC(p) 0
+#endif
+
+#if defined(DB_WINCE) || (defined(_MSC_VER) && _MSC_VER < 1300)
+/*
+ * The Interlocked instructions on Windows CE have different parameter
+ * definitions: the parameters lost their 'volatile' qualifier, so we
+ * cast it away to avoid compiler warnings.
+ * These definitions should match those in dbinc/mutex_int.h for tsl_t, except
+ * that the WINCE version drops the volatile qualifier.
+ */
+typedef PLONG interlocked_val;
+#define atomic_inc(env, p) \
+ (WINCE_ATOMIC_MAGIC(p), \
+ InterlockedIncrement((interlocked_val)(&(p)->value)))
+
+#else
+typedef LONG volatile *interlocked_val;
+#define atomic_inc(env, p) \
+ InterlockedIncrement((interlocked_val)(&(p)->value))
+#endif
+
+#define atomic_dec(env, p) \
+ (WINCE_ATOMIC_MAGIC(p), \
+ InterlockedDecrement((interlocked_val)(&(p)->value)))
+#if defined(_MSC_VER) && _MSC_VER < 1300
+#define atomic_compare_exchange(env, p, oldval, newval) \
+ (WINCE_ATOMIC_MAGIC(p), \
+ (InterlockedCompareExchange((PVOID *)(&(p)->value), \
+ (PVOID)(newval), (PVOID)(oldval)) == (PVOID)(oldval)))
+#else
+#define atomic_compare_exchange(env, p, oldval, newval) \
+ (WINCE_ATOMIC_MAGIC(p), \
+ (InterlockedCompareExchange((interlocked_val)(&(p)->value), \
+ (newval), (oldval)) == (oldval)))
+#endif
+#endif
+
+#if defined(HAVE_ATOMIC_SOLARIS)
+/* Solaris sparc & x86/64 */
+#include <atomic.h>
+#define atomic_inc(env, p) \
+ atomic_inc_uint_nv((volatile unsigned int *) &(p)->value)
+#define atomic_dec(env, p) \
+ atomic_dec_uint_nv((volatile unsigned int *) &(p)->value)
+#define atomic_compare_exchange(env, p, oval, nval) \
+ (atomic_cas_32((volatile unsigned int *) &(p)->value, \
+ (oval), (nval)) == (oval))
+#endif
+
+#if defined(HAVE_ATOMIC_X86_GCC_ASSEMBLY)
+/* x86/x86_64 gcc */
+#define atomic_inc(env, p) __atomic_inc(p)
+#define atomic_dec(env, p) __atomic_dec(p)
+#define atomic_compare_exchange(env, p, o, n) \
+ __atomic_compare_exchange((p), (o), (n))
+static inline int __atomic_inc(db_atomic_t *p)
+{
+ int temp;
+
+ temp = 1;
+ __asm__ __volatile__("lock; xadd %0, (%1)"
+ : "+r"(temp)
+ : "r"(p));
+ return (temp + 1);
+}
+
+static inline int __atomic_dec(db_atomic_t *p)
+{
+ int temp;
+
+ temp = -1;
+ __asm__ __volatile__("lock; xadd %0, (%1)"
+ : "+r"(temp)
+ : "r"(p));
+ return (temp - 1);
+}
+
+/*
+ * x86/gcc Compare exchange for shared latches. i486+
+ * Returns 1 for success, 0 for failure
+ *
+ * GCC 4.1+ has an equivalent __sync_bool_compare_and_swap() as well as
+ * __sync_val_compare_and_swap() which returns the value read from *dest
+ * http://gcc.gnu.org/onlinedocs/gcc-4.1.0/gcc/Atomic-Builtins.html
+ * which configure could be changed to use.
+ */
+static inline int __atomic_compare_exchange(
+ db_atomic_t *p, atomic_value_t oldval, atomic_value_t newval)
+{
+ atomic_value_t was;
+
+ if (p->value != oldval) /* check without expensive cache line locking */
+ return 0;
+ __asm__ __volatile__("lock; cmpxchgl %1, (%2);"
+ :"=a"(was)
+ :"r"(newval), "r"(p), "a"(oldval)
+ :"memory", "cc");
+ return (was == oldval);
+}
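+
+/*
+ * For comparison, the GCC 4.1+ builtins mentioned above would reduce
+ * the same operation to this sketch (not what configure selects today):
+ *
+ *	static inline int __atomic_compare_exchange(
+ *	    db_atomic_t *p, atomic_value_t oldval, atomic_value_t newval)
+ *	{
+ *		return (__sync_bool_compare_and_swap(
+ *		    &p->value, oldval, newval) ? 1 : 0);
+ *	}
+ */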
+#endif
+
+#else
+/*
+ * No native hardware support for atomic increment, decrement, and
+ * compare-exchange. Emulate them when mutexes are supported;
+ * perform them without concern for atomicity when mutexes are unavailable.
+ */
+#ifndef HAVE_MUTEX_SUPPORT
+/*
+ * These minimal versions are correct to use only for single-threaded,
+ * single-process environments.
+ */
+#define atomic_inc(env, p) (++(p)->value)
+#define atomic_dec(env, p) (--(p)->value)
+#define atomic_compare_exchange(env, p, oldval, newval) \
+ (DB_ASSERT(env, atomic_read(p) == (oldval)), \
+ atomic_init(p, (newval)), 1)
+#else
+#define atomic_inc(env, p) __atomic_inc(env, p)
+#define atomic_dec(env, p) __atomic_dec(env, p)
+#endif
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* !_DB_ATOMIC_H_ */
diff --git a/db-4.8.30/dbinc/btree.h b/db-4.8.30/dbinc/btree.h
new file mode 100644
index 0000000..afb81b3
--- /dev/null
+++ b/db-4.8.30/dbinc/btree.h
@@ -0,0 +1,480 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996-2009 Oracle. All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995, 1996
+ * Keith Bostic. All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Olson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id$
+ */
+#ifndef _DB_BTREE_H_
+#define _DB_BTREE_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* Forward structure declarations. */
+struct __btree; typedef struct __btree BTREE;
+struct __cursor; typedef struct __cursor BTREE_CURSOR;
+struct __epg; typedef struct __epg EPG;
+
+#define DEFMINKEYPAGE (2)
+
+/*
+ * A recno order of 0 indicates that we don't have an order, not that we've
+ * an order less than 1.
+ */
+#define INVALID_ORDER 0
+
+#define ISINTERNAL(p) (TYPE(p) == P_IBTREE || TYPE(p) == P_IRECNO)
+#define ISLEAF(p) (TYPE(p) == P_LBTREE || \
+ TYPE(p) == P_LRECNO || TYPE(p) == P_LDUP)
+
+/* Flags for __bam_cadjust_log(). */
+#define CAD_UPDATEROOT 0x01 /* Root page count was updated. */
+
+/* Flags for __bam_split_log(). */
+#define SPL_NRECS 0x01 /* Split tree has record count. */
+#define SPL_RECNO 0x02 /* This is a Recno cursor. */
+
+/* Flags for __bam_iitem(). */
+#define BI_DELETED 0x01 /* Key/data pair only placeholder. */
+
+/* Flags for __bam_stkrel(). */
+#define STK_CLRDBC 0x01 /* Clear dbc->page reference. */
+#define STK_NOLOCK 0x02 /* Don't retain locks. */
+#define STK_PGONLY 0x04
+
+/* Flags for __ram_ca(). These get logged, so make the values explicit. */
+typedef enum {
+ CA_DELETE = 0, /* Delete the current record. */
+ CA_IAFTER = 1, /* Insert after the current record. */
+ CA_IBEFORE = 2, /* Insert before the current record. */
+ CA_ICURRENT = 3 /* Overwrite the current record. */
+} ca_recno_arg;
+
+/*
+ * Flags for __bam_search() and __bam_rsearch().
+ *
+ * Note, internal page searches must find the largest record less than key in
+ * the tree so that descents work. Leaf page searches must find the smallest
+ * record greater than key so that the returned index is the record's correct
+ * position for insertion.
+ *
+ * The flags parameter to the search routines describes three aspects of the
+ * search: the type of locking required (including if we're locking a pair of
+ * pages), the item to return in the presence of duplicates and whether or not
+ * to return deleted entries. To simplify both the mnemonic representation
+ * and the code that checks for various cases, we construct a set of bitmasks.
+ */
+#define SR_READ 0x00001 /* Read locks. */
+#define SR_WRITE 0x00002 /* Write locks. */
+
+#define SR_APPEND 0x00040 /* Append to the tree. */
+#define SR_DELNO 0x00080 /* Don't return deleted items. */
+#define SR_DUPFIRST 0x00100 /* Return first duplicate. */
+#define SR_DUPLAST 0x00200 /* Return last duplicate. */
+#define SR_EXACT 0x00400 /* Exact items only. */
+#define SR_PARENT 0x00800 /* Lock page pair. */
+#define SR_STACK 0x01000 /* Need a complete stack. */
+#define SR_PAST_EOF 0x02000 /* If doing insert search (or keyfirst
+ * or keylast operations), or a split
+ * on behalf of an insert, it's okay to
+ * return an entry one past end-of-page.
+ */
+#define SR_STK_ONLY 0x04000 /* Just return info in the stack */
+#define SR_MAX 0x08000 /* Get the right most key */
+#define SR_MIN 0x10000 /* Get the left most key */
+#define SR_NEXT 0x20000 /* Get the page after this key */
+#define SR_DEL 0x40000 /* Get the tree to delete this key. */
+#define SR_START 0x80000 /* Level to start stack. */
+#define SR_BOTH 0x100000 /* Get this and the NEXT page */
+
+#define SR_DELETE \
+ (SR_WRITE | SR_DUPFIRST | SR_DELNO | SR_EXACT | SR_STACK)
+#define SR_FIND (SR_READ | SR_DUPFIRST | SR_DELNO)
+#define SR_FIND_WR (SR_WRITE | SR_DUPFIRST | SR_DELNO)
+#define SR_INSERT (SR_WRITE | SR_DUPLAST | SR_PAST_EOF | SR_STACK)
+#define SR_KEYFIRST (SR_WRITE | SR_DUPFIRST | SR_PAST_EOF | SR_STACK)
+#define SR_KEYLAST (SR_WRITE | SR_DUPLAST | SR_PAST_EOF | SR_STACK)
+#define SR_WRPAIR (SR_WRITE | SR_DUPLAST | SR_PAST_EOF | SR_PARENT)
+
+/*
+ * Various routines pass around page references. A page reference is
+ * a pointer to the page, and the indx indicates an item on the page.
+ * Each page reference may include a lock.
+ */
+struct __epg {
+ PAGE *page; /* The page. */
+ db_indx_t indx; /* The index on the page. */
+ db_indx_t entries; /* The number of entries on page */
+ DB_LOCK lock; /* The page's lock. */
+ db_lockmode_t lock_mode; /* The lock mode. */
+};
+
+/*
+ * We maintain a stack of the pages that we're locking in the tree. Grow
+ * the stack as necessary.
+ *
+ * XXX
+ * Temporary fix for #3243 -- clear the page and lock from the stack entry.
+ * The correct fix is to never release a stack that doesn't hold items.
+ */
+#define BT_STK_CLR(c) do { \
+ (c)->csp = (c)->sp; \
+ (c)->csp->page = NULL; \
+ LOCK_INIT((c)->csp->lock); \
+} while (0)
+
+#define BT_STK_ENTER(env, c, pagep, page_indx, l, mode, ret) do { \
+ if ((ret = ((c)->csp == (c)->esp ? \
+ __bam_stkgrow(env, c) : 0)) == 0) { \
+ (c)->csp->page = pagep; \
+ (c)->csp->indx = (page_indx); \
+ (c)->csp->entries = NUM_ENT(pagep); \
+ (c)->csp->lock = l; \
+ (c)->csp->lock_mode = mode; \
+ } \
+} while (0)
+
+#define BT_STK_PUSH(env, c, pagep, page_indx, lock, mode, ret) do { \
+ BT_STK_ENTER(env, c, pagep, page_indx, lock, mode, ret); \
+ ++(c)->csp; \
+} while (0)
+
+#define BT_STK_NUM(env, c, pagep, page_indx, ret) do { \
+ if ((ret = ((c)->csp == \
+ (c)->esp ? __bam_stkgrow(env, c) : 0)) == 0) { \
+ (c)->csp->page = NULL; \
+ (c)->csp->indx = (page_indx); \
+ (c)->csp->entries = NUM_ENT(pagep); \
+ LOCK_INIT((c)->csp->lock); \
+ (c)->csp->lock_mode = DB_LOCK_NG; \
+ } \
+} while (0)
+
+#define BT_STK_NUMPUSH(env, c, pagep, page_indx, ret) do { \
+ BT_STK_NUM(env, c, pagep, page_indx, ret); \
+ ++(c)->csp; \
+} while (0)
+
+#define BT_STK_POP(c) \
+ ((c)->csp == (c)->sp ? NULL : --(c)->csp)
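+
+/*
+ * Stack discipline sketch (hypothetical caller; page unpinning and lock
+ * release elided). BT_STK_PUSH leaves csp one past the top entry, so
+ * BT_STK_POP returns entries top-down and NULL once the stack is empty:
+ *
+ *	EPG *epg;
+ *
+ *	BT_STK_PUSH(env, cp, h, indx, lock, lock_mode, ret);
+ *	...
+ *	while ((epg = BT_STK_POP(cp)) != NULL)
+ *		-- process epg->page and epg->lock
+ */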
+
+/*
+ * Flags for __bam_dpages.
+ */
+#define BTD_UPDATE 0x0001 /* Update parents. */
+#define BTD_RELINK 0x0002 /* Relink leaf pages. */
+
+/*
+ * TRY_LOCK
+ * When holding a stack we have pages latched but not locked so
+ * we must avoid an undetectable deadlock by not then blocking on a
+ * lock.
+ */
+#define TRY_LOCK(dbc, pgno, saved_pgno, saved_lock, lock_mode, label) \
+ TRY_LOCK2(dbc, NULL, pgno, saved_pgno, saved_lock, lock_mode, label)
+/*
+ * TRY_LOCK2
+ * This is a special call for __bam_compact_int which uses 2
+ * overlapping stacks.
+ */
+
+#ifdef BTREE_DEBUG
+#define TRY_LOCK2(dbc, ndbc, pgno, \
+ saved_pgno, saved_lock, lock_mode, label) do { \
+ static int BTcount = 0; \
+ if ((pgno) != (saved_pgno) && \
+ ((BTcount++ % 5) == 0 || \
+ (ret = __db_lget(dbc, LCK_COUPLE_ALWAYS, pgno, \
+ lock_mode, DB_LOCK_NOWAIT, &(saved_lock))) != 0)) { \
+ if (ret != 0 && ret != DB_LOCK_NOTGRANTED && \
+ ret != DB_LOCK_DEADLOCK) \
+ break; \
+ if ((ndbc) != NULL) { \
+ BTREE_CURSOR *__cp; \
+ __cp = (BTREE_CURSOR *) (dbc)->internal; \
+ __cp->sp->page = NULL; \
+ LOCK_INIT(__cp->sp->lock); \
+ if ((ret = __bam_stkrel(ndbc, 0)) != 0) \
+ break; \
+ } \
+ if ((ret = __bam_stkrel(dbc, 0)) != 0) \
+ break; \
+ if ((ret = __db_lget(dbc, LCK_COUPLE_ALWAYS, pgno, \
+ lock_mode, 0, &(saved_lock))) != 0) \
+ break; \
+ saved_pgno = pgno; \
+ goto label; \
+ } \
+ saved_pgno = pgno; \
+} while (0)
+#else
+#define TRY_LOCK2(dbc, ndbc, pgno, \
+ saved_pgno, saved_lock, lock_mode, label) do { \
+ if ((pgno) != (saved_pgno) && \
+ (ret = __db_lget(dbc, LCK_COUPLE_ALWAYS, pgno, \
+ lock_mode, DB_LOCK_NOWAIT, &(saved_lock))) != 0) { \
+ if (ret != DB_LOCK_NOTGRANTED && \
+ ret != DB_LOCK_DEADLOCK) \
+ break; \
+ if ((ndbc) != NULL) { \
+ BTREE_CURSOR *__cp; \
+ __cp = (BTREE_CURSOR *) (dbc)->internal; \
+ __cp->sp->page = NULL; \
+ LOCK_INIT(__cp->sp->lock); \
+ if ((ret = __bam_stkrel(ndbc, 0)) != 0) \
+ break; \
+ } \
+ if ((ret = __bam_stkrel(dbc, 0)) != 0) \
+ break; \
+ if ((ret = __db_lget(dbc, LCK_COUPLE_ALWAYS, pgno, \
+ lock_mode, 0, &(saved_lock))) != 0) \
+ break; \
+ saved_pgno = pgno; \
+ goto label; \
+ } \
+ saved_pgno = pgno; \
+} while (0)
+#endif
+
+/* Btree/Recno cursor. */
+struct __cursor {
+ /* struct __dbc_internal */
+ __DBC_INTERNAL
+
+ /* btree private part */
+ EPG *sp; /* Stack pointer. */
+ EPG *csp; /* Current stack entry. */
+ EPG *esp; /* End stack pointer. */
+ EPG stack[5];
+
+ db_indx_t ovflsize; /* Maximum key/data on-page size. */
+
+ db_recno_t recno; /* Current record number. */
+ u_int32_t order; /* Relative order among deleted curs. */
+
+#ifdef HAVE_COMPRESSION
+ /*
+ * Compression:
+ *
+ * We need to hold the current compressed chunk, as well as the previous
+ * key/data, in order to decompress the next key/data. We do that by
+ * swapping whether prevKey/Data and currentKey/Data point to
+ * key1/data1, or key2/data2.
+ *
+ * We store prevcursor in order to be able to perform one level of
+ * DB_PREV by returning prevKey/prevData. We need prev2cursor to more
+ * efficiently do a subsequent DB_PREV with a linear search from the
+ * begining of the compressed chunk.
+ *
+ * When we delete entries, we set the cursor to point to the next entry
+ * after the last deleted key, and set C_COMPRESS_DELETED. The del_key
+ * DBT holds the key of the deleted entry supposedly pointed to by a
+ * compressed cursor, and is used to implement DB_PREV_DUP,
+ * DB_PREV_NODUP, DB_NEXT_DUP, and DB_NEXT_NODUP on a deleted entry.
+ */
+ DBT compressed; /* Current compressed chunk */
+ DBT key1; /* Holds prevKey or currentKey */
+ DBT key2; /* Holds prevKey or currentKey */
+ DBT data1; /* Holds prevData or currentData */
+ DBT data2; /* Holds prevData or currentData */
+ DBT del_key; /* Holds key from the deleted entry */
+ DBT del_data; /* Holds data from the deleted entry */
+ DBT *prevKey; /* Previous key decompressed */
+ DBT *prevData; /* Previous data decompressed */
+ DBT *currentKey; /* Current key decompressed */
+ DBT *currentData; /* Current data decompressed */
+ u_int8_t *compcursor; /* Current position in compressed */
+ u_int8_t *compend; /* End of compressed */
+ u_int8_t *prevcursor; /* Previous current position */
+ u_int8_t *prev2cursor; /* Previous previous current position */
+#endif
+
+ /*
+ * Btree:
+ * We set a flag in the cursor structure if the underlying object has
+ * been deleted. It's not strictly necessary, we could get the same
+ * information by looking at the page itself, but this method doesn't
+ * require us to retrieve the page on cursor delete.
+ *
+ * Recno:
+ * When renumbering recno databases during deletes, cursors referencing
+ * "deleted" records end up positioned between two records, and so must
+ * be specially adjusted on the next operation.
+ */
+#define C_DELETED 0x0001 /* Record was deleted. */
+ /*
+ * There are three tree types that require maintaining record numbers.
+ * Recno AM trees, Btree AM trees for which the DB_RECNUM flag was set,
+ * and Btree off-page duplicate trees.
+ */
+#define C_RECNUM 0x0002 /* Tree requires record counts. */
+ /*
+ * Recno trees have immutable record numbers by default, but optionally
+ * support mutable record numbers. Off-page duplicate Recno trees have
+ * mutable record numbers. All Btrees with record numbers (including
+ * off-page duplicate trees) are mutable by design, no flag is needed.
+ */
+#define C_RENUMBER 0x0004 /* Tree records are mutable. */
+ /*
+ * The current compressed key/data could be deleted, as well as the
+ * key/data that the underlying BTree cursor points to.
+ */
+#define C_COMPRESS_DELETED 0x0008 /* Compressed record was deleted. */
+ /*
+ * The current compressed chunk has been modified by another DBC. A
+ * compressed cursor will have to seek its position again if necessary
+ * when it is next accessed.
+ */
+#define C_COMPRESS_MODIFIED 0x0010 /* Compressed record was modified. */
+ u_int32_t flags;
+};
+
+/*
+ * Threshold value, as a function of bt_minkey, of the number of
+ * bytes a key/data pair can use before being placed on an overflow
+ * page. Assume every item requires the maximum alignment for
+ * padding, out of sheer paranoia.
+ */
+#define B_MINKEY_TO_OVFLSIZE(dbp, minkey, pgsize) \
+ ((u_int16_t)(((pgsize) - P_OVERHEAD(dbp)) / ((minkey) * P_INDX) -\
+ (BKEYDATA_PSIZE(0) + DB_ALIGN(1, sizeof(int32_t)))))
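+
+/*
+ * Worked example (the constants here are illustrative assumptions, not
+ * values taken from db_page.h): with pgsize == 4096, P_OVERHEAD(dbp) ==
+ * 26, P_INDX == 2, BKEYDATA_PSIZE(0) == 4 and DB_ALIGN(1,
+ * sizeof(int32_t)) == 4, the default minkey of 2 gives
+ *
+ *	(4096 - 26) / (2 * 2) - (4 + 4) = 1017 - 8 = 1009
+ *
+ * so key/data items larger than roughly 1KB move to overflow pages.
+ */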
+
+/*
+ * The maximum space that a single item can ever take up on one page.
+ * Used by __bam_split to determine whether a split is still necessary.
+ */
+#define B_MAX(a,b) (((a) > (b)) ? (a) : (b))
+#define B_MAXSIZEONPAGE(ovflsize) \
+ (B_MAX(BOVERFLOW_PSIZE, BKEYDATA_PSIZE(ovflsize)))
+
+/*
+ * The in-memory, per-tree btree/recno data structure.
+ */
+struct __btree { /* Btree access method. */
+ /*
+ * !!!
+ * These fields are write-once (when the structure is created) and
+ * so are ignored as far as multi-threading is concerned.
+ */
+ db_pgno_t bt_meta; /* Database meta-data page. */
+ db_pgno_t bt_root; /* Database root page. */
+
+ u_int32_t bt_minkey; /* Minimum keys per page. */
+
+ /* Btree comparison function. */
+ int (*bt_compare) __P((DB *, const DBT *, const DBT *));
+ /* Btree prefix function. */
+ size_t (*bt_prefix) __P((DB *, const DBT *, const DBT *));
+ /* Btree compress function. */
+#ifdef HAVE_COMPRESSION
+ int (*bt_compress) __P((DB *, const DBT *, const DBT *, const DBT *,
+ const DBT *, DBT *));
+ /* Btree decompress function. */
+ int (*bt_decompress) __P((DB *, const DBT *, const DBT *, DBT *, DBT *,
+ DBT *));
+ /* dup_compare for compression */
+ int (*compress_dup_compare) __P((DB *, const DBT *, const DBT *));
+#endif
+
+ /* Recno access method. */
+ int re_pad; /* Fixed-length padding byte. */
+ int re_delim; /* Variable-length delimiting byte. */
+ u_int32_t re_len; /* Length for fixed-length records. */
+ char *re_source; /* Source file name. */
+
+ /*
+ * !!!
+ * The bt_lpgno field is NOT protected by any mutex, and for this
+ * reason must be advisory only, so, while it is read/written by
+ * multiple threads, DB is completely indifferent to the quality
+ * of its information.
+ */
+ db_pgno_t bt_lpgno; /* Last insert location. */
+ DB_LSN bt_llsn; /* Last insert LSN. */
+
+ /*
+ * !!!
+ * The re_modified field is NOT protected by any mutex, and for this
+ * reason cannot be anything more complicated than a zero/non-zero
+ * value. The actual writing of the backing source file cannot be
+ * threaded, so clearing the flag isn't a problem.
+ */
+ int re_modified; /* If the tree was modified. */
+
+ /*
+ * !!!
+ * These fields are ignored as far as multi-threading is concerned.
+ * There are no transaction semantics associated with backing files,
+ * nor is there any thread protection.
+ */
+ FILE *re_fp; /* Source file handle. */
+ int re_eof; /* Backing source file EOF reached. */
+ db_recno_t re_last; /* Last record number read. */
+
+};
+
+/*
+ * Modes for the __bam_curadj recovery records (btree_curadj).
+ * These appear in log records, so we wire the values and
+ * do not leave it up to the compiler.
+ */
+typedef enum {
+ DB_CA_DI = 1,
+ DB_CA_DUP = 2,
+ DB_CA_RSPLIT = 3,
+ DB_CA_SPLIT = 4
+} db_ca_mode;
+
+/*
+ * Flags for __bam_pinsert.
+ */
+#define BPI_SPACEONLY 0x01 /* Only check for space to update. */
+#define BPI_NORECNUM 0x02 /* Don't update the recnum on the left. */
+#define BPI_NOLOGGING 0x04 /* Don't log the update. */
+#define BPI_REPLACE 0x08 /* Replace the record. */
+
+#if defined(__cplusplus)
+}
+#endif
+
+#include "dbinc_auto/btree_auto.h"
+#include "dbinc_auto/btree_ext.h"
+#include "dbinc/db_am.h"
+#endif /* !_DB_BTREE_H_ */
diff --git a/db-4.8.30/dbinc/clock.h b/db-4.8.30/dbinc/clock.h
new file mode 100644
index 0000000..0ed4350
--- /dev/null
+++ b/db-4.8.30/dbinc/clock.h
@@ -0,0 +1,127 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2005-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)time.h 8.5 (Berkeley) 5/4/95
+ * FreeBSD: src/sys/sys/time.h,v 1.65 2004/04/07 04:19:49 imp Exp
+ */
+
+#ifndef _DB_CLOCK_H_
+#define _DB_CLOCK_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * This declaration is POSIX-compatible. Because there are lots of different
+ * time.h include file patterns out there, it's easier to declare our own name
+ * in all cases than to try and discover if a system has a struct timespec.
+ * For the same reason, and because we'd have to #include <sys/time.h> in db.h,
+ * we don't export any timespec structures in the DB API, even in places where
+ * it would make sense, like the replication statistics information.
+ */
+typedef struct {
+ time_t tv_sec; /* seconds */
+ long tv_nsec; /* nanoseconds */
+} db_timespec;
+
+/* Operations on timespecs */
+#undef timespecclear
+#define timespecclear(tvp) ((tvp)->tv_sec = (tvp)->tv_nsec = 0)
+#undef timespecisset
+#define timespecisset(tvp) ((tvp)->tv_sec || (tvp)->tv_nsec)
+#undef timespeccmp
+#define timespeccmp(tvp, uvp, cmp) \
+ (((tvp)->tv_sec == (uvp)->tv_sec) ? \
+ ((tvp)->tv_nsec cmp (uvp)->tv_nsec) : \
+ ((tvp)->tv_sec cmp (uvp)->tv_sec))
+#undef timespecadd
+/*
+ * Note that using timespecadd to add to yourself (i.e. doubling)
+ * must be supported.
+ */
+#define timespecadd(vvp, uvp) \
+ do { \
+ (vvp)->tv_sec += (uvp)->tv_sec; \
+ (vvp)->tv_nsec += (uvp)->tv_nsec; \
+ if ((vvp)->tv_nsec >= 1000000000) { \
+ (vvp)->tv_sec++; \
+ (vvp)->tv_nsec -= 1000000000; \
+ } \
+ } while (0)
+#undef timespecsub
+#define timespecsub(vvp, uvp) \
+ do { \
+ (vvp)->tv_sec -= (uvp)->tv_sec; \
+ (vvp)->tv_nsec -= (uvp)->tv_nsec; \
+ if ((vvp)->tv_nsec < 0) { \
+ (vvp)->tv_sec--; \
+ (vvp)->tv_nsec += 1000000000; \
+ } \
+ } while (0)
+
+#undef timespecset
+#define timespecset(vvp, sec, nsec) \
+ do { \
+ (vvp)->tv_sec = (time_t)(sec); \
+ (vvp)->tv_nsec = (long)(nsec); \
+ } while (0)
+
+#define DB_TIMEOUT_TO_TIMESPEC(t, vvp) \
+ do { \
+ (vvp)->tv_sec = (time_t)((t) / 1000000); \
+ (vvp)->tv_nsec = (long)(((t) % 1000000) * 1000); \
+ } while (0)
+
+#define DB_TIMESPEC_TO_TIMEOUT(t, vvp, prec) \
+ do { \
+ t = (u_long)((vvp)->tv_sec * 1000000); \
+ t += (u_long)((vvp)->tv_nsec / 1000); \
+ /* Add in 1 usec for lost nsec precision if wanted. */ \
+ if (prec) \
+ t++; \
+ } while (0)
+
+#define TIMESPEC_ADD_DB_TIMEOUT(vvp, t) \
+ do { \
+ db_timespec __tmp; \
+ DB_TIMEOUT_TO_TIMESPEC(t, &__tmp); \
+ timespecadd((vvp), &__tmp); \
+ } while (0)
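+
+/*
+ * Conversion sketch: db_timeout_t values are in microseconds, so a
+ * timeout of 1500000 becomes { tv_sec = 1, tv_nsec = 500000000 }.
+ * Assuming "now" already holds the current time and "deadline" is
+ * another db_timespec:
+ *
+ *	deadline = now;
+ *	TIMESPEC_ADD_DB_TIMEOUT(&deadline, 1500000);
+ *	if (timespeccmp(&now, &deadline, >=))
+ *		-- the timeout has already expired
+ */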
+
+#if defined(__cplusplus)
+}
+#endif
+#endif /* !_DB_CLOCK_H_ */
diff --git a/db-4.8.30/dbinc/crypto.h b/db-4.8.30/dbinc/crypto.h
new file mode 100644
index 0000000..1e60f72
--- /dev/null
+++ b/db-4.8.30/dbinc/crypto.h
@@ -0,0 +1,85 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_CRYPTO_H_
+#define _DB_CRYPTO_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * !!!
+ * These are the internal representations of the algorithm flags.
+ * They are used in both the DB_CIPHER structure and the CIPHER
+ * structure so we can tell if users specified both passwd and alg
+ * correctly.
+ *
+ * CIPHER_ANY is used when an app joins an existing env but doesn't
+ * know the algorithm originally used. This is only valid in the
+ * DB_CIPHER structure until we open and can set the alg.
+ */
+/*
+ * We store the algorithm in an 8-bit field on the meta-page, so we
+ * use a numeric value rather than bit fields; bit fields would limit
+ * us to 8 algorithms before we needed numeric values anyway. That
+ * should be plenty. It is okay for the CIPHER_ANY flag to go beyond
+ * that since it is never stored on disk.
+ */
+
+/*
+ * This structure is per-process, not in shared memory.
+ */
+struct __db_cipher {
+ u_int (*adj_size) __P((size_t));
+ int (*close) __P((ENV *, void *));
+ int (*decrypt) __P((ENV *, void *, void *, u_int8_t *, size_t));
+ int (*encrypt) __P((ENV *, void *, void *, u_int8_t *, size_t));
+ int (*init) __P((ENV *, DB_CIPHER *));
+
+ u_int8_t mac_key[DB_MAC_KEY]; /* MAC key. */
+ void *data; /* Algorithm-specific information */
+
+#define CIPHER_AES 1 /* AES algorithm */
+ u_int8_t alg; /* Algorithm used - See above */
+ u_int8_t spare[3]; /* Spares */
+
+#define CIPHER_ANY 0x00000001 /* Only for DB_CIPHER */
+ u_int32_t flags; /* Other flags */
+};
+
+#ifdef HAVE_CRYPTO
+
+#include "crypto/rijndael/rijndael-api-fst.h"
+
+/*
+ * Shared ciphering structure
+ * No mutex needed because all information is read-only after creation.
+ */
+typedef struct __cipher {
+ roff_t passwd; /* Offset to shared passwd */
+ size_t passwd_len; /* Length of passwd */
+ u_int32_t flags; /* Algorithm used - see above */
+} CIPHER;
+
+#define DB_AES_KEYLEN 128 /* AES key length */
+#define DB_AES_CHUNK 16 /* AES byte unit size */
+
+typedef struct __aes_cipher {
+ keyInstance decrypt_ki; /* Decryption key instance */
+ keyInstance encrypt_ki; /* Encryption key instance */
+ u_int32_t flags; /* AES-specific flags */
+} AES_CIPHER;
+
+#include "dbinc_auto/crypto_ext.h"
+#endif /* HAVE_CRYPTO */
+
+#if defined(__cplusplus)
+}
+#endif
+#endif /* !_DB_CRYPTO_H_ */
diff --git a/db-4.8.30/dbinc/cxx_int.h b/db-4.8.30/dbinc/cxx_int.h
new file mode 100644
index 0000000..2e423b4
--- /dev/null
+++ b/db-4.8.30/dbinc/cxx_int.h
@@ -0,0 +1,75 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_CXX_INT_H_
+#define _DB_CXX_INT_H_
+
+// private data structures known to the implementation only
+
+//
+// Using FooImp classes will allow the implementation to change in the
+// future without any modification to user code or even to header files
+// that the user includes. FooImp * is just like void * except that it
+// provides a little extra protection, since you cannot randomly assign
+// any old pointer to a FooImp* as you can with void *. Currently, a
+// pointer to such an opaque class is always just a pointer to the
+// appropriate underlying implementation struct. These are converted
+// back and forth using the various overloaded wrap()/unwrap() methods.
+// This is essentially a use of the "Bridge" Design Pattern.
+//
+// WRAPPED_CLASS implements the appropriate wrap() and unwrap() methods
+// for a wrapper class that has an underlying pointer representation.
+//
+#define WRAPPED_CLASS(_WRAPPER_CLASS, _IMP_CLASS, _WRAPPED_TYPE) \
+ class _IMP_CLASS {}; \
+ \
+ inline _WRAPPED_TYPE *unwrap(_WRAPPER_CLASS *val) \
+ { \
+ if (!val) return (0); \
+ return (val->get_##_WRAPPED_TYPE()); \
+ } \
+ \
+ inline const _WRAPPED_TYPE *unwrapConst(const _WRAPPER_CLASS *val) \
+ { \
+ if (!val) return (0); \
+ return (val->get_const_##_WRAPPED_TYPE()); \
+ }
+
+WRAPPED_CLASS(Db, DbImp, DB)
+WRAPPED_CLASS(DbEnv, DbEnvImp, DB_ENV)
+WRAPPED_CLASS(DbMpoolFile, DbMpoolFileImp, DB_MPOOLFILE)
+WRAPPED_CLASS(DbSequence, DbSequenceImp, DB_SEQUENCE)
+WRAPPED_CLASS(DbTxn, DbTxnImp, DB_TXN)
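+
+// For example, WRAPPED_CLASS(Db, DbImp, DB) above expands (modulo
+// whitespace) to:
+//
+//	class DbImp {};
+//
+//	inline DB *unwrap(Db *val)
+//	{
+//		if (!val) return (0);
+//		return (val->get_DB());
+//	}
+//
+//	inline const DB *unwrapConst(const Db *val)
+//	{
+//		if (!val) return (0);
+//		return (val->get_const_DB());
+//	}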
+
+// A tristate integer value used by the DB_ERROR macro below.
+// We chose not to make this an enumerated type so it can
+// be kept private, even though methods that return the
+// tristate int can be declared in db_cxx.h.
+//
+#define ON_ERROR_THROW 1
+#define ON_ERROR_RETURN 0
+#define ON_ERROR_UNKNOWN (-1)
+
+// Macros that handle detected errors, in case we want to
+// change the default behavior. The 'policy' is one of
+// the tristate values given above. If UNKNOWN is specified,
+// the behavior is taken from the last initialized DbEnv.
+//
+#define DB_ERROR(dbenv, caller, ecode, policy) \
+ DbEnv::runtime_error(dbenv, caller, ecode, policy)
+
+#define DB_ERROR_DBT(dbenv, caller, dbt, policy) \
+ DbEnv::runtime_error_dbt(dbenv, caller, dbt, policy)
+
+#define DB_OVERFLOWED_DBT(dbt) \
+ (F_ISSET(dbt, DB_DBT_USERMEM) && dbt->size > dbt->ulen)
+
+/* values for Db::flags_ */
+#define DB_CXX_PRIVATE_ENV 0x00000001
+
+#endif /* !_DB_CXX_INT_H_ */
diff --git a/db-4.8.30/dbinc/db.in b/db-4.8.30/dbinc/db.in
new file mode 100644
index 0000000..9fc6712
--- /dev/null
+++ b/db-4.8.30/dbinc/db.in
@@ -0,0 +1,2441 @@
+/*
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ *
+ * db.h include file layout:
+ * General.
+ * Database Environment.
+ * Locking subsystem.
+ * Logging subsystem.
+ * Shared buffer cache (mpool) subsystem.
+ * Transaction subsystem.
+ * Access methods.
+ * Access method cursors.
+ * Dbm/Ndbm, Hsearch historic interfaces.
+ */
+
+#ifndef _DB_H_
+#define _DB_H_
+
+#ifndef __NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+@inttypes_h_decl@
+@stdint_h_decl@
+@stddef_h_decl@
+#include <stdio.h>
+@unistd_h_decl@
+@thread_h_decl@
+#endif
+
+@platform_header@
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+@DB_CONST@
+@DB_PROTO1@
+@DB_PROTO2@
+
+/*
+ * Berkeley DB version information.
+ */
+#define DB_VERSION_MAJOR @DB_VERSION_MAJOR@
+#define DB_VERSION_MINOR @DB_VERSION_MINOR@
+#define DB_VERSION_PATCH @DB_VERSION_PATCH@
+#define DB_VERSION_STRING @DB_VERSION_STRING@
+
+/*
+ * !!!
+ * Berkeley DB uses specifically sized types. If they're not provided by
+ * the system, typedef them here.
+ *
+ * We protect them against multiple inclusion using __BIT_TYPES_DEFINED__,
+ * as does BIND and Kerberos, since we don't know for sure what #include
+ * files the user is using.
+ *
+ * !!!
+ * We also provide the standard u_int, u_long etc., if they're not provided
+ * by the system.
+ */
+#ifndef __BIT_TYPES_DEFINED__
+#define __BIT_TYPES_DEFINED__
+@u_int8_decl@
+@int16_decl@
+@u_int16_decl@
+@int32_decl@
+@u_int32_decl@
+@int64_decl@
+@u_int64_decl@
+#endif
+
+@u_char_decl@
+@u_int_decl@
+@u_long_decl@
+@u_short_decl@
+
+/*
+ * Missing ANSI types.
+ *
+ * uintmax_t --
+ * Largest unsigned type, used to align structures in memory. We don't store
+ * floating point types in structures, so integral types should be sufficient
+ * (and we don't have to worry about systems that store floats in other than
+ * power-of-2 numbers of bytes). Additionally this fixes compilers that rewrite
+ * structure assignments and ANSI C memcpy calls to be in-line instructions
+ * that happen to require alignment.
+ *
+ * uintptr_t --
+ * Unsigned type that's the same size as a pointer. There are places where
+ * DB modifies pointers by discarding the bottom bits to guarantee alignment.
+ * We can't use uintmax_t, it may be larger than the pointer, and compilers
+ * get upset about that. So far we haven't run on any machine where there's
+ * no unsigned type the same size as a pointer -- here's hoping.
+ */
+@uintmax_t_decl@
+@uintptr_t_decl@
+
+@FILE_t_decl@
+@off_t_decl@
+@pid_t_decl@
+@size_t_decl@
+@ssize_t_decl@
+@time_t_decl@
+
+/*
+ * Sequences are only available on machines with 64-bit integral types.
+ */
+@db_seq_decl@
+
+/* Thread and process identification. */
+@db_threadid_t_decl@
+
+/* Basic types that are exported or quasi-exported. */
+typedef u_int32_t db_pgno_t; /* Page number type. */
+typedef u_int16_t db_indx_t; /* Page offset type. */
+#define DB_MAX_PAGES 0xffffffff /* >= # of pages in a file */
+
+typedef u_int32_t db_recno_t; /* Record number type. */
+#define DB_MAX_RECORDS 0xffffffff /* >= # of records in a tree */
+
+typedef u_int32_t db_timeout_t; /* Type of a timeout. */
+
+/*
+ * Region offsets are the difference between a pointer in a region and the
+ * region's base address. With private environments, both addresses are the
+ * result of calling malloc, and we can't assume anything about what malloc
+ * will return, so region offsets have to be able to hold differences between
+ * arbitrary pointers.
+ */
+typedef uintptr_t roff_t;
+
+/*
+ * Forward structure declarations, so we can declare pointers and
+ * applications can get type checking.
+ */
+struct __db; typedef struct __db DB;
+struct __db_bt_stat; typedef struct __db_bt_stat DB_BTREE_STAT;
+struct __db_cipher; typedef struct __db_cipher DB_CIPHER;
+struct __db_compact; typedef struct __db_compact DB_COMPACT;
+struct __db_dbt; typedef struct __db_dbt DBT;
+struct __db_distab; typedef struct __db_distab DB_DISTAB;
+struct __db_env; typedef struct __db_env DB_ENV;
+struct __db_h_stat; typedef struct __db_h_stat DB_HASH_STAT;
+struct __db_ilock; typedef struct __db_ilock DB_LOCK_ILOCK;
+struct __db_lock_hstat; typedef struct __db_lock_hstat DB_LOCK_HSTAT;
+struct __db_lock_pstat; typedef struct __db_lock_pstat DB_LOCK_PSTAT;
+struct __db_lock_stat; typedef struct __db_lock_stat DB_LOCK_STAT;
+struct __db_lock_u; typedef struct __db_lock_u DB_LOCK;
+struct __db_locker; typedef struct __db_locker DB_LOCKER;
+struct __db_lockreq; typedef struct __db_lockreq DB_LOCKREQ;
+struct __db_locktab; typedef struct __db_locktab DB_LOCKTAB;
+struct __db_log; typedef struct __db_log DB_LOG;
+struct __db_log_cursor; typedef struct __db_log_cursor DB_LOGC;
+struct __db_log_stat; typedef struct __db_log_stat DB_LOG_STAT;
+struct __db_lsn; typedef struct __db_lsn DB_LSN;
+struct __db_mpool; typedef struct __db_mpool DB_MPOOL;
+struct __db_mpool_fstat;typedef struct __db_mpool_fstat DB_MPOOL_FSTAT;
+struct __db_mpool_stat; typedef struct __db_mpool_stat DB_MPOOL_STAT;
+struct __db_mpoolfile; typedef struct __db_mpoolfile DB_MPOOLFILE;
+struct __db_mutex_stat; typedef struct __db_mutex_stat DB_MUTEX_STAT;
+struct __db_mutex_t; typedef struct __db_mutex_t DB_MUTEX;
+struct __db_mutexmgr; typedef struct __db_mutexmgr DB_MUTEXMGR;
+struct __db_preplist; typedef struct __db_preplist DB_PREPLIST;
+struct __db_qam_stat; typedef struct __db_qam_stat DB_QUEUE_STAT;
+struct __db_rep; typedef struct __db_rep DB_REP;
+struct __db_rep_stat; typedef struct __db_rep_stat DB_REP_STAT;
+struct __db_repmgr_site;typedef struct __db_repmgr_site DB_REPMGR_SITE;
+struct __db_repmgr_stat;typedef struct __db_repmgr_stat DB_REPMGR_STAT;
+struct __db_seq_record; typedef struct __db_seq_record DB_SEQ_RECORD;
+struct __db_seq_stat; typedef struct __db_seq_stat DB_SEQUENCE_STAT;
+struct __db_sequence; typedef struct __db_sequence DB_SEQUENCE;
+struct __db_thread_info;typedef struct __db_thread_info DB_THREAD_INFO;
+struct __db_txn; typedef struct __db_txn DB_TXN;
+struct __db_txn_active; typedef struct __db_txn_active DB_TXN_ACTIVE;
+struct __db_txn_stat; typedef struct __db_txn_stat DB_TXN_STAT;
+struct __db_txnmgr; typedef struct __db_txnmgr DB_TXNMGR;
+struct __dbc; typedef struct __dbc DBC;
+struct __dbc_internal; typedef struct __dbc_internal DBC_INTERNAL;
+struct __env; typedef struct __env ENV;
+struct __fh_t; typedef struct __fh_t DB_FH;
+struct __fname; typedef struct __fname FNAME;
+struct __key_range; typedef struct __key_range DB_KEY_RANGE;
+struct __mpoolfile; typedef struct __mpoolfile MPOOLFILE;
+
+/*
+ * The Berkeley DB API flags are automatically-generated -- the following flag
+ * names are no longer used, but remain for compatibility reasons.
+ */
+#define DB_DEGREE_2 DB_READ_COMMITTED
+#define DB_DIRTY_READ DB_READ_UNCOMMITTED
+#define DB_JOINENV 0x0
+
+/* Key/data structure -- a Data-Base Thang. */
+struct __db_dbt {
+ void *data; /* Key/data */
+ u_int32_t size; /* key/data length */
+
+ u_int32_t ulen; /* RO: length of user buffer. */
+ u_int32_t dlen; /* RO: get/put record length. */
+ u_int32_t doff; /* RO: get/put record offset. */
+
+ void *app_data;
+
+#define DB_DBT_APPMALLOC 0x001 /* Callback allocated memory. */
+#define DB_DBT_BULK 0x002 /* Internal: DBT is a bulk buffer. */
+#define DB_DBT_DUPOK 0x004 /* Internal: Insert if duplicate. */
+#define DB_DBT_ISSET 0x008 /* Lower level calls set value. */
+#define DB_DBT_MALLOC 0x010 /* Return in malloc'd memory. */
+#define DB_DBT_MULTIPLE 0x020 /* References multiple records. */
+#define DB_DBT_PARTIAL 0x040 /* Partial put/get. */
+#define DB_DBT_REALLOC 0x080 /* Return in realloc'd memory. */
+#define DB_DBT_STREAMING 0x100 /* Internal: DBT is being streamed. */
+#define DB_DBT_USERCOPY 0x200 /* Use the user-supplied callback. */
+#define DB_DBT_USERMEM 0x400 /* Return in user's memory. */
+ u_int32_t flags;
+};
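+
+/*
+ * Typical application initialization sketch: zero the DBT before use,
+ * then describe any caller-owned memory. E.g., to have a get() fill a
+ * caller-supplied buffer:
+ *
+ *	DBT key, data;
+ *	char buf[1024];
+ *
+ *	memset(&key, 0, sizeof(key));
+ *	memset(&data, 0, sizeof(data));
+ *	key.data = "fruit";
+ *	key.size = 5;
+ *	data.data = buf;
+ *	data.ulen = sizeof(buf);
+ *	data.flags = DB_DBT_USERMEM;
+ */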
+
+/*******************************************************
+ * Mutexes.
+ *******************************************************/
+typedef u_int32_t db_mutex_t;
+
+struct __db_mutex_stat {
+ /* The following fields are maintained in the region's copy. */
+ u_int32_t st_mutex_align; /* Mutex alignment */
+ u_int32_t st_mutex_tas_spins; /* Mutex test-and-set spins */
+ u_int32_t st_mutex_cnt; /* Mutex count */
+ u_int32_t st_mutex_free; /* Available mutexes */
+ u_int32_t st_mutex_inuse; /* Mutexes in use */
+ u_int32_t st_mutex_inuse_max; /* Maximum mutexes ever in use */
+
+ /* The following fields are filled-in from other places. */
+#ifndef __TEST_DB_NO_STATISTICS
+ uintmax_t st_region_wait; /* Region lock granted after wait. */
+ uintmax_t st_region_nowait; /* Region lock granted without wait. */
+ roff_t st_regsize; /* Region size. */
+#endif
+};
+
+/* This is the length of the buffer passed to DB_ENV->thread_id_string() */
+#define DB_THREADID_STRLEN 128
+
+/*******************************************************
+ * Locking.
+ *******************************************************/
+#define DB_LOCKVERSION 1
+
+#define DB_FILE_ID_LEN 20 /* Unique file ID length. */
+
+/*
+ * Deadlock detector modes; used in the DB_ENV structure to configure the
+ * locking subsystem.
+ */
+#define DB_LOCK_NORUN 0
+#define DB_LOCK_DEFAULT 1 /* Default policy. */
+#define DB_LOCK_EXPIRE 2 /* Only expire locks, no detection. */
+#define DB_LOCK_MAXLOCKS 3 /* Select locker with max locks. */
+#define DB_LOCK_MAXWRITE 4 /* Select locker with max writelocks. */
+#define DB_LOCK_MINLOCKS 5 /* Select locker with min locks. */
+#define DB_LOCK_MINWRITE 6 /* Select locker with min writelocks. */
+#define DB_LOCK_OLDEST 7 /* Select oldest locker. */
+#define DB_LOCK_RANDOM 8 /* Select random locker. */
+#define DB_LOCK_YOUNGEST 9 /* Select youngest locker. */
+
+/*
+ * Simple R/W lock modes and for multi-granularity intention locking.
+ *
+ * !!!
+ * These values are NOT random, as they are used as an index into the lock
+ * conflicts arrays, i.e., DB_LOCK_IWRITE must be == 4, and DB_LOCK_IREAD
+ * must be == 5.
+ */
+typedef enum {
+ DB_LOCK_NG=0, /* Not granted. */
+ DB_LOCK_READ=1, /* Shared/read. */
+ DB_LOCK_WRITE=2, /* Exclusive/write. */
+ DB_LOCK_WAIT=3, /* Wait for event */
+ DB_LOCK_IWRITE=4, /* Intent exclusive/write. */
+ DB_LOCK_IREAD=5, /* Intent to share/read. */
+ DB_LOCK_IWR=6, /* Intent to read and write. */
+ DB_LOCK_READ_UNCOMMITTED=7, /* Degree 1 isolation. */
+ DB_LOCK_WWRITE=8 /* Was Written. */
+} db_lockmode_t;
+
+/*
+ * Request types.
+ */
+typedef enum {
+ DB_LOCK_DUMP=0, /* Display held locks. */
+ DB_LOCK_GET=1, /* Get the lock. */
+ DB_LOCK_GET_TIMEOUT=2, /* Get lock with a timeout. */
+ DB_LOCK_INHERIT=3, /* Pass locks to parent. */
+ DB_LOCK_PUT=4, /* Release the lock. */
+ DB_LOCK_PUT_ALL=5, /* Release locker's locks. */
+ DB_LOCK_PUT_OBJ=6, /* Release locker's locks on obj. */
+ DB_LOCK_PUT_READ=7, /* Release locker's read locks. */
+ DB_LOCK_TIMEOUT=8, /* Force a txn to timeout. */
+ DB_LOCK_TRADE=9, /* Trade locker ids on a lock. */
+ DB_LOCK_UPGRADE_WRITE=10 /* Upgrade writes for dirty reads. */
+} db_lockop_t;
+
+/*
+ * Status of a lock.
+ */
+typedef enum {
+ DB_LSTAT_ABORTED=1, /* Lock belongs to an aborted txn. */
+ DB_LSTAT_EXPIRED=2, /* Lock has expired. */
+ DB_LSTAT_FREE=3, /* Lock is unallocated. */
+ DB_LSTAT_HELD=4, /* Lock is currently held. */
+ DB_LSTAT_PENDING=5, /* Lock was waiting and has been
+ * promoted; waiting for the owner
+ * to run and upgrade it to held. */
+ DB_LSTAT_WAITING=6 /* Lock is on the wait queue. */
+} db_status_t;
+
+/* Lock statistics structure. */
+struct __db_lock_stat {
+ u_int32_t st_id; /* Last allocated locker ID. */
+ u_int32_t st_cur_maxid; /* Current maximum unused ID. */
+ u_int32_t st_maxlocks; /* Maximum number of locks in table. */
+ u_int32_t st_maxlockers; /* Maximum num of lockers in table. */
+ u_int32_t st_maxobjects; /* Maximum num of objects in table. */
+ u_int32_t st_partitions; /* Number of partitions. */
+ int st_nmodes; /* Number of lock modes. */
+ u_int32_t st_nlockers; /* Current number of lockers. */
+#ifndef __TEST_DB_NO_STATISTICS
+ u_int32_t st_nlocks; /* Current number of locks. */
+ u_int32_t st_maxnlocks; /* Maximum number of locks so far. */
+ u_int32_t st_maxhlocks; /* Maximum number of locks in any bucket. */
+ uintmax_t st_locksteals; /* Number of lock steals so far. */
+ uintmax_t st_maxlsteals; /* Maximum number of steals in any partition. */
+ u_int32_t st_maxnlockers; /* Maximum number of lockers so far. */
+ u_int32_t st_nobjects; /* Current number of objects. */
+ u_int32_t st_maxnobjects; /* Maximum number of objects so far. */
+ u_int32_t st_maxhobjects; /* Maximum number of objects in any bucket. */
+ uintmax_t st_objectsteals; /* Number of objects steals so far. */
+ uintmax_t st_maxosteals; /* Maximum number of steals in any partition. */
+ uintmax_t st_nrequests; /* Number of lock gets. */
+ uintmax_t st_nreleases; /* Number of lock puts. */
+ uintmax_t st_nupgrade; /* Number of lock upgrades. */
+ uintmax_t st_ndowngrade; /* Number of lock downgrades. */
+ uintmax_t st_lock_wait; /* Lock conflicts w/ subsequent wait */
+ uintmax_t st_lock_nowait; /* Lock conflicts w/o subsequent wait */
+ uintmax_t st_ndeadlocks; /* Number of lock deadlocks. */
+ db_timeout_t st_locktimeout; /* Lock timeout. */
+ uintmax_t st_nlocktimeouts; /* Number of lock timeouts. */
+ db_timeout_t st_txntimeout; /* Transaction timeout. */
+ uintmax_t st_ntxntimeouts; /* Number of transaction timeouts. */
+ uintmax_t st_part_wait; /* Partition lock granted after wait. */
+ uintmax_t st_part_nowait; /* Partition lock granted without wait. */
+ uintmax_t st_part_max_wait; /* Max partition lock granted after wait. */
+ uintmax_t st_part_max_nowait; /* Max partition lock granted without wait. */
+ uintmax_t st_objs_wait; /* Object lock granted after wait. */
+ uintmax_t st_objs_nowait; /* Object lock granted without wait. */
+ uintmax_t st_lockers_wait; /* Locker lock granted after wait. */
+ uintmax_t st_lockers_nowait; /* Locker lock granted without wait. */
+ uintmax_t st_region_wait; /* Region lock granted after wait. */
+ uintmax_t st_region_nowait; /* Region lock granted without wait. */
+ u_int32_t st_hash_len; /* Max length of bucket. */
+ roff_t st_regsize; /* Region size. */
+#endif
+};
+
+struct __db_lock_hstat {
+ uintmax_t st_nrequests; /* Number of lock gets. */
+ uintmax_t st_nreleases; /* Number of lock puts. */
+ uintmax_t st_nupgrade; /* Number of lock upgrades. */
+ uintmax_t st_ndowngrade; /* Number of lock downgrades. */
+ u_int32_t st_nlocks; /* Current number of locks. */
+ u_int32_t st_maxnlocks; /* Maximum number of locks so far. */
+ u_int32_t st_nobjects; /* Current number of objects. */
+ u_int32_t st_maxnobjects; /* Maximum number of objects so far. */
+ uintmax_t st_lock_wait; /* Lock conflicts w/ subsequent wait */
+ uintmax_t st_lock_nowait; /* Lock conflicts w/o subsequent wait */
+ uintmax_t st_nlocktimeouts; /* Number of lock timeouts. */
+ uintmax_t st_ntxntimeouts; /* Number of transaction timeouts. */
+ u_int32_t st_hash_len; /* Max length of bucket. */
+};
+
+struct __db_lock_pstat {
+ u_int32_t st_nlocks; /* Current number of locks. */
+ u_int32_t st_maxnlocks; /* Maximum number of locks so far. */
+ u_int32_t st_nobjects; /* Current number of objects. */
+ u_int32_t st_maxnobjects; /* Maximum number of objects so far. */
+ uintmax_t st_locksteals; /* Number of lock steals so far. */
+ uintmax_t st_objectsteals; /* Number of objects steals so far. */
+};
+
+/*
+ * DB_LOCK_ILOCK --
+ * Internal DB access method lock.
+ */
+struct __db_ilock {
+ db_pgno_t pgno; /* Page being locked. */
+ u_int8_t fileid[DB_FILE_ID_LEN];/* File id. */
+#define DB_HANDLE_LOCK 1
+#define DB_RECORD_LOCK 2
+#define DB_PAGE_LOCK 3
+ u_int32_t type; /* Type of lock. */
+};
+
+/*
+ * DB_LOCK --
+ * The structure is allocated by the caller and filled in during a
+ * lock_get request (or a lock_vec/DB_LOCK_GET).
+ */
+struct __db_lock_u {
+ roff_t off; /* Offset of the lock in the region */
+ u_int32_t ndx; /* Index of the object referenced by
+ * this lock; used for locking. */
+ u_int32_t gen; /* Generation number of this lock. */
+ db_lockmode_t mode; /* mode of this lock. */
+};
+
+/* Lock request structure. */
+struct __db_lockreq {
+ db_lockop_t op; /* Operation. */
+ db_lockmode_t mode; /* Requested mode. */
+ db_timeout_t timeout; /* Time to expire lock. */
+ DBT *obj; /* Object being locked. */
+ DB_LOCK lock; /* Lock returned. */
+};
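+
+/*
+ * Request sketch ("dbenv", "locker" and "obj" are assumed to exist): a
+ * single DB_LOCK_GET submitted through DB_ENV->lock_vec fills in the
+ * lock field on success:
+ *
+ *	DB_LOCKREQ req;
+ *
+ *	memset(&req, 0, sizeof(req));
+ *	req.op = DB_LOCK_GET;
+ *	req.mode = DB_LOCK_READ;
+ *	req.obj = &obj;
+ *	ret = dbenv->lock_vec(dbenv, locker, 0, &req, 1, NULL);
+ */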
+
+/*******************************************************
+ * Logging.
+ *******************************************************/
+#define DB_LOGVERSION 16 /* Current log version. */
+#define DB_LOGVERSION_LATCHING 15 /* Log version using latching. */
+#define DB_LOGCHKSUM 12 /* Check sum headers. */
+#define DB_LOGOLDVER 8 /* Oldest log version supported. */
+#define DB_LOGMAGIC 0x040988
+
+/*
+ * A DB_LSN has two parts, a fileid which identifies a specific file, and an
+ * offset within that file. The fileid is an unsigned 4-byte quantity that
+ * uniquely identifies a file within the log directory -- currently a simple
+ * counter inside the log. The offset is also an unsigned 4-byte value. The
+ * log manager guarantees the offset is never more than 4 bytes by switching
+ * to a new log file before the maximum length imposed by an unsigned 4-byte
+ * offset is reached.
+ */
+struct __db_lsn {
+ u_int32_t file; /* File ID. */
+ u_int32_t offset; /* File offset. */
+};
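+
+/*
+ * LSNs order first by file, then by offset; the public log_compare()
+ * implements this. A sketch of the equivalent test:
+ *
+ *	if (a->file != b->file)
+ *		return (a->file < b->file ? -1 : 1);
+ *	if (a->offset != b->offset)
+ *		return (a->offset < b->offset ? -1 : 1);
+ *	return (0);
+ */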
+
+/*
+ * Application-specified log record types start at DB_user_BEGIN, and must not
+ * equal or exceed DB_debug_FLAG.
+ *
+ * DB_debug_FLAG is the high-bit of the u_int32_t that specifies a log record
+ * type. If the flag is set, it's a log record that was logged for debugging
+ * purposes only, even if it reflects a database change -- the change was part
+ * of a non-durable transaction.
+ */
+#define DB_user_BEGIN 10000
+#define DB_debug_FLAG 0x80000000
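+
+/*
+ * E.g., a hypothetical application-defined record type, and the
+ * debugging-record test described above:
+ *
+ *	#define MY_APP_LOG_RECORD	(DB_user_BEGIN + 1)
+ *
+ *	if (rectype & DB_debug_FLAG)
+ *		-- debugging-only record, no database change to recover
+ */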
+
+/*
+ * DB_LOGC --
+ * Log cursor.
+ */
+struct __db_log_cursor {
+ ENV *env; /* Environment */
+
+ DB_FH *fhp; /* File handle. */
+ DB_LSN lsn; /* Cursor: LSN */
+ u_int32_t len; /* Cursor: record length */
+ u_int32_t prev; /* Cursor: previous record's offset */
+
+ DBT dbt; /* Return DBT. */
+ DB_LSN p_lsn; /* Persist LSN. */
+ u_int32_t p_version; /* Persist version. */
+
+ u_int8_t *bp; /* Allocated read buffer. */
+ u_int32_t bp_size; /* Read buffer length in bytes. */
+ u_int32_t bp_rlen; /* Read buffer valid data length. */
+ DB_LSN bp_lsn; /* Read buffer first byte LSN. */
+
+ u_int32_t bp_maxrec; /* Max record length in the log file. */
+
+ /* DB_LOGC PUBLIC HANDLE LIST BEGIN */
+ int (*close) __P((DB_LOGC *, u_int32_t));
+ int (*get) __P((DB_LOGC *, DB_LSN *, DBT *, u_int32_t));
+ int (*version) __P((DB_LOGC *, u_int32_t *, u_int32_t));
+ /* DB_LOGC PUBLIC HANDLE LIST END */
+
+#define DB_LOG_DISK 0x01 /* Log record came from disk. */
+#define DB_LOG_LOCKED 0x02 /* Log region already locked */
+#define DB_LOG_SILENT_ERR 0x04 /* Turn off error messages. */
+ u_int32_t flags;
+};
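+
+/*
+ * Cursor usage sketch (application code, error handling elided): the
+ * handle comes from DB_ENV->log_cursor and is driven through the method
+ * list above:
+ *
+ *	DB_LOGC *logc;
+ *	DB_LSN lsn;
+ *	DBT rec;
+ *
+ *	memset(&rec, 0, sizeof(rec));
+ *	dbenv->log_cursor(dbenv, &logc, 0);
+ *	while (logc->get(logc, &lsn, &rec, DB_NEXT) == 0)
+ *		-- process the record in rec
+ *	logc->close(logc, 0);
+ */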
+
+/* Log statistics structure. */
+struct __db_log_stat {
+ u_int32_t st_magic; /* Log file magic number. */
+ u_int32_t st_version; /* Log file version number. */
+ int st_mode; /* Log file permissions mode. */
+ u_int32_t st_lg_bsize; /* Log buffer size. */
+ u_int32_t st_lg_size; /* Log file size. */
+ u_int32_t st_wc_bytes; /* Bytes to log since checkpoint. */
+ u_int32_t st_wc_mbytes; /* Megabytes to log since checkpoint. */
+#ifndef __TEST_DB_NO_STATISTICS
+ uintmax_t st_record; /* Records entered into the log. */
+ u_int32_t st_w_bytes; /* Bytes to log. */
+ u_int32_t st_w_mbytes; /* Megabytes to log. */
+ uintmax_t st_wcount; /* Total I/O writes to the log. */
+ uintmax_t st_wcount_fill; /* Overflow writes to the log. */
+ uintmax_t st_rcount; /* Total I/O reads from the log. */
+ uintmax_t st_scount; /* Total syncs to the log. */
+ uintmax_t st_region_wait; /* Region lock granted after wait. */
+ uintmax_t st_region_nowait; /* Region lock granted without wait. */
+ u_int32_t st_cur_file; /* Current log file number. */
+ u_int32_t st_cur_offset; /* Current log file offset. */
+ u_int32_t st_disk_file; /* Known on disk log file number. */
+ u_int32_t st_disk_offset; /* Known on disk log file offset. */
+ u_int32_t st_maxcommitperflush; /* Max number of commits in a flush. */
+ u_int32_t st_mincommitperflush; /* Min number of commits in a flush. */
+ roff_t st_regsize; /* Region size. */
+#endif
+};
+
+/*
+ * We need to record the first log record of a transaction. For user-
+ * defined logging this macro returns the place to put that information,
+ * if it is needed in rlsnp; otherwise it leaves rlsnp unchanged. We also
+ * need to track the last record of the transaction; this returns the
+ * place to put that info.
+ */
+#define DB_SET_TXN_LSNP(txn, blsnp, llsnp) \
+ ((txn)->set_txn_lsnp(txn, blsnp, llsnp))
+
+/*******************************************************
+ * Shared buffer cache (mpool).
+ *******************************************************/
+/* Priority values for DB_MPOOLFILE->{put,set_priority}. */
+typedef enum {
+ DB_PRIORITY_UNCHANGED=0,
+ DB_PRIORITY_VERY_LOW=1,
+ DB_PRIORITY_LOW=2,
+ DB_PRIORITY_DEFAULT=3,
+ DB_PRIORITY_HIGH=4,
+ DB_PRIORITY_VERY_HIGH=5
+} DB_CACHE_PRIORITY;
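+
+/*
+ * An illustrative sketch, not part of this header: biasing eviction for
+ * one file's pages, assuming an open DB_MPOOLFILE *mpf.
+ *
+ *	if ((ret = mpf->set_priority(mpf, DB_PRIORITY_VERY_HIGH)) != 0)
+ *		goto err;
+ */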
+
+/* Per-process DB_MPOOLFILE information. */
+struct __db_mpoolfile {
+ DB_FH *fhp; /* Underlying file handle. */
+
+ /*
+ * !!!
+ * The ref, pinref and q fields are protected by the region lock.
+ */
+ u_int32_t ref; /* Reference count. */
+
+ u_int32_t pinref; /* Pinned block reference count. */
+
+ /*
+ * !!!
+ * Explicit representations of structures from queue.h.
+ * TAILQ_ENTRY(__db_mpoolfile) q;
+ */
+ struct {
+ struct __db_mpoolfile *tqe_next;
+ struct __db_mpoolfile **tqe_prev;
+ } q; /* Linked list of DB_MPOOLFILE's. */
+
+ /*
+ * !!!
+ * The rest of the fields (with the exception of the MP_FLUSH flag)
+ * are not thread-protected, even when they may be modified at any
+ * time by the application. The reason is the DB_MPOOLFILE handle
+ * is single-threaded from the viewpoint of the application, and so
+ * the only fields needing to be thread-protected are those accessed
+ * by checkpoint or sync threads when using DB_MPOOLFILE structures
+ * to flush buffers from the cache.
+ */
+ ENV *env; /* Environment */
+ MPOOLFILE *mfp; /* Underlying MPOOLFILE. */
+
+ u_int32_t clear_len; /* Cleared length on created pages. */
+ u_int8_t /* Unique file ID. */
+ fileid[DB_FILE_ID_LEN];
+ int ftype; /* File type. */
+ int32_t lsn_offset; /* LSN offset in page. */
+ u_int32_t gbytes, bytes; /* Maximum file size. */
+ DBT *pgcookie; /* Byte-string passed to pgin/pgout. */
+ int32_t priority; /* Cache priority. */
+
+ void *addr; /* Address of mmap'd region. */
+ size_t len; /* Length of mmap'd region. */
+
+ u_int32_t config_flags; /* Flags to DB_MPOOLFILE->set_flags. */
+
+ /* DB_MPOOLFILE PUBLIC HANDLE LIST BEGIN */
+ int (*close) __P((DB_MPOOLFILE *, u_int32_t));
+ int (*get)
+ __P((DB_MPOOLFILE *, db_pgno_t *, DB_TXN *, u_int32_t, void *));
+ int (*get_clear_len) __P((DB_MPOOLFILE *, u_int32_t *));
+ int (*get_fileid) __P((DB_MPOOLFILE *, u_int8_t *));
+ int (*get_flags) __P((DB_MPOOLFILE *, u_int32_t *));
+ int (*get_ftype) __P((DB_MPOOLFILE *, int *));
+ int (*get_last_pgno) __P((DB_MPOOLFILE *, db_pgno_t *));
+ int (*get_lsn_offset) __P((DB_MPOOLFILE *, int32_t *));
+ int (*get_maxsize) __P((DB_MPOOLFILE *, u_int32_t *, u_int32_t *));
+ int (*get_pgcookie) __P((DB_MPOOLFILE *, DBT *));
+ int (*get_priority) __P((DB_MPOOLFILE *, DB_CACHE_PRIORITY *));
+ int (*open) __P((DB_MPOOLFILE *, const char *, u_int32_t, int, size_t));
+ int (*put) __P((DB_MPOOLFILE *, void *, DB_CACHE_PRIORITY, u_int32_t));
+ int (*set_clear_len) __P((DB_MPOOLFILE *, u_int32_t));
+ int (*set_fileid) __P((DB_MPOOLFILE *, u_int8_t *));
+ int (*set_flags) __P((DB_MPOOLFILE *, u_int32_t, int));
+ int (*set_ftype) __P((DB_MPOOLFILE *, int));
+ int (*set_lsn_offset) __P((DB_MPOOLFILE *, int32_t));
+ int (*set_maxsize) __P((DB_MPOOLFILE *, u_int32_t, u_int32_t));
+ int (*set_pgcookie) __P((DB_MPOOLFILE *, DBT *));
+ int (*set_priority) __P((DB_MPOOLFILE *, DB_CACHE_PRIORITY));
+ int (*sync) __P((DB_MPOOLFILE *));
+ /* DB_MPOOLFILE PUBLIC HANDLE LIST END */
+
+ /*
+ * MP_FILEID_SET, MP_OPEN_CALLED and MP_READONLY do not need to be
+ * thread protected because they are initialized before the file is
+ * linked onto the per-process lists, and never modified.
+ *
+ * MP_FLUSH is thread protected because it is potentially read/set by
+ * multiple threads of control.
+ */
+#define MP_FILEID_SET 0x001 /* Application supplied a file ID. */
+#define MP_FLUSH 0x002 /* Was opened to flush a buffer. */
+#define MP_MULTIVERSION 0x004 /* Opened for multiversion access. */
+#define MP_OPEN_CALLED 0x008 /* File opened. */
+#define MP_READONLY 0x010 /* File is readonly. */
+#define MP_DUMMY 0x020 /* File is dummy for __memp_fput. */
+ u_int32_t flags;
+};
+
+/* Mpool statistics structure. */
+struct __db_mpool_stat {
+ u_int32_t st_gbytes; /* Total cache size: GB. */
+ u_int32_t st_bytes; /* Total cache size: B. */
+ u_int32_t st_ncache; /* Number of cache regions. */
+ u_int32_t st_max_ncache; /* Maximum number of regions. */
+ size_t st_mmapsize; /* Maximum file size for mmap. */
+ int st_maxopenfd; /* Maximum number of open fd's. */
+ int st_maxwrite; /* Maximum buffers to write. */
+ db_timeout_t st_maxwrite_sleep; /* Sleep after writing max buffers. */
+ u_int32_t st_pages; /* Total number of pages. */
+#ifndef __TEST_DB_NO_STATISTICS
+ u_int32_t st_map; /* Pages from mapped files. */
+ uintmax_t st_cache_hit; /* Pages found in the cache. */
+ uintmax_t st_cache_miss; /* Pages not found in the cache. */
+ uintmax_t st_page_create; /* Pages created in the cache. */
+ uintmax_t st_page_in; /* Pages read in. */
+ uintmax_t st_page_out; /* Pages written out. */
+ uintmax_t st_ro_evict; /* Clean pages forced from the cache. */
+ uintmax_t st_rw_evict; /* Dirty pages forced from the cache. */
+ uintmax_t st_page_trickle; /* Pages written by memp_trickle. */
+ u_int32_t st_page_clean; /* Clean pages. */
+ u_int32_t st_page_dirty; /* Dirty pages. */
+ u_int32_t st_hash_buckets; /* Number of hash buckets. */
+ u_int32_t st_pagesize; /* Assumed page size. */
+ u_int32_t st_hash_searches; /* Total hash chain searches. */
+ u_int32_t st_hash_longest; /* Longest hash chain searched. */
+ uintmax_t st_hash_examined; /* Total hash entries searched. */
+ uintmax_t st_hash_nowait; /* Hash lock granted with nowait. */
+ uintmax_t st_hash_wait; /* Hash lock granted after wait. */
+ uintmax_t st_hash_max_nowait; /* Max hash lock granted with nowait. */
+ uintmax_t st_hash_max_wait; /* Max hash lock granted after wait. */
+ uintmax_t st_region_nowait; /* Region lock granted with nowait. */
+ uintmax_t st_region_wait; /* Region lock granted after wait. */
+ uintmax_t st_mvcc_frozen; /* Buffers frozen. */
+ uintmax_t st_mvcc_thawed; /* Buffers thawed. */
+ uintmax_t st_mvcc_freed; /* Frozen buffers freed. */
+ uintmax_t st_alloc; /* Number of page allocations. */
+ uintmax_t st_alloc_buckets; /* Buckets checked during allocation. */
+ uintmax_t st_alloc_max_buckets;/* Max checked during allocation. */
+ uintmax_t st_alloc_pages; /* Pages checked during allocation. */
+ uintmax_t st_alloc_max_pages; /* Max checked during allocation. */
+ uintmax_t st_io_wait; /* Thread waited on buffer I/O. */
+ uintmax_t st_sync_interrupted; /* Number of times sync interrupted. */
+ roff_t st_regsize; /* Region size. */
+#endif
+};
+
+/* Mpool file statistics structure. */
+struct __db_mpool_fstat {
+ char *file_name; /* File name. */
+ u_int32_t st_pagesize; /* Page size. */
+#ifndef __TEST_DB_NO_STATISTICS
+ u_int32_t st_map; /* Pages from mapped files. */
+ uintmax_t st_cache_hit; /* Pages found in the cache. */
+ uintmax_t st_cache_miss; /* Pages not found in the cache. */
+ uintmax_t st_page_create; /* Pages created in the cache. */
+ uintmax_t st_page_in; /* Pages read in. */
+ uintmax_t st_page_out; /* Pages written out. */
+#endif
+};
+
+/*******************************************************
+ * Transactions and recovery.
+ *******************************************************/
+#define DB_TXNVERSION 1
+
+typedef enum {
+ DB_TXN_ABORT=0, /* Public. */
+ DB_TXN_APPLY=1, /* Public. */
+ DB_TXN_BACKWARD_ROLL=3, /* Public. */
+ DB_TXN_FORWARD_ROLL=4, /* Public. */
+ DB_TXN_OPENFILES=5, /* Internal. */
+ DB_TXN_POPENFILES=6, /* Internal. */
+ DB_TXN_PRINT=7 /* Public. */
+} db_recops;
+
+/*
+ * BACKWARD_ALLOC is used during the forward pass to pick up any aborted
+ * allocations for files that were created during the forward pass.
+ * The main difference between _ALLOC and _ROLL is that the entry for
+ * the file may not exist during the rollforward pass.
+ */
+#define DB_UNDO(op) ((op) == DB_TXN_ABORT || (op) == DB_TXN_BACKWARD_ROLL)
+#define DB_REDO(op) ((op) == DB_TXN_FORWARD_ROLL || (op) == DB_TXN_APPLY)
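+
+/*
+ * An illustrative sketch, not part of this header: the usual shape of an
+ * application recovery function (see DB_ENV->set_app_dispatch), branching
+ * on the pass with DB_UNDO/DB_REDO.  The function name is hypothetical.
+ *
+ *	int
+ *	my_dispatch(DB_ENV *dbenv, DBT *rec, DB_LSN *lsn, db_recops op)
+ *	{
+ *		if (DB_UNDO(op))
+ *			... roll the change back ...
+ *		else if (DB_REDO(op))
+ *			... reapply the change ...
+ *		return (0);
+ *	}
+ */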
+
+struct __db_txn {
+ DB_TXNMGR *mgrp; /* Pointer to transaction manager. */
+ DB_TXN *parent; /* Pointer to transaction's parent. */
+ DB_THREAD_INFO *thread_info; /* Pointer to thread information. */
+
+ u_int32_t txnid; /* Unique transaction id. */
+ char *name; /* Transaction name. */
+ DB_LOCKER *locker; /* Locker for this txn. */
+
+ void *td; /* Detail structure within region. */
+ db_timeout_t lock_timeout; /* Timeout for locks for this txn. */
+ db_timeout_t expire; /* Time transaction expires. */
+ void *txn_list; /* Undo information for parent. */
+
+ /*
+ * !!!
+ * Explicit representations of structures from queue.h.
+ * TAILQ_ENTRY(__db_txn) links;
+ */
+ struct {
+ struct __db_txn *tqe_next;
+ struct __db_txn **tqe_prev;
+ } links; /* Links transactions off manager. */
+
+ /*
+ * !!!
+ * Explicit representations of structures from queue.h.
+ * TAILQ_HEAD(__kids, __db_txn) kids;
+ */
+ struct __kids {
+ struct __db_txn *tqh_first;
+ struct __db_txn **tqh_last;
+ } kids;
+
+ /*
+ * !!!
+ * Explicit representations of structures from queue.h.
+ * TAILQ_HEAD(__events, __txn_event) events;
+ */
+ struct {
+ struct __txn_event *tqh_first;
+ struct __txn_event **tqh_last;
+ } events; /* Links deferred events. */
+
+ /*
+ * !!!
+ * Explicit representations of structures from queue.h.
+ * STAILQ_HEAD(__logrec, __txn_logrec) logs;
+ */
+ struct {
+ struct __txn_logrec *stqh_first;
+ struct __txn_logrec **stqh_last;
+ } logs; /* Links in memory log records. */
+
+ /*
+ * !!!
+ * Explicit representations of structures from queue.h.
+ * TAILQ_ENTRY(__db_txn) klinks;
+ */
+ struct {
+ struct __db_txn *tqe_next;
+ struct __db_txn **tqe_prev;
+ } klinks;
+
+ void *api_internal; /* C++ API private. */
+ void *xml_internal; /* XML API private. */
+
+ u_int32_t cursors; /* Number of cursors open for txn */
+
+ /* DB_TXN PUBLIC HANDLE LIST BEGIN */
+ int (*abort) __P((DB_TXN *));
+ int (*commit) __P((DB_TXN *, u_int32_t));
+ int (*discard) __P((DB_TXN *, u_int32_t));
+ int (*get_name) __P((DB_TXN *, const char **));
+ u_int32_t (*id) __P((DB_TXN *));
+ int (*prepare) __P((DB_TXN *, u_int8_t *));
+ int (*set_name) __P((DB_TXN *, const char *));
+ int (*set_timeout) __P((DB_TXN *, db_timeout_t, u_int32_t));
+ /* DB_TXN PUBLIC HANDLE LIST END */
+
+ /* DB_TXN PRIVATE HANDLE LIST BEGIN */
+ void (*set_txn_lsnp) __P((DB_TXN *txn, DB_LSN **, DB_LSN **));
+ /* DB_TXN PRIVATE HANDLE LIST END */
+
+#define TXN_CHILDCOMMIT 0x0001 /* Txn has committed. */
+#define TXN_CDSGROUP 0x0002 /* CDS group handle. */
+#define TXN_COMPENSATE 0x0004 /* Compensating transaction. */
+#define TXN_DEADLOCK 0x0008 /* Txn has deadlocked. */
+#define TXN_LOCKTIMEOUT 0x0010 /* Txn has a lock timeout. */
+#define TXN_MALLOC 0x0020 /* Structure allocated by TXN system. */
+#define TXN_NOSYNC 0x0040 /* Do not sync on prepare and commit. */
+#define TXN_NOWAIT 0x0080 /* Do not wait on locks. */
+#define TXN_PRIVATE 0x0100 /* Txn owned by cursor. */
+#define TXN_READ_COMMITTED 0x0200 /* Txn has degree 2 isolation. */
+#define TXN_READ_UNCOMMITTED 0x0400 /* Txn has degree 1 isolation. */
+#define TXN_RESTORED 0x0800 /* Txn has been restored. */
+#define TXN_SNAPSHOT 0x1000 /* Snapshot Isolation. */
+#define TXN_SYNC 0x2000 /* Write and sync on prepare/commit. */
+#define TXN_WRITE_NOSYNC 0x4000 /* Write only on prepare/commit. */
+ u_int32_t flags;
+};
+
+#define TXN_SYNC_FLAGS (TXN_SYNC | TXN_NOSYNC | TXN_WRITE_NOSYNC)
+
+/*
+ * Structure used for two phase commit interface.
+ * We set the size of our global transaction id (gid) to be 128 in order
+ * to match that defined by the XA X/Open standard.
+ */
+#define DB_GID_SIZE 128
+struct __db_preplist {
+ DB_TXN *txn;
+ u_int8_t gid[DB_GID_SIZE];
+};
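+
+/*
+ * An illustrative sketch, not part of this header: resolving transactions
+ * left prepared after a crash, assuming an open DB_ENV *dbenv recovered
+ * with DB_RECOVER.
+ *
+ *	DB_PREPLIST prep[32];
+ *	u_int32_t count, i;
+ *	int ret;
+ *
+ *	if ((ret = dbenv->txn_recover(dbenv,
+ *	    prep, 32, &count, DB_FIRST)) != 0)
+ *		goto err;
+ *	for (i = 0; i < count; i++)
+ *		(void)prep[i].txn->commit(prep[i].txn, 0);
+ */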
+
+/* Transaction statistics structure. */
+struct __db_txn_active {
+ u_int32_t txnid; /* Transaction ID */
+ u_int32_t parentid; /* Transaction ID of parent */
+ pid_t pid; /* Process owning txn ID */
+ db_threadid_t tid; /* Thread owning txn ID */
+
+ DB_LSN lsn; /* LSN when transaction began */
+
+ DB_LSN read_lsn; /* Read LSN for MVCC */
+ u_int32_t mvcc_ref; /* MVCC reference count */
+
+#define TXN_ABORTED 1
+#define TXN_COMMITTED 2
+#define TXN_PREPARED 3
+#define TXN_RUNNING 4
+ u_int32_t status; /* Status of the transaction */
+
+ u_int8_t gid[DB_GID_SIZE]; /* Global transaction ID */
+ char name[51]; /* 50 bytes of name, nul termination */
+};
+
+struct __db_txn_stat {
+ u_int32_t st_nrestores; /* number of restored transactions
+ after recovery. */
+#ifndef __TEST_DB_NO_STATISTICS
+ DB_LSN st_last_ckp; /* lsn of the last checkpoint */
+ time_t st_time_ckp; /* time of last checkpoint */
+ u_int32_t st_last_txnid; /* last transaction id given out */
+ u_int32_t st_maxtxns; /* maximum txns possible */
+ uintmax_t st_naborts; /* number of aborted transactions */
+ uintmax_t st_nbegins; /* number of begun transactions */
+ uintmax_t st_ncommits; /* number of committed transactions */
+ u_int32_t st_nactive; /* number of active transactions */
+ u_int32_t st_nsnapshot; /* number of snapshot transactions */
+ u_int32_t st_maxnactive; /* maximum active transactions */
+ u_int32_t st_maxnsnapshot; /* maximum snapshot transactions */
+ DB_TXN_ACTIVE *st_txnarray; /* array of active transactions */
+ uintmax_t st_region_wait; /* Region lock granted after wait. */
+ uintmax_t st_region_nowait; /* Region lock granted without wait. */
+ roff_t st_regsize; /* Region size. */
+#endif
+};
+
+/*******************************************************
+ * Replication.
+ *******************************************************/
+/* Special, out-of-band environment IDs. */
+#define DB_EID_BROADCAST -1
+#define DB_EID_INVALID -2
+
+#define DB_REP_DEFAULT_PRIORITY 100
+
+/* Acknowledgement policies. */
+#define DB_REPMGR_ACKS_ALL 1
+#define DB_REPMGR_ACKS_ALL_PEERS 2
+#define DB_REPMGR_ACKS_NONE 3
+#define DB_REPMGR_ACKS_ONE 4
+#define DB_REPMGR_ACKS_ONE_PEER 5
+#define DB_REPMGR_ACKS_QUORUM 6
+
+/* Replication timeout configuration values. */
+#define DB_REP_ACK_TIMEOUT 1 /* RepMgr acknowledgements. */
+#define DB_REP_CHECKPOINT_DELAY 2 /* Master checkpoint delay. */
+#define DB_REP_CONNECTION_RETRY 3 /* RepMgr connections. */
+#define DB_REP_ELECTION_RETRY 4 /* RepMgr elect retries. */
+#define DB_REP_ELECTION_TIMEOUT 5 /* Rep normal elections. */
+#define DB_REP_FULL_ELECTION_TIMEOUT 6 /* Rep full elections. */
+#define DB_REP_HEARTBEAT_MONITOR 7 /* RepMgr client HB monitor. */
+#define DB_REP_HEARTBEAT_SEND 8 /* RepMgr master send freq. */
+#define DB_REP_LEASE_TIMEOUT 9 /* Master leases. */
+
+/* Event notification types. */
+#define DB_EVENT_NO_SUCH_EVENT 0 /* out-of-band sentinel value */
+#define DB_EVENT_PANIC 1
+#define DB_EVENT_REG_ALIVE 2
+#define DB_EVENT_REG_PANIC 3
+#define DB_EVENT_REP_CLIENT 4
+#define DB_EVENT_REP_ELECTED 5
+#define DB_EVENT_REP_MASTER 6
+#define DB_EVENT_REP_NEWMASTER 7
+#define DB_EVENT_REP_PERM_FAILED 8
+#define DB_EVENT_REP_STARTUPDONE 9
+#define DB_EVENT_WRITE_FAILED 10
+
+/* Replication Manager site status. */
+struct __db_repmgr_site {
+ int eid;
+ char *host;
+ u_int port;
+
+#define DB_REPMGR_CONNECTED 0x01
+#define DB_REPMGR_DISCONNECTED 0x02
+ u_int32_t status;
+};
+
+/* Replication statistics. */
+struct __db_rep_stat {
+ /* !!!
+ * Many replication statistics fields cannot be protected by a mutex
+ * without an unacceptable performance penalty, since most message
+ * processing is done without the need to hold a region-wide lock.
+ * Fields whose comments end with a '+' may be updated without holding
+ * the replication or log mutexes (as appropriate), and thus may be
+ * off somewhat (or, on unreasonable architectures under unlucky
+ * circumstances, garbaged).
+ */
+ uintmax_t st_log_queued; /* Log records currently queued.+ */
+ u_int32_t st_startup_complete; /* Site completed client sync-up. */
+#ifndef __TEST_DB_NO_STATISTICS
+ u_int32_t st_status; /* Current replication status. */
+ DB_LSN st_next_lsn; /* Next LSN to use or expect. */
+ DB_LSN st_waiting_lsn; /* LSN we're awaiting, if any. */
+ DB_LSN st_max_perm_lsn; /* Maximum permanent LSN. */
+ db_pgno_t st_next_pg; /* Next pg we expect. */
+ db_pgno_t st_waiting_pg; /* pg we're awaiting, if any. */
+
+ u_int32_t st_dupmasters; /* # of times a duplicate master
+ condition was detected.+ */
+ int st_env_id; /* Current environment ID. */
+ u_int32_t st_env_priority; /* Current environment priority. */
+ uintmax_t st_bulk_fills; /* Bulk buffer fills. */
+ uintmax_t st_bulk_overflows; /* Bulk buffer overflows. */
+ uintmax_t st_bulk_records; /* Bulk records stored. */
+ uintmax_t st_bulk_transfers; /* Transfers of bulk buffers. */
+ uintmax_t st_client_rerequests;/* Number of forced rerequests. */
+ uintmax_t st_client_svc_req; /* Number of client service requests
+ received by this client. */
+ uintmax_t st_client_svc_miss; /* Number of client service requests
+ missing on this client. */
+ u_int32_t st_gen; /* Current generation number. */
+ u_int32_t st_egen; /* Current election gen number. */
+ uintmax_t st_log_duplicated; /* Log records received multiply.+ */
+ uintmax_t st_log_queued_max; /* Max. log records queued at once.+ */
+ uintmax_t st_log_queued_total; /* Total # of log recs. ever queued.+ */
+ uintmax_t st_log_records; /* Log records received and put.+ */
+ uintmax_t st_log_requested; /* Log recs. missed and requested.+ */
+ int st_master; /* Env. ID of the current master. */
+ uintmax_t st_master_changes; /* # of times we've switched masters. */
+ uintmax_t st_msgs_badgen; /* Messages with a bad generation #.+ */
+ uintmax_t st_msgs_processed; /* Messages received and processed.+ */
+ uintmax_t st_msgs_recover; /* Messages ignored because this site
+ was a client in recovery.+ */
+ uintmax_t st_msgs_send_failures;/* # of failed message sends.+ */
+ uintmax_t st_msgs_sent; /* # of successful message sends.+ */
+ uintmax_t st_newsites; /* # of NEWSITE msgs. received.+ */
+ u_int32_t st_nsites; /* Current number of sites we will
+ assume during elections. */
+ uintmax_t st_nthrottles; /* # of times we were throttled. */
+ uintmax_t st_outdated; /* # of times we detected and returned
+ an OUTDATED condition.+ */
+ uintmax_t st_pg_duplicated; /* Pages received multiply.+ */
+ uintmax_t st_pg_records; /* Pages received and stored.+ */
+ uintmax_t st_pg_requested; /* Pages missed and requested.+ */
+ uintmax_t st_txns_applied; /* # of transactions applied.+ */
+ uintmax_t st_startsync_delayed;/* # of STARTSYNC msgs delayed.+ */
+
+ /* Elections generally. */
+ uintmax_t st_elections; /* # of elections held.+ */
+ uintmax_t st_elections_won; /* # of elections won by this site.+ */
+
+ /* Statistics about an in-progress election. */
+ int st_election_cur_winner; /* Current front-runner. */
+ u_int32_t st_election_gen; /* Election generation number. */
+ DB_LSN st_election_lsn; /* Max. LSN of current winner. */
+ u_int32_t st_election_nsites; /* # of "registered voters". */
+ u_int32_t st_election_nvotes; /* # of "registered voters" needed. */
+ u_int32_t st_election_priority; /* Current election priority. */
+ int st_election_status; /* Current election status. */
+ u_int32_t st_election_tiebreaker;/* Election tiebreaker value. */
+ u_int32_t st_election_votes; /* Votes received in this round. */
+ u_int32_t st_election_sec; /* Last election time seconds. */
+ u_int32_t st_election_usec; /* Last election time useconds. */
+ u_int32_t st_max_lease_sec; /* Maximum lease timestamp seconds. */
+ u_int32_t st_max_lease_usec; /* Maximum lease timestamp useconds. */
+
+ /* Undocumented statistics only used by the test system. */
+#ifdef CONFIG_TEST
+ u_int32_t st_filefail_cleanups; /* # of FILE_FAIL cleanups done. */
+#endif
+#endif
+};
+
+/* Replication Manager statistics. */
+struct __db_repmgr_stat {
+ uintmax_t st_perm_failed; /* # of insufficiently ack'ed msgs. */
+ uintmax_t st_msgs_queued; /* # msgs queued for network delay. */
+ uintmax_t st_msgs_dropped; /* # msgs discarded due to excessive
+ queue length. */
+ uintmax_t st_connection_drop; /* Existing connections dropped. */
+ uintmax_t st_connect_fail; /* Failed new connection attempts. */
+};
+
+/*******************************************************
+ * Sequences.
+ *******************************************************/
+/*
+ * The storage record for a sequence.
+ */
+struct __db_seq_record {
+ u_int32_t seq_version; /* Version size/number. */
+ u_int32_t flags; /* DB_SEQ_XXX Flags. */
+ db_seq_t seq_value; /* Current value. */
+ db_seq_t seq_max; /* Max permitted. */
+ db_seq_t seq_min; /* Min permitted. */
+};
+
+/*
+ * Handle for a sequence object.
+ */
+struct __db_sequence {
+ DB *seq_dbp; /* DB handle for this sequence. */
+ db_mutex_t mtx_seq; /* Mutex if sequence is threaded. */
+ DB_SEQ_RECORD *seq_rp; /* Pointer to current data. */
+ DB_SEQ_RECORD seq_record; /* Data from DB_SEQUENCE. */
+ int32_t seq_cache_size; /* Number of values cached. */
+ db_seq_t seq_last_value; /* Last value cached. */
+ DBT seq_key; /* DBT pointing to sequence key. */
+ DBT seq_data; /* DBT pointing to seq_record. */
+
+ /* API-private structure: used by C++ and Java. */
+ void *api_internal;
+
+ /* DB_SEQUENCE PUBLIC HANDLE LIST BEGIN */
+ int (*close) __P((DB_SEQUENCE *, u_int32_t));
+ int (*get) __P((DB_SEQUENCE *,
+ DB_TXN *, int32_t, db_seq_t *, u_int32_t));
+ int (*get_cachesize) __P((DB_SEQUENCE *, int32_t *));
+ int (*get_db) __P((DB_SEQUENCE *, DB **));
+ int (*get_flags) __P((DB_SEQUENCE *, u_int32_t *));
+ int (*get_key) __P((DB_SEQUENCE *, DBT *));
+ int (*get_range) __P((DB_SEQUENCE *,
+ db_seq_t *, db_seq_t *));
+ int (*initial_value) __P((DB_SEQUENCE *, db_seq_t));
+ int (*open) __P((DB_SEQUENCE *,
+ DB_TXN *, DBT *, u_int32_t));
+ int (*remove) __P((DB_SEQUENCE *, DB_TXN *, u_int32_t));
+ int (*set_cachesize) __P((DB_SEQUENCE *, int32_t));
+ int (*set_flags) __P((DB_SEQUENCE *, u_int32_t));
+ int (*set_range) __P((DB_SEQUENCE *, db_seq_t, db_seq_t));
+ int (*stat) __P((DB_SEQUENCE *,
+ DB_SEQUENCE_STAT **, u_int32_t));
+ int (*stat_print) __P((DB_SEQUENCE *, u_int32_t));
+ /* DB_SEQUENCE PUBLIC HANDLE LIST END */
+};
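+
+/*
+ * An illustrative sketch, not part of this header: allocating values from
+ * a sequence, assuming an open DB *dbp; the sequence key is hypothetical.
+ *
+ *	DB_SEQUENCE *seq;
+ *	DBT key;
+ *	db_seq_t value;
+ *	int ret;
+ *
+ *	memset(&key, 0, sizeof(key));
+ *	key.data = "my_sequence";
+ *	key.size = sizeof("my_sequence") - 1;
+ *	if ((ret = db_sequence_create(&seq, dbp, 0)) != 0)
+ *		goto err;
+ *	if ((ret = seq->open(seq, NULL, &key, DB_CREATE)) != 0)
+ *		goto err;
+ *	if ((ret = seq->get(seq, NULL, 1, &value, 0)) != 0)
+ *		goto err;
+ *	(void)seq->close(seq, 0);
+ */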
+
+struct __db_seq_stat {
+ uintmax_t st_wait; /* Sequence lock granted after wait. */
+ uintmax_t st_nowait; /* Sequence lock granted w/o wait. */
+ db_seq_t st_current; /* Current value in db. */
+ db_seq_t st_value; /* Current cached value. */
+ db_seq_t st_last_value; /* Last cached value. */
+ db_seq_t st_min; /* Minimum value. */
+ db_seq_t st_max; /* Maximum value. */
+ int32_t st_cache_size; /* Cache size. */
+ u_int32_t st_flags; /* Flag value. */
+};
+
+/*******************************************************
+ * Access methods.
+ *******************************************************/
+typedef enum {
+ DB_BTREE=1,
+ DB_HASH=2,
+ DB_RECNO=3,
+ DB_QUEUE=4,
+ DB_UNKNOWN=5 /* Figure it out on open. */
+} DBTYPE;
+
+#define DB_RENAMEMAGIC 0x030800 /* File has been renamed. */
+
+#define DB_BTREEVERSION 9 /* Current btree version. */
+#define DB_BTREEOLDVER 8 /* Oldest btree version supported. */
+#define DB_BTREEMAGIC 0x053162
+
+#define DB_HASHVERSION 9 /* Current hash version. */
+#define DB_HASHOLDVER 7 /* Oldest hash version supported. */
+#define DB_HASHMAGIC 0x061561
+
+#define DB_QAMVERSION 4 /* Current queue version. */
+#define DB_QAMOLDVER 3 /* Oldest queue version supported. */
+#define DB_QAMMAGIC 0x042253
+
+#define DB_SEQUENCE_VERSION 2 /* Current sequence version. */
+#define DB_SEQUENCE_OLDVER 1 /* Oldest sequence version supported. */
+
+/*
+ * DB access method and cursor operation values. Each value is an operation
+ * code to which additional bit flags are added.
+ */
+#define DB_AFTER 1 /* Dbc.put */
+#define DB_APPEND 2 /* Db.put */
+#define DB_BEFORE 3 /* Dbc.put */
+#define DB_CONSUME 4 /* Db.get */
+#define DB_CONSUME_WAIT 5 /* Db.get */
+#define DB_CURRENT 6 /* Dbc.get, Dbc.put, DbLogc.get */
+#define DB_FIRST 7 /* Dbc.get, DbLogc->get */
+#define DB_GET_BOTH 8 /* Db.get, Dbc.get */
+#define DB_GET_BOTHC 9 /* Dbc.get (internal) */
+#define DB_GET_BOTH_RANGE 10 /* Db.get, Dbc.get */
+#define DB_GET_RECNO 11 /* Dbc.get */
+#define DB_JOIN_ITEM 12 /* Dbc.get; don't do primary lookup */
+#define DB_KEYFIRST 13 /* Dbc.put */
+#define DB_KEYLAST 14 /* Dbc.put */
+#define DB_LAST 15 /* Dbc.get, DbLogc->get */
+#define DB_NEXT 16 /* Dbc.get, DbLogc->get */
+#define DB_NEXT_DUP 17 /* Dbc.get */
+#define DB_NEXT_NODUP 18 /* Dbc.get */
+#define DB_NODUPDATA 19 /* Db.put, Dbc.put */
+#define DB_NOOVERWRITE 20 /* Db.put */
+#define DB_NOSYNC 21 /* Db.close */
+#define DB_OVERWRITE_DUP 22 /* Dbc.put, Db.put; no DB_KEYEXIST */
+#define DB_POSITION 23 /* Dbc.dup */
+#define DB_PREV 24 /* Dbc.get, DbLogc->get */
+#define DB_PREV_DUP 25 /* Dbc.get */
+#define DB_PREV_NODUP 26 /* Dbc.get */
+#define DB_SET 27 /* Dbc.get, DbLogc->get */
+#define DB_SET_RANGE 28 /* Dbc.get */
+#define DB_SET_RECNO 29 /* Db.get, Dbc.get */
+#define DB_UPDATE_SECONDARY 30 /* Dbc.get, Dbc.del (internal) */
+#define DB_SET_LTE 31 /* Dbc.get (internal) */
+#define DB_GET_BOTH_LTE 32 /* Dbc.get (internal) */
+
+/* This has to change when the max opcode hits 255. */
+#define DB_OPFLAGS_MASK 0x000000ff /* Mask for operations flags. */
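+
+/*
+ * An illustrative sketch, not part of this header: the operation codes
+ * above are the flags argument of the access method and cursor calls.
+ * For example, positioning at the smallest key >= some prefix and
+ * iterating, assuming an open cursor DBC *dbc.
+ *
+ *	DBT key, data;
+ *	int ret;
+ *
+ *	memset(&key, 0, sizeof(key));
+ *	memset(&data, 0, sizeof(data));
+ *	key.data = "prefix";
+ *	key.size = sizeof("prefix") - 1;
+ *	for (ret = dbc->get(dbc, &key, &data, DB_SET_RANGE);
+ *	    ret == 0; ret = dbc->get(dbc, &key, &data, DB_NEXT))
+ *		... examine the key/data pair ...
+ */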
+
+/*
+ * DB (user visible) error return codes.
+ *
+ * !!!
+ * We don't want our error returns to conflict with other packages where
+ * possible, so pick a base error value that's hopefully not common. We
+ * document that we own the error name space from -30,800 to -30,999.
+ */
+/* DB (public) error return codes. */
+#define DB_BUFFER_SMALL (-30999)/* User memory too small for return. */
+#define DB_DONOTINDEX (-30998)/* "Null" return from 2ndary callbk. */
+#define DB_FOREIGN_CONFLICT (-30997)/* A foreign db constraint triggered. */
+#define DB_KEYEMPTY (-30996)/* Key/data deleted or never created. */
+#define DB_KEYEXIST (-30995)/* The key/data pair already exists. */
+#define DB_LOCK_DEADLOCK (-30994)/* Deadlock. */
+#define DB_LOCK_NOTGRANTED (-30993)/* Lock unavailable. */
+#define DB_LOG_BUFFER_FULL (-30992)/* In-memory log buffer full. */
+#define DB_NOSERVER (-30991)/* Server panic return. */
+#define DB_NOSERVER_HOME (-30990)/* Bad home sent to server. */
+#define DB_NOSERVER_ID (-30989)/* Bad ID sent to server. */
+#define DB_NOTFOUND (-30988)/* Key/data pair not found (EOF). */
+#define DB_OLD_VERSION (-30987)/* Out-of-date version. */
+#define DB_PAGE_NOTFOUND (-30986)/* Requested page not found. */
+#define DB_REP_DUPMASTER (-30985)/* There are two masters. */
+#define DB_REP_HANDLE_DEAD (-30984)/* Rolled back a commit. */
+#define DB_REP_HOLDELECTION (-30983)/* Time to hold an election. */
+#define DB_REP_IGNORE (-30982)/* This msg should be ignored.*/
+#define DB_REP_ISPERM (-30981)/* Previously cached perm record written. */
+#define DB_REP_JOIN_FAILURE (-30980)/* Unable to join replication group. */
+#define DB_REP_LEASE_EXPIRED (-30979)/* Master lease has expired. */
+#define DB_REP_LOCKOUT (-30978)/* API/Replication lockout now. */
+#define DB_REP_NEWSITE (-30977)/* New site entered system. */
+#define DB_REP_NOTPERM (-30976)/* Permanent log record not written. */
+#define DB_REP_UNAVAIL (-30975)/* Site cannot currently be reached. */
+#define DB_RUNRECOVERY (-30974)/* Panic return. */
+#define DB_SECONDARY_BAD (-30973)/* Secondary index corrupt. */
+#define DB_VERIFY_BAD (-30972)/* Verify failed; bad format. */
+#define DB_VERSION_MISMATCH (-30971)/* Environment version mismatch. */
+
+/* DB (private) error return codes. */
+#define DB_ALREADY_ABORTED (-30899)
+#define DB_DELETED (-30898)/* Recovery file marked deleted. */
+#define DB_EVENT_NOT_HANDLED (-30897)/* Forward event to application. */
+#define DB_NEEDSPLIT (-30896)/* Page needs to be split. */
+#define DB_REP_BULKOVF (-30895)/* Rep bulk buffer overflow. */
+#define DB_REP_EGENCHG (-30894)/* Egen changed while in election. */
+#define DB_REP_LOGREADY (-30893)/* Rep log ready for recovery. */
+#define DB_REP_NEWMASTER (-30892)/* We have learned of a new master. */
+#define DB_REP_PAGEDONE (-30891)/* This page was already done. */
+#define DB_REP_PAGELOCKED (-30890)/* Page we want is locked. */
+#define DB_SURPRISE_KID (-30889)/* Child commit where parent
+ didn't know it was a parent. */
+#define DB_SWAPBYTES (-30888)/* Database needs byte swapping. */
+#define DB_TIMEOUT (-30887)/* Timed out waiting for election. */
+#define DB_TXN_CKP (-30886)/* Encountered ckp record in log. */
+#define DB_VERIFY_FATAL (-30885)/* DB->verify cannot proceed. */
+
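+/*
+ * An illustrative sketch, not part of this header: the usual pattern for
+ * separating expected returns from real errors, assuming an open DB *dbp
+ * and initialized key/data DBTs.
+ *
+ *	switch (ret = dbp->get(dbp, txn, &key, &data, 0)) {
+ *	case 0:
+ *		break;
+ *	case DB_NOTFOUND:
+ *		... the key is simply absent ...
+ *		break;
+ *	case DB_LOCK_DEADLOCK:
+ *		... abort and retry the transaction ...
+ *		break;
+ *	default:
+ *		dbp->err(dbp, ret, "DB->get");
+ *		goto err;
+ *	}
+ */
+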
+/* Database handle. */
+struct __db {
+ /*******************************************************
+ * Public: owned by the application.
+ *******************************************************/
+ u_int32_t pgsize; /* Database logical page size. */
+ DB_CACHE_PRIORITY priority; /* Database priority in cache. */
+
+ /* Callbacks. */
+ int (*db_append_recno) __P((DB *, DBT *, db_recno_t));
+ void (*db_feedback) __P((DB *, int, int));
+ int (*dup_compare) __P((DB *, const DBT *, const DBT *));
+
+ void *app_private; /* Application-private handle. */
+
+ /*******************************************************
+ * Private: owned by DB.
+ *******************************************************/
+ DB_ENV *dbenv; /* Backing public environment. */
+ ENV *env; /* Backing private environment. */
+
+ DBTYPE type; /* DB access method type. */
+
+ DB_MPOOLFILE *mpf; /* Backing buffer pool. */
+
+ db_mutex_t mutex; /* Synchronization for free threading */
+
+ char *fname, *dname; /* File/database passed to DB->open. */
+ const char *dirname; /* Directory of DB file. */
+ u_int32_t open_flags; /* Flags passed to DB->open. */
+
+ u_int8_t fileid[DB_FILE_ID_LEN];/* File's unique ID for locking. */
+
+ u_int32_t adj_fileid; /* File's unique ID for curs. adj. */
+
+#define DB_LOGFILEID_INVALID -1
+ FNAME *log_filename; /* File's naming info for logging. */
+
+ db_pgno_t meta_pgno; /* Meta page number */
+ DB_LOCKER *locker; /* Locker for handle locking. */
+ DB_LOCKER *cur_locker; /* Current handle lock holder. */
+ DB_TXN *cur_txn; /* Opening transaction. */
+ DB_LOCKER *associate_locker; /* Locker for DB->associate call. */
+ DB_LOCK handle_lock; /* Lock held on this handle. */
+
+ u_int cl_id; /* RPC: remote client id. */
+
+ time_t timestamp; /* Handle timestamp for replication. */
+ u_int32_t fid_gen; /* Rep generation number for fids. */
+
+ /*
+ * Returned data memory for DB->get() and friends.
+ */
+ DBT my_rskey; /* Secondary key. */
+ DBT my_rkey; /* [Primary] key. */
+ DBT my_rdata; /* Data. */
+
+ /*
+ * !!!
+ * Some applications use DB but implement their own locking outside of
+ * DB. If they're using fcntl(2) locking on the underlying database
+ * file, and we open and close a file descriptor for that file, we will
+ * discard their locks. The DB_FCNTL_LOCKING flag to DB->open is an
+ * undocumented interface to support this usage which leaves any file
+ * descriptors we open until DB->close. This will only work with the
+ * DB->open interface and simple caches, e.g., creating a transaction
+ * thread may open/close file descriptors this flag doesn't protect.
+ * Locking with fcntl(2) on a file that you don't own is a very, very
+ * unsafe thing to do. 'Nuff said.
+ */
+ DB_FH *saved_open_fhp; /* Saved file handle. */
+
+ /*
+ * Linked list of DBP's, linked from the ENV, used to keep track
+ * of all open db handles for cursor adjustment.
+ *
+ * !!!
+ * Explicit representations of structures from queue.h.
+ * TAILQ_ENTRY(__db) dblistlinks;
+ */
+ struct {
+ struct __db *tqe_next;
+ struct __db **tqe_prev;
+ } dblistlinks;
+
+ /*
+ * Cursor queues.
+ *
+ * !!!
+ * Explicit representations of structures from queue.h.
+ * TAILQ_HEAD(__cq_fq, __dbc) free_queue;
+ * TAILQ_HEAD(__cq_aq, __dbc) active_queue;
+ * TAILQ_HEAD(__cq_jq, __dbc) join_queue;
+ */
+ struct __cq_fq {
+ struct __dbc *tqh_first;
+ struct __dbc **tqh_last;
+ } free_queue;
+ struct __cq_aq {
+ struct __dbc *tqh_first;
+ struct __dbc **tqh_last;
+ } active_queue;
+ struct __cq_jq {
+ struct __dbc *tqh_first;
+ struct __dbc **tqh_last;
+ } join_queue;
+
+ /*
+ * Secondary index support.
+ *
+ * Linked list of secondary indices -- set in the primary.
+ *
+ * !!!
+ * Explicit representations of structures from queue.h.
+ * LIST_HEAD(s_secondaries, __db);
+ */
+ struct {
+ struct __db *lh_first;
+ } s_secondaries;
+
+ /*
+ * List entries for secondaries, and reference count of how many
+ * threads are updating this secondary (see Dbc.put).
+ *
+ * !!!
+ * Note that these are synchronized by the primary's mutex, but
+ * filled in by the secondaries.
+ *
+ * !!!
+ * Explicit representations of structures from queue.h.
+ * LIST_ENTRY(__db) s_links;
+ */
+ struct {
+ struct __db *le_next;
+ struct __db **le_prev;
+ } s_links;
+ u_int32_t s_refcnt;
+
+ /* Secondary callback and free functions -- set in the secondary. */
+ int (*s_callback) __P((DB *, const DBT *, const DBT *, DBT *));
+
+ /* Reference to primary -- set in the secondary. */
+ DB *s_primary;
+
+#define DB_ASSOC_IMMUTABLE_KEY 0x00000001 /* Secondary key is immutable. */
+
+ /* Flags passed to associate -- set in the secondary. */
+ u_int32_t s_assoc_flags;
+
+ /*
+ * Foreign key support.
+ *
+ * Linked list of primary dbs -- set in the foreign db.
+ *
+ * !!!
+ * Explicit representations of structures from queue.h.
+ * LIST_HEAD(f_primaries, __db);
+ */
+ struct {
+ struct __db_foreign_info *lh_first;
+ } f_primaries;
+
+ /* Reference to foreign -- set in the secondary. */
+ DB *s_foreign;
+
+ /* API-private structure: used by DB 1.85, C++, Java, Perl and Tcl */
+ void *api_internal;
+
+ /* Subsystem-private structure. */
+ void *bt_internal; /* Btree/Recno access method. */
+ void *h_internal; /* Hash access method. */
+ void *p_internal; /* Partition information. */
+ void *q_internal; /* Queue access method. */
+
+ /* DB PUBLIC HANDLE LIST BEGIN */
+ int (*associate) __P((DB *, DB_TXN *, DB *,
+ int (*)(DB *, const DBT *, const DBT *, DBT *), u_int32_t));
+ int (*associate_foreign) __P((DB *, DB *,
+ int (*)(DB *, const DBT *, DBT *, const DBT *, int *),
+ u_int32_t));
+ int (*close) __P((DB *, u_int32_t));
+ int (*compact) __P((DB *,
+ DB_TXN *, DBT *, DBT *, DB_COMPACT *, u_int32_t, DBT *));
+ int (*cursor) __P((DB *, DB_TXN *, DBC **, u_int32_t));
+ int (*del) __P((DB *, DB_TXN *, DBT *, u_int32_t));
+ void (*err) __P((DB *, int, const char *, ...));
+ void (*errx) __P((DB *, const char *, ...));
+ int (*exists) __P((DB *, DB_TXN *, DBT *, u_int32_t));
+ int (*fd) __P((DB *, int *));
+ int (*get) __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
+ int (*get_alloc) __P((DB *, void *(**)(size_t),
+ void *(**)(void *, size_t), void (**)(void *)));
+ int (*get_append_recno) __P((DB *, int (**)(DB *, DBT *, db_recno_t)));
+ int (*get_bt_compare)
+ __P((DB *, int (**)(DB *, const DBT *, const DBT *)));
+ int (*get_bt_compress) __P((DB *,
+ int (**)(DB *,
+ const DBT *, const DBT *, const DBT *, const DBT *, DBT *),
+ int (**)(DB *, const DBT *, const DBT *, DBT *, DBT *, DBT *)));
+ int (*get_bt_minkey) __P((DB *, u_int32_t *));
+ int (*get_bt_prefix)
+ __P((DB *, size_t (**)(DB *, const DBT *, const DBT *)));
+ int (*get_byteswapped) __P((DB *, int *));
+ int (*get_cachesize) __P((DB *, u_int32_t *, u_int32_t *, int *));
+ int (*get_create_dir) __P((DB *, const char **));
+ int (*get_dbname) __P((DB *, const char **, const char **));
+ int (*get_dup_compare)
+ __P((DB *, int (**)(DB *, const DBT *, const DBT *)));
+ int (*get_encrypt_flags) __P((DB *, u_int32_t *));
+ DB_ENV *(*get_env) __P((DB *));
+ void (*get_errcall) __P((DB *,
+ void (**)(const DB_ENV *, const char *, const char *)));
+ void (*get_errfile) __P((DB *, FILE **));
+ void (*get_errpfx) __P((DB *, const char **));
+ int (*get_feedback) __P((DB *, void (**)(DB *, int, int)));
+ int (*get_flags) __P((DB *, u_int32_t *));
+ int (*get_h_compare)
+ __P((DB *, int (**)(DB *, const DBT *, const DBT *)));
+ int (*get_h_ffactor) __P((DB *, u_int32_t *));
+ int (*get_h_hash)
+ __P((DB *, u_int32_t (**)(DB *, const void *, u_int32_t)));
+ int (*get_h_nelem) __P((DB *, u_int32_t *));
+ int (*get_lorder) __P((DB *, int *));
+ DB_MPOOLFILE *(*get_mpf) __P((DB *));
+ void (*get_msgcall) __P((DB *,
+ void (**)(const DB_ENV *, const char *)));
+ void (*get_msgfile) __P((DB *, FILE **));
+ int (*get_multiple) __P((DB *));
+ int (*get_open_flags) __P((DB *, u_int32_t *));
+ int (*get_pagesize) __P((DB *, u_int32_t *));
+ int (*get_partition_callback) __P((DB *,
+ u_int32_t *, u_int32_t (**)(DB *, DBT *key)));
+ int (*get_partition_dirs) __P((DB *, const char ***));
+ int (*get_partition_keys) __P((DB *, u_int32_t *, DBT **));
+ int (*get_priority) __P((DB *, DB_CACHE_PRIORITY *));
+ int (*get_q_extentsize) __P((DB *, u_int32_t *));
+ int (*get_re_delim) __P((DB *, int *));
+ int (*get_re_len) __P((DB *, u_int32_t *));
+ int (*get_re_pad) __P((DB *, int *));
+ int (*get_re_source) __P((DB *, const char **));
+ int (*get_transactional) __P((DB *));
+ int (*get_type) __P((DB *, DBTYPE *));
+ int (*join) __P((DB *, DBC **, DBC **, u_int32_t));
+ int (*key_range)
+ __P((DB *, DB_TXN *, DBT *, DB_KEY_RANGE *, u_int32_t));
+ int (*open) __P((DB *,
+ DB_TXN *, const char *, const char *, DBTYPE, u_int32_t, int));
+ int (*pget) __P((DB *, DB_TXN *, DBT *, DBT *, DBT *, u_int32_t));
+ int (*put) __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
+ int (*remove) __P((DB *, const char *, const char *, u_int32_t));
+ int (*rename) __P((DB *,
+ const char *, const char *, const char *, u_int32_t));
+ int (*set_alloc) __P((DB *, void *(*)(size_t),
+ void *(*)(void *, size_t), void (*)(void *)));
+ int (*set_append_recno) __P((DB *, int (*)(DB *, DBT *, db_recno_t)));
+ int (*set_bt_compare)
+ __P((DB *, int (*)(DB *, const DBT *, const DBT *)));
+ int (*set_bt_compress) __P((DB *,
+ int (*)(DB *, const DBT *, const DBT *, const DBT *, const DBT *, DBT *),
+ int (*)(DB *, const DBT *, const DBT *, DBT *, DBT *, DBT *)));
+ int (*set_bt_minkey) __P((DB *, u_int32_t));
+ int (*set_bt_prefix)
+ __P((DB *, size_t (*)(DB *, const DBT *, const DBT *)));
+ int (*set_cachesize) __P((DB *, u_int32_t, u_int32_t, int));
+ int (*set_create_dir) __P((DB *, const char *));
+ int (*set_dup_compare)
+ __P((DB *, int (*)(DB *, const DBT *, const DBT *)));
+ int (*set_encrypt) __P((DB *, const char *, u_int32_t));
+ void (*set_errcall) __P((DB *,
+ void (*)(const DB_ENV *, const char *, const char *)));
+ void (*set_errfile) __P((DB *, FILE *));
+ void (*set_errpfx) __P((DB *, const char *));
+ int (*set_feedback) __P((DB *, void (*)(DB *, int, int)));
+ int (*set_flags) __P((DB *, u_int32_t));
+ int (*set_h_compare)
+ __P((DB *, int (*)(DB *, const DBT *, const DBT *)));
+ int (*set_h_ffactor) __P((DB *, u_int32_t));
+ int (*set_h_hash)
+ __P((DB *, u_int32_t (*)(DB *, const void *, u_int32_t)));
+ int (*set_h_nelem) __P((DB *, u_int32_t));
+ int (*set_lorder) __P((DB *, int));
+ void (*set_msgcall) __P((DB *, void (*)(const DB_ENV *, const char *)));
+ void (*set_msgfile) __P((DB *, FILE *));
+ int (*set_pagesize) __P((DB *, u_int32_t));
+ int (*set_paniccall) __P((DB *, void (*)(DB_ENV *, int)));
+ int (*set_partition) __P((DB *,
+ u_int32_t, DBT *, u_int32_t (*)(DB *, DBT *key)));
+ int (*set_partition_dirs) __P((DB *, const char **));
+ int (*set_priority) __P((DB *, DB_CACHE_PRIORITY));
+ int (*set_q_extentsize) __P((DB *, u_int32_t));
+ int (*set_re_delim) __P((DB *, int));
+ int (*set_re_len) __P((DB *, u_int32_t));
+ int (*set_re_pad) __P((DB *, int));
+ int (*set_re_source) __P((DB *, const char *));
+ int (*sort_multiple) __P((DB *, DBT *, DBT *, u_int32_t));
+ int (*stat) __P((DB *, DB_TXN *, void *, u_int32_t));
+ int (*stat_print) __P((DB *, u_int32_t));
+ int (*sync) __P((DB *, u_int32_t));
+ int (*truncate) __P((DB *, DB_TXN *, u_int32_t *, u_int32_t));
+ int (*upgrade) __P((DB *, const char *, u_int32_t));
+ int (*verify)
+ __P((DB *, const char *, const char *, FILE *, u_int32_t));
+ /* DB PUBLIC HANDLE LIST END */
+
+ /* DB PRIVATE HANDLE LIST BEGIN */
+ int (*dump) __P((DB *, const char *,
+ int (*)(void *, const void *), void *, int, int));
+ int (*db_am_remove) __P((DB *, DB_THREAD_INFO *,
+ DB_TXN *, const char *, const char *, u_int32_t));
+ int (*db_am_rename) __P((DB *, DB_THREAD_INFO *,
+ DB_TXN *, const char *, const char *, const char *));
+ /* DB PRIVATE HANDLE LIST END */
+
+ /*
+ * Never called; these are a place to save function pointers
+ * so that we can undo an associate.
+ */
+ int (*stored_get) __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
+ int (*stored_close) __P((DB *, u_int32_t));
+
+#define DB_OK_BTREE 0x01
+#define DB_OK_HASH 0x02
+#define DB_OK_QUEUE 0x04
+#define DB_OK_RECNO 0x08
+ u_int32_t am_ok; /* Legal AM choices. */
+
+ /*
+ * This field really ought to be an AM_FLAG, but we have
+ * run out of bits. If/when we decide to split up
+ * the flags, we can incorporate it.
+ */
+ int preserve_fid; /* Do not free fileid on close. */
+
+#define DB_AM_CHKSUM 0x00000001 /* Checksumming */
+#define DB_AM_COMPENSATE 0x00000002 /* Created by compensating txn */
+#define DB_AM_COMPRESS 0x00000004 /* Compressed BTree */
+#define DB_AM_CREATED 0x00000008 /* Database was created upon open */
+#define DB_AM_CREATED_MSTR 0x00000010 /* Encompassing file was created */
+#define DB_AM_DBM_ERROR 0x00000020 /* Error in DBM/NDBM database */
+#define DB_AM_DELIMITER 0x00000040 /* Variable length delimiter set */
+#define DB_AM_DISCARD 0x00000080 /* Discard any cached pages */
+#define DB_AM_DUP 0x00000100 /* DB_DUP */
+#define DB_AM_DUPSORT 0x00000200 /* DB_DUPSORT */
+#define DB_AM_ENCRYPT 0x00000400 /* Encryption */
+#define DB_AM_FIXEDLEN 0x00000800 /* Fixed-length records */
+#define DB_AM_INMEM 0x00001000 /* In-memory; no sync on close */
+#define DB_AM_INORDER 0x00002000 /* DB_INORDER */
+#define DB_AM_IN_RENAME 0x00004000 /* File is being renamed */
+#define DB_AM_NOT_DURABLE 0x00008000 /* Do not log changes */
+#define DB_AM_OPEN_CALLED 0x00010000 /* DB->open called */
+#define DB_AM_PAD 0x00020000 /* Fixed-length record pad */
+#define DB_AM_PGDEF 0x00040000 /* Page size was defaulted */
+#define DB_AM_RDONLY 0x00080000 /* Database is readonly */
+#define DB_AM_READ_UNCOMMITTED 0x00100000 /* Support degree 1 isolation */
+#define DB_AM_RECNUM 0x00200000 /* DB_RECNUM */
+#define DB_AM_RECOVER 0x00400000 /* DB opened by recovery routine */
+#define DB_AM_RENUMBER 0x00800000 /* DB_RENUMBER */
+#define DB_AM_REVSPLITOFF 0x01000000 /* DB_REVSPLITOFF */
+#define DB_AM_SECONDARY 0x02000000 /* Database is a secondary index */
+#define DB_AM_SNAPSHOT 0x04000000 /* DB_SNAPSHOT */
+#define DB_AM_SUBDB 0x08000000 /* Subdatabases supported */
+#define DB_AM_SWAP 0x10000000 /* Pages need to be byte-swapped */
+#define DB_AM_TXN 0x20000000 /* Opened in a transaction */
+#define DB_AM_VERIFYING 0x40000000 /* DB handle is in the verifier */
+ u_int32_t orig_flags; /* Flags at open, for refresh */
+ u_int32_t flags;
+};
+
+/*
+ * Macros for bulk operations. These are only intended for the C API.
+ * For C++, use DbMultiple*Iterator or DbMultiple*Builder.
+ *
+ * Bulk operations store multiple entries into a single DBT structure. The
+ * following macros assist with creating and reading these Multiple DBTs.
+ *
+ * The basic layout for single data items is:
+ *
+ * -------------------------------------------------------------------------
+ * | data1 | ... | dataN | ..... |-1 | dNLen | dNOff | ... | d1Len | d1Off |
+ * -------------------------------------------------------------------------
+ *
+ * For the DB_MULTIPLE_KEY* macros, the items are in key/data pairs, so data1
+ * would be a key, and data2 its corresponding value (N is always even).
+ *
+ * For the DB_MULTIPLE_RECNO* macros, the record number is stored along with
+ * the len/off pair in the "header" section, and the list is zero terminated
+ * (since -1 is a valid record number):
+ *
+ * --------------------------------------------------------------------------
+ * | d1 |..| dN |..| 0 | dNLen | dNOff | recnoN |..| d1Len | d1Off | recno1 |
+ * --------------------------------------------------------------------------
+ */
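+
+/*
+ * An illustrative sketch, not part of this header: draining a buffer
+ * filled by DB->get with DB_MULTIPLE for a key with duplicate data
+ * items, assuming data.data/data.ulen describe a large user-owned
+ * buffer (DB_DBT_USERMEM).
+ *
+ *	void *p, *retdata;
+ *	u_int32_t retdlen;
+ *
+ *	if ((ret = dbp->get(dbp, NULL, &key, &data, DB_MULTIPLE)) != 0)
+ *		goto err;
+ *	for (DB_MULTIPLE_INIT(p, &data); p != NULL;) {
+ *		DB_MULTIPLE_NEXT(p, &data, retdata, retdlen);
+ *		if (retdata != NULL)
+ *			... process retdlen bytes at retdata ...
+ *	}
+ */
+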
+#define DB_MULTIPLE_INIT(pointer, dbt) \
+ (pointer = (u_int8_t *)(dbt)->data + \
+ (dbt)->ulen - sizeof(u_int32_t))
+
+#define DB_MULTIPLE_NEXT(pointer, dbt, retdata, retdlen) \
+ do { \
+ u_int32_t *__p = (u_int32_t *)(pointer); \
+ if (*__p == (u_int32_t)-1) { \
+ retdata = NULL; \
+ pointer = NULL; \
+ break; \
+ } \
+ retdata = (u_int8_t *)(dbt)->data + *__p--; \
+ retdlen = *__p--; \
+ pointer = __p; \
+ if (retdlen == 0 && retdata == (u_int8_t *)(dbt)->data) \
+ retdata = NULL; \
+ } while (0)
+
+#define DB_MULTIPLE_KEY_NEXT(pointer, dbt, retkey, retklen, retdata, retdlen) \
+ do { \
+ u_int32_t *__p = (u_int32_t *)(pointer); \
+ if (*__p == (u_int32_t)-1) { \
+ retdata = NULL; \
+ retkey = NULL; \
+ pointer = NULL; \
+ break; \
+ } \
+ retkey = (u_int8_t *)(dbt)->data + *__p--; \
+ retklen = *__p--; \
+ retdata = (u_int8_t *)(dbt)->data + *__p--; \
+ retdlen = *__p--; \
+ pointer = __p; \
+ } while (0)
+
+#define DB_MULTIPLE_RECNO_NEXT(pointer, dbt, recno, retdata, retdlen) \
+ do { \
+ u_int32_t *__p = (u_int32_t *)(pointer); \
+ if (*__p == (u_int32_t)0) { \
+ recno = 0; \
+ retdata = NULL; \
+ pointer = NULL; \
+ break; \
+ } \
+ recno = *__p--; \
+ retdata = (u_int8_t *)(dbt)->data + *__p--; \
+ retdlen = *__p--; \
+ pointer = __p; \
+ } while (0)
+
+#define DB_MULTIPLE_WRITE_INIT(pointer, dbt) \
+ do { \
+ (dbt)->flags |= DB_DBT_BULK; \
+ pointer = (u_int8_t *)(dbt)->data + \
+ (dbt)->ulen - sizeof(u_int32_t); \
+ *(u_int32_t *)(pointer) = (u_int32_t)-1; \
+ } while (0)
+
+#define DB_MULTIPLE_RESERVE_NEXT(pointer, dbt, writedata, writedlen) \
+ do { \
+ u_int32_t *__p = (u_int32_t *)(pointer); \
+ u_int32_t __off = ((pointer) == (u_int8_t *)(dbt)->data +\
+ (dbt)->ulen - sizeof(u_int32_t)) ? 0 : __p[1] + __p[2];\
+ if ((u_int8_t *)(dbt)->data + __off + (writedlen) > \
+ (u_int8_t *)(__p - 2)) \
+ writedata = NULL; \
+ else { \
+ writedata = (u_int8_t *)(dbt)->data + __off; \
+ __p[0] = __off; \
+ __p[-1] = (writedlen); \
+ __p[-2] = (u_int32_t)-1; \
+ pointer = __p - 2; \
+ } \
+ } while (0)
+
+#define DB_MULTIPLE_WRITE_NEXT(pointer, dbt, writedata, writedlen) \
+ do { \
+ void *__destd; \
+ DB_MULTIPLE_RESERVE_NEXT((pointer), (dbt), \
+ __destd, (writedlen)); \
+ if (__destd == NULL) \
+ pointer = NULL; \
+ else \
+ memcpy(__destd, (writedata), (writedlen)); \
+ } while (0)
+
+#define DB_MULTIPLE_KEY_RESERVE_NEXT(pointer, dbt, writekey, writeklen, writedata, writedlen) \
+ do { \
+ u_int32_t *__p = (u_int32_t *)(pointer); \
+ u_int32_t __off = ((pointer) == (u_int8_t *)(dbt)->data +\
+ (dbt)->ulen - sizeof(u_int32_t)) ? 0 : __p[1] + __p[2];\
+ if ((u_int8_t *)(dbt)->data + __off + (writeklen) + \
+ (writedlen) > (u_int8_t *)(__p - 4)) { \
+ writekey = NULL; \
+ writedata = NULL; \
+ } else { \
+ writekey = (u_int8_t *)(dbt)->data + __off; \
+ __p[0] = __off; \
+ __p[-1] = (writeklen); \
+ __p -= 2; \
+ __off += (writeklen); \
+ writedata = (u_int8_t *)(dbt)->data + __off; \
+ __p[0] = __off; \
+ __p[-1] = (writedlen); \
+ __p[-2] = (u_int32_t)-1; \
+ pointer = __p - 2; \
+ } \
+ } while (0)
+
+#define DB_MULTIPLE_KEY_WRITE_NEXT(pointer, dbt, writekey, writeklen, writedata, writedlen) \
+ do { \
+ void *__destk, *__destd; \
+ DB_MULTIPLE_KEY_RESERVE_NEXT((pointer), (dbt), \
+ __destk, (writeklen), __destd, (writedlen)); \
+ if (__destk == NULL) \
+ pointer = NULL; \
+ else { \
+ memcpy(__destk, (writekey), (writeklen)); \
+ if (__destd != NULL) \
+ memcpy(__destd, (writedata), (writedlen));\
+ } \
+ } while (0)
+
+#define DB_MULTIPLE_RECNO_WRITE_INIT(pointer, dbt) \
+ do { \
+ (dbt)->flags |= DB_DBT_BULK; \
+ pointer = (u_int8_t *)(dbt)->data + \
+ (dbt)->ulen - sizeof(u_int32_t); \
+ *(u_int32_t *)(pointer) = 0; \
+ } while (0)
+
+#define DB_MULTIPLE_RECNO_RESERVE_NEXT(pointer, dbt, recno, writedata, writedlen) \
+ do { \
+ u_int32_t *__p = (u_int32_t *)(pointer); \
+ u_int32_t __off = ((pointer) == (u_int8_t *)(dbt)->data +\
+ (dbt)->ulen - sizeof(u_int32_t)) ? 0 : __p[1] + __p[2]; \
+ if (((u_int8_t *)(dbt)->data + __off) + (writedlen) > \
+ (u_int8_t *)(__p - 3)) \
+ writedata = NULL; \
+ else { \
+ writedata = (u_int8_t *)(dbt)->data + __off; \
+ __p[0] = (u_int32_t)(recno); \
+ __p[-1] = __off; \
+ __p[-2] = (writedlen); \
+ __p[-3] = 0; \
+ pointer = __p - 3; \
+ } \
+ } while (0)
+
+#define DB_MULTIPLE_RECNO_WRITE_NEXT(pointer, dbt, recno, writedata, writedlen)\
+ do { \
+ void *__destd; \
+ DB_MULTIPLE_RECNO_RESERVE_NEXT((pointer), (dbt), \
+ (recno), __destd, (writedlen)); \
+ if (__destd == NULL) \
+ pointer = NULL; \
+ else if ((writedlen) != 0) \
+ memcpy(__destd, (writedata), (writedlen)); \
+ } while (0)
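+
+/*
+ * An illustrative sketch, not part of this header: building a key/data
+ * bulk buffer for a bulk put, assuming bulk.data/bulk.ulen describe a
+ * large user-owned buffer and keys[]/vals[] are hypothetical arrays.
+ * With DB_MULTIPLE_KEY the pairs travel in the key DBT; the data DBT is
+ * not examined.
+ *
+ *	void *p;
+ *	u_int32_t i;
+ *
+ *	DB_MULTIPLE_WRITE_INIT(p, &bulk);
+ *	for (i = 0; i < n; i++) {
+ *		DB_MULTIPLE_KEY_WRITE_NEXT(p, &bulk,
+ *		    keys[i].data, keys[i].size,
+ *		    vals[i].data, vals[i].size);
+ *		if (p == NULL)
+ *			... buffer full: enlarge it or flush and re-init ...
+ *	}
+ *	if ((ret = dbp->put(dbp, NULL, &bulk, &data, DB_MULTIPLE_KEY)) != 0)
+ *		goto err;
+ */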
+
+/*******************************************************
+ * Access method cursors.
+ *******************************************************/
+struct __dbc {
+ DB *dbp; /* Backing database */
+ DB_ENV *dbenv; /* Backing environment */
+ ENV *env; /* Backing environment */
+
+ DB_THREAD_INFO *thread_info; /* Thread that owns this cursor. */
+ DB_TXN *txn; /* Associated transaction. */
+ DB_CACHE_PRIORITY priority; /* Priority in cache. */
+
+ /*
+ * Active/free cursor queues.
+ *
+ * !!!
+ * Explicit representations of structures from queue.h.
+ * TAILQ_ENTRY(__dbc) links;
+ */
+ struct {
+ DBC *tqe_next;
+ DBC **tqe_prev;
+ } links;
+
+ /*
+ * The DBT *'s below are used by the cursor routines to return
+ * data to the user when DBT flags indicate that DB should manage
+ * the returned memory. They point at a DBT containing the buffer
+ * and length that will be used, and "belonging" to the handle that
+ * should "own" this memory. This may be a "my_*" field of this
+ * cursor--the default--or it may be the corresponding field of
+ * another cursor, a DB handle, a join cursor, etc. In general, it
+ * will be whatever handle the user originally used for the current
+ * DB interface call.
+ */
+ DBT *rskey; /* Returned secondary key. */
+ DBT *rkey; /* Returned [primary] key. */
+ DBT *rdata; /* Returned data. */
+
+ DBT my_rskey; /* Space for returned secondary key. */
+ DBT my_rkey; /* Space for returned [primary] key. */
+ DBT my_rdata; /* Space for returned data. */
+
+ DB_LOCKER *lref; /* Reference to default locker. */
+ DB_LOCKER *locker; /* Locker for this operation. */
+ DBT lock_dbt; /* DBT referencing lock. */
+ DB_LOCK_ILOCK lock; /* Object to be locked. */
+ DB_LOCK mylock; /* CDB lock held on this cursor. */
+
+ u_int cl_id; /* Remote client id. */
+
+ DBTYPE dbtype; /* Cursor type. */
+
+ DBC_INTERNAL *internal; /* Access method private. */
+
+ /* DBC PUBLIC HANDLE LIST BEGIN */
+ int (*close) __P((DBC *));
+ int (*cmp) __P((DBC *, DBC *, int *, u_int32_t));
+ int (*count) __P((DBC *, db_recno_t *, u_int32_t));
+ int (*del) __P((DBC *, u_int32_t));
+ int (*dup) __P((DBC *, DBC **, u_int32_t));
+ int (*get) __P((DBC *, DBT *, DBT *, u_int32_t));
+ int (*get_priority) __P((DBC *, DB_CACHE_PRIORITY *));
+ int (*pget) __P((DBC *, DBT *, DBT *, DBT *, u_int32_t));
+ int (*put) __P((DBC *, DBT *, DBT *, u_int32_t));
+ int (*set_priority) __P((DBC *, DB_CACHE_PRIORITY));
+ /* DBC PUBLIC HANDLE LIST END */
+
+ /* The following are the method names deprecated in the 4.6 release. */
+ int (*c_close) __P((DBC *));
+ int (*c_count) __P((DBC *, db_recno_t *, u_int32_t));
+ int (*c_del) __P((DBC *, u_int32_t));
+ int (*c_dup) __P((DBC *, DBC **, u_int32_t));
+ int (*c_get) __P((DBC *, DBT *, DBT *, u_int32_t));
+ int (*c_pget) __P((DBC *, DBT *, DBT *, DBT *, u_int32_t));
+ int (*c_put) __P((DBC *, DBT *, DBT *, u_int32_t));
+
+ /* DBC PRIVATE HANDLE LIST BEGIN */
+ int (*am_bulk) __P((DBC *, DBT *, u_int32_t));
+ int (*am_close) __P((DBC *, db_pgno_t, int *));
+ int (*am_del) __P((DBC *, u_int32_t));
+ int (*am_destroy) __P((DBC *));
+ int (*am_get) __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *));
+ int (*am_put) __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *));
+ int (*am_writelock) __P((DBC *));
+ /* DBC PRIVATE HANDLE LIST END */
+
+/*
+ * DBC_DONTLOCK and DBC_RECOVER are used during recovery and transaction
+ * abort. If a transaction is being aborted or recovered then DBC_RECOVER
+ * will be set and locking and logging will be disabled on this cursor. If
+ * we are performing a compensating transaction (e.g., free page processing)
+ * then DBC_DONTLOCK will be set to inhibit locking, but logging will still
+ * be required. DBC_DONTLOCK is also used if the whole database is locked.
+ */
+#define DBC_ACTIVE 0x00001 /* Cursor in use. */
+#define DBC_BULK 0x00002 /* Bulk update cursor. */
+#define DBC_DONTLOCK 0x00004 /* Don't lock on this cursor. */
+#define DBC_DOWNREV 0x00008 /* Down rev replication master. */
+#define DBC_DUPLICATE 0x00010 /* Create a duplicate cursor. */
+#define DBC_FROM_DB_GET 0x00020 /* Called from the DB->get() method. */
+#define DBC_MULTIPLE 0x00040 /* Return Multiple data. */
+#define DBC_MULTIPLE_KEY 0x00080 /* Return Multiple keys and data. */
+#define DBC_OPD 0x00100 /* Cursor references off-page dups. */
+#define DBC_OWN_LID 0x00200 /* Free lock id on destroy. */
+#define DBC_PARTITIONED 0x00400 /* Cursor for a partitioned db. */
+#define DBC_READ_COMMITTED 0x00800 /* Cursor has degree 2 isolation. */
+#define DBC_READ_UNCOMMITTED 0x01000 /* Cursor has degree 1 isolation. */
+#define DBC_RECOVER 0x02000 /* Recovery cursor; don't log/lock. */
+#define DBC_RMW 0x04000 /* Acquire write flag in read op. */
+#define DBC_TRANSIENT 0x08000 /* Cursor is transient. */
+#define DBC_WAS_READ_COMMITTED 0x10000 /* Cursor holds a read committed lock. */
+#define DBC_WRITECURSOR 0x20000 /* Cursor may be used to write (CDB). */
+#define DBC_WRITER 0x40000 /* Cursor immediately writing (CDB). */
+ u_int32_t flags;
+};
+
+/* Key range statistics structure */
+struct __key_range {
+ double less;
+ double equal;
+ double greater;
+};
+
+/* Btree/Recno statistics structure. */
+struct __db_bt_stat {
+ u_int32_t bt_magic; /* Magic number. */
+ u_int32_t bt_version; /* Version number. */
+ u_int32_t bt_metaflags; /* Metadata flags. */
+ u_int32_t bt_nkeys; /* Number of unique keys. */
+ u_int32_t bt_ndata; /* Number of data items. */
+ u_int32_t bt_pagecnt; /* Page count. */
+ u_int32_t bt_pagesize; /* Page size. */
+ u_int32_t bt_minkey; /* Minkey value. */
+ u_int32_t bt_re_len; /* Fixed-length record length. */
+ u_int32_t bt_re_pad; /* Fixed-length record pad. */
+ u_int32_t bt_levels; /* Tree levels. */
+ u_int32_t bt_int_pg; /* Internal pages. */
+ u_int32_t bt_leaf_pg; /* Leaf pages. */
+ u_int32_t bt_dup_pg; /* Duplicate pages. */
+ u_int32_t bt_over_pg; /* Overflow pages. */
+ u_int32_t bt_empty_pg; /* Empty pages. */
+ u_int32_t bt_free; /* Pages on the free list. */
+ uintmax_t bt_int_pgfree; /* Bytes free in internal pages. */
+ uintmax_t bt_leaf_pgfree; /* Bytes free in leaf pages. */
+ uintmax_t bt_dup_pgfree; /* Bytes free in duplicate pages. */
+ uintmax_t bt_over_pgfree; /* Bytes free in overflow pages. */
+};
+
+struct __db_compact {
+ /* Input Parameters. */
+ u_int32_t compact_fillpercent; /* Desired fillfactor: 1-100 */
+ db_timeout_t compact_timeout; /* Lock timeout. */
+ u_int32_t compact_pages; /* Max pages to process. */
+ /* Output Stats. */
+ u_int32_t compact_pages_free; /* Number of pages freed. */
+ u_int32_t compact_pages_examine; /* Number of pages examined. */
+ u_int32_t compact_levels; /* Number of levels removed. */
+ u_int32_t compact_deadlock; /* Number of deadlocks. */
+ db_pgno_t compact_pages_truncated; /* Pages truncated to OS. */
+ /* Internal. */
+ db_pgno_t compact_truncate; /* Page number for truncation */
+};
+
+/* Hash statistics structure. */
+struct __db_h_stat {
+ u_int32_t hash_magic; /* Magic number. */
+ u_int32_t hash_version; /* Version number. */
+ u_int32_t hash_metaflags; /* Metadata flags. */
+ u_int32_t hash_nkeys; /* Number of unique keys. */
+ u_int32_t hash_ndata; /* Number of data items. */
+ u_int32_t hash_pagecnt; /* Page count. */
+ u_int32_t hash_pagesize; /* Page size. */
+ u_int32_t hash_ffactor; /* Fill factor specified at create. */
+ u_int32_t hash_buckets; /* Number of hash buckets. */
+ u_int32_t hash_free; /* Pages on the free list. */
+ uintmax_t hash_bfree; /* Bytes free on bucket pages. */
+ u_int32_t hash_bigpages; /* Number of big key/data pages. */
+ uintmax_t hash_big_bfree; /* Bytes free on big item pages. */
+ u_int32_t hash_overflows; /* Number of overflow pages. */
+ uintmax_t hash_ovfl_free; /* Bytes free on ovfl pages. */
+ u_int32_t hash_dup; /* Number of dup pages. */
+ uintmax_t hash_dup_free; /* Bytes free on duplicate pages. */
+};
+
+/* Queue statistics structure. */
+struct __db_qam_stat {
+ u_int32_t qs_magic; /* Magic number. */
+ u_int32_t qs_version; /* Version number. */
+ u_int32_t qs_metaflags; /* Metadata flags. */
+ u_int32_t qs_nkeys; /* Number of unique keys. */
+ u_int32_t qs_ndata; /* Number of data items. */
+ u_int32_t qs_pagesize; /* Page size. */
+ u_int32_t qs_extentsize; /* Pages per extent. */
+ u_int32_t qs_pages; /* Data pages. */
+ u_int32_t qs_re_len; /* Fixed-length record length. */
+ u_int32_t qs_re_pad; /* Fixed-length record pad. */
+ u_int32_t qs_pgfree; /* Bytes free in data pages. */
+ u_int32_t qs_first_recno; /* First undeleted record. */
+ u_int32_t qs_cur_recno; /* Next available record number. */
+};
+
+/*******************************************************
+ * Environment.
+ *******************************************************/
+#define DB_REGION_MAGIC 0x120897 /* Environment magic number. */
+
+/*
+ * Database environment structure.
+ *
+ * This is the public database environment handle. The private environment
+ * handle is the ENV structure. The user owns this structure; the library
+ * owns the ENV structure. The reason there are two structures is that
+ * the user's configuration outlives any particular DB_ENV->open call, and
+ * separate structures allow us to easily discard internal information without
+ * discarding the user's configuration.
+ *
+ * Fields in the DB_ENV structure should normally be set only by application
+ * DB_ENV handle methods.
+ */
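+
+/*
+ * Illustrative sketch (not part of the original header): the usual
+ * lifecycle of this handle. db_env_create() allocates both the DB_ENV and
+ * its private ENV; configuration is supplied through the handle methods
+ * before DB_ENV->open(). The home directory is hypothetical.
+ *
+ *	DB_ENV *dbenv;
+ *
+ *	if (db_env_create(&dbenv, 0) == 0) {
+ *		(void)dbenv->set_cachesize(dbenv, 0, 4 * 1024 * 1024, 1);
+ *		(void)dbenv->open(dbenv, "/some/home", DB_CREATE |
+ *		    DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK |
+ *		    DB_INIT_LOG, 0);
+ *		...
+ *		(void)dbenv->close(dbenv, 0);
+ *	}
+ */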
+struct __db_env {
+ ENV *env; /* Linked ENV structure */
+
+ /*
+ * The DB_ENV structure can be used concurrently, so field access is
+ * protected.
+ */
+ db_mutex_t mtx_db_env; /* DB_ENV structure mutex */
+
+ /* Error message callback */
+ void (*db_errcall) __P((const DB_ENV *, const char *, const char *));
+ FILE *db_errfile; /* Error message file stream */
+ const char *db_errpfx; /* Error message prefix */
+
+ /* Other message callback */
+ void (*db_msgcall) __P((const DB_ENV *, const char *));
+ FILE *db_msgfile; /* Other message file stream */
+
+ /* Other application callback functions */
+ int (*app_dispatch) __P((DB_ENV *, DBT *, DB_LSN *, db_recops));
+ void (*db_event_func) __P((DB_ENV *, u_int32_t, void *));
+ void (*db_feedback) __P((DB_ENV *, int, int));
+ void (*db_free) __P((void *));
+ void (*db_paniccall) __P((DB_ENV *, int));
+ void *(*db_malloc) __P((size_t));
+ void *(*db_realloc) __P((void *, size_t));
+ int (*is_alive) __P((DB_ENV *, pid_t, db_threadid_t, u_int32_t));
+ void (*thread_id) __P((DB_ENV *, pid_t *, db_threadid_t *));
+ char *(*thread_id_string) __P((DB_ENV *, pid_t, db_threadid_t, char *));
+
+ /* Application specified paths */
+ char *db_log_dir; /* Database log file directory */
+ char *db_tmp_dir; /* Database tmp file directory */
+
+ char *db_create_dir; /* Create directory for data files */
+ char **db_data_dir; /* Database data file directories */
+ int data_cnt; /* Database data file slots */
+ int data_next; /* Next database data file slot */
+
+ char *intermediate_dir_mode; /* Intermediate directory perms */
+
+ long shm_key; /* shmget key */
+
+ char *passwd; /* Cryptography support */
+ size_t passwd_len;
+
+ void *cl_handle; /* RPC: remote client handle */
+ u_int cl_id; /* RPC: remote client env id */
+
+ /* Private handle references */
+ void *app_private; /* Application-private handle */
+ void *api1_internal; /* C++, Perl API private */
+ void *api2_internal; /* Java API private */
+
+ u_int32_t verbose; /* DB_VERB_XXX flags */
+
+ /* Mutex configuration */
+ u_int32_t mutex_align; /* Mutex alignment */
+ u_int32_t mutex_cnt; /* Number of mutexes to configure */
+ u_int32_t mutex_inc; /* Number of mutexes to add */
+ u_int32_t mutex_tas_spins;/* Test-and-set spin count */
+
+ /* Locking configuration */
+ u_int8_t *lk_conflicts; /* Two dimensional conflict matrix */
+ int lk_modes; /* Number of lock modes in table */
+ u_int32_t lk_detect; /* Deadlock detect on all conflicts */
+ u_int32_t lk_max; /* Maximum number of locks */
+ u_int32_t lk_max_lockers;/* Maximum number of lockers */
+ u_int32_t lk_max_objects;/* Maximum number of locked objects */
+ u_int32_t lk_partitions ;/* Number of object partitions */
+ db_timeout_t lk_timeout; /* Lock timeout period */
+
+ /* Logging configuration */
+ u_int32_t lg_bsize; /* Buffer size */
+ int lg_filemode; /* Log file permission mode */
+ u_int32_t lg_regionmax; /* Region size */
+ u_int32_t lg_size; /* Log file size */
+ u_int32_t lg_flags; /* Log configuration */
+
+ /* Memory pool configuration */
+ u_int32_t mp_gbytes; /* Cache size: GB */
+ u_int32_t mp_bytes; /* Cache size: bytes */
+ u_int32_t mp_max_gbytes; /* Maximum cache size: GB */
+ u_int32_t mp_max_bytes; /* Maximum cache size: bytes */
+ size_t mp_mmapsize; /* Maximum file size for mmap */
+ int mp_maxopenfd; /* Maximum open file descriptors */
+ int mp_maxwrite; /* Maximum buffers to write */
+ u_int mp_ncache; /* Initial number of cache regions */
+ u_int32_t mp_pagesize; /* Average page size */
+ u_int32_t mp_tablesize; /* Approximate hash table size */
+ /* Sleep after writing max buffers */
+ db_timeout_t mp_maxwrite_sleep;
+
+ /* Transaction configuration */
+ u_int32_t tx_max; /* Maximum number of transactions */
+ time_t tx_timestamp; /* Recover to specific timestamp */
+ db_timeout_t tx_timeout; /* Timeout for transactions */
+
+ /* Thread tracking configuration */
+ u_int32_t thr_max; /* Thread count */
+
+ /*
+ * The following fields are not strictly user-owned, but they outlive
+ * the ENV structure, and so are stored here.
+ */
+ DB_FH *registry; /* DB_REGISTER file handle */
+ u_int32_t registry_off; /*
+ * Offset of our slot. We can't use
+ * off_t because its size depends on
+ * build settings.
+ */
+ db_timeout_t envreg_timeout; /* DB_REGISTER wait timeout */
+
+#define DB_ENV_AUTO_COMMIT 0x00000001 /* DB_AUTO_COMMIT */
+#define DB_ENV_CDB_ALLDB 0x00000002 /* CDB environment wide locking */
+#define DB_ENV_FAILCHK 0x00000004 /* Failchk is running */
+#define DB_ENV_DIRECT_DB 0x00000008 /* DB_DIRECT_DB set */
+#define DB_ENV_DSYNC_DB 0x00000010 /* DB_DSYNC_DB set */
+#define DB_ENV_MULTIVERSION 0x00000020 /* DB_MULTIVERSION set */
+#define DB_ENV_NOLOCKING 0x00000040 /* DB_NOLOCKING set */
+#define DB_ENV_NOMMAP 0x00000080 /* DB_NOMMAP set */
+#define DB_ENV_NOPANIC 0x00000100 /* Okay if panic set */
+#define DB_ENV_OVERWRITE 0x00000200 /* DB_OVERWRITE set */
+#define DB_ENV_REGION_INIT 0x00000400 /* DB_REGION_INIT set */
+#define DB_ENV_RPCCLIENT 0x00000800 /* DB_RPCCLIENT set */
+#define DB_ENV_RPCCLIENT_GIVEN 0x00001000 /* User-supplied RPC client struct */
+#define DB_ENV_TIME_NOTGRANTED 0x00002000 /* DB_TIME_NOTGRANTED set */
+#define DB_ENV_TXN_NOSYNC 0x00004000 /* DB_TXN_NOSYNC set */
+#define DB_ENV_TXN_NOWAIT 0x00008000 /* DB_TXN_NOWAIT set */
+#define DB_ENV_TXN_SNAPSHOT 0x00010000 /* DB_TXN_SNAPSHOT set */
+#define DB_ENV_TXN_WRITE_NOSYNC 0x00020000 /* DB_TXN_WRITE_NOSYNC set */
+#define DB_ENV_YIELDCPU 0x00040000 /* DB_YIELDCPU set */
+ u_int32_t flags;
+
+ /* DB_ENV PUBLIC HANDLE LIST BEGIN */
+ int (*add_data_dir) __P((DB_ENV *, const char *));
+ int (*cdsgroup_begin) __P((DB_ENV *, DB_TXN **));
+ int (*close) __P((DB_ENV *, u_int32_t));
+ int (*dbremove) __P((DB_ENV *,
+ DB_TXN *, const char *, const char *, u_int32_t));
+ int (*dbrename) __P((DB_ENV *,
+ DB_TXN *, const char *, const char *, const char *, u_int32_t));
+ void (*err) __P((const DB_ENV *, int, const char *, ...));
+ void (*errx) __P((const DB_ENV *, const char *, ...));
+ int (*failchk) __P((DB_ENV *, u_int32_t));
+ int (*fileid_reset) __P((DB_ENV *, const char *, u_int32_t));
+ int (*get_alloc) __P((DB_ENV *, void *(**)(size_t),
+ void *(**)(void *, size_t), void (**)(void *)));
+ int (*get_app_dispatch)
+ __P((DB_ENV *, int (**)(DB_ENV *, DBT *, DB_LSN *, db_recops)));
+ int (*get_cache_max) __P((DB_ENV *, u_int32_t *, u_int32_t *));
+ int (*get_cachesize) __P((DB_ENV *, u_int32_t *, u_int32_t *, int *));
+ int (*get_create_dir) __P((DB_ENV *, const char **));
+ int (*get_data_dirs) __P((DB_ENV *, const char ***));
+ int (*get_encrypt_flags) __P((DB_ENV *, u_int32_t *));
+ void (*get_errcall) __P((DB_ENV *,
+ void (**)(const DB_ENV *, const char *, const char *)));
+ void (*get_errfile) __P((DB_ENV *, FILE **));
+ void (*get_errpfx) __P((DB_ENV *, const char **));
+ int (*get_flags) __P((DB_ENV *, u_int32_t *));
+ int (*get_feedback) __P((DB_ENV *, void (**)(DB_ENV *, int, int)));
+ int (*get_home) __P((DB_ENV *, const char **));
+ int (*get_intermediate_dir_mode) __P((DB_ENV *, const char **));
+ int (*get_isalive) __P((DB_ENV *,
+ int (**)(DB_ENV *, pid_t, db_threadid_t, u_int32_t)));
+ int (*get_lg_bsize) __P((DB_ENV *, u_int32_t *));
+ int (*get_lg_dir) __P((DB_ENV *, const char **));
+ int (*get_lg_filemode) __P((DB_ENV *, int *));
+ int (*get_lg_max) __P((DB_ENV *, u_int32_t *));
+ int (*get_lg_regionmax) __P((DB_ENV *, u_int32_t *));
+ int (*get_lk_conflicts) __P((DB_ENV *, const u_int8_t **, int *));
+ int (*get_lk_detect) __P((DB_ENV *, u_int32_t *));
+ int (*get_lk_max_lockers) __P((DB_ENV *, u_int32_t *));
+ int (*get_lk_max_locks) __P((DB_ENV *, u_int32_t *));
+ int (*get_lk_max_objects) __P((DB_ENV *, u_int32_t *));
+ int (*get_lk_partitions) __P((DB_ENV *, u_int32_t *));
+ int (*get_mp_max_openfd) __P((DB_ENV *, int *));
+ int (*get_mp_max_write) __P((DB_ENV *, int *, db_timeout_t *));
+ int (*get_mp_mmapsize) __P((DB_ENV *, size_t *));
+ int (*get_mp_pagesize) __P((DB_ENV *, u_int32_t *));
+ int (*get_mp_tablesize) __P((DB_ENV *, u_int32_t *));
+ void (*get_msgcall)
+ __P((DB_ENV *, void (**)(const DB_ENV *, const char *)));
+ void (*get_msgfile) __P((DB_ENV *, FILE **));
+ int (*get_open_flags) __P((DB_ENV *, u_int32_t *));
+ int (*get_shm_key) __P((DB_ENV *, long *));
+ int (*get_thread_count) __P((DB_ENV *, u_int32_t *));
+ int (*get_thread_id_fn)
+ __P((DB_ENV *, void (**)(DB_ENV *, pid_t *, db_threadid_t *)));
+ int (*get_thread_id_string_fn) __P((DB_ENV *,
+ char *(**)(DB_ENV *, pid_t, db_threadid_t, char *)));
+ int (*get_timeout) __P((DB_ENV *, db_timeout_t *, u_int32_t));
+ int (*get_tmp_dir) __P((DB_ENV *, const char **));
+ int (*get_tx_max) __P((DB_ENV *, u_int32_t *));
+ int (*get_tx_timestamp) __P((DB_ENV *, time_t *));
+ int (*get_verbose) __P((DB_ENV *, u_int32_t, int *));
+ int (*is_bigendian) __P((void));
+ int (*lock_detect) __P((DB_ENV *, u_int32_t, u_int32_t, int *));
+ int (*lock_get) __P((DB_ENV *,
+ u_int32_t, u_int32_t, DBT *, db_lockmode_t, DB_LOCK *));
+ int (*lock_id) __P((DB_ENV *, u_int32_t *));
+ int (*lock_id_free) __P((DB_ENV *, u_int32_t));
+ int (*lock_put) __P((DB_ENV *, DB_LOCK *));
+ int (*lock_stat) __P((DB_ENV *, DB_LOCK_STAT **, u_int32_t));
+ int (*lock_stat_print) __P((DB_ENV *, u_int32_t));
+ int (*lock_vec) __P((DB_ENV *,
+ u_int32_t, u_int32_t, DB_LOCKREQ *, int, DB_LOCKREQ **));
+ int (*log_archive) __P((DB_ENV *, char **[], u_int32_t));
+ int (*log_cursor) __P((DB_ENV *, DB_LOGC **, u_int32_t));
+ int (*log_file) __P((DB_ENV *, const DB_LSN *, char *, size_t));
+ int (*log_flush) __P((DB_ENV *, const DB_LSN *));
+ int (*log_get_config) __P((DB_ENV *, u_int32_t, int *));
+ int (*log_printf) __P((DB_ENV *, DB_TXN *, const char *, ...));
+ int (*log_put) __P((DB_ENV *, DB_LSN *, const DBT *, u_int32_t));
+ int (*log_set_config) __P((DB_ENV *, u_int32_t, int));
+ int (*log_stat) __P((DB_ENV *, DB_LOG_STAT **, u_int32_t));
+ int (*log_stat_print) __P((DB_ENV *, u_int32_t));
+ int (*lsn_reset) __P((DB_ENV *, const char *, u_int32_t));
+ int (*memp_fcreate) __P((DB_ENV *, DB_MPOOLFILE **, u_int32_t));
+ int (*memp_register) __P((DB_ENV *, int, int (*)(DB_ENV *, db_pgno_t,
+ void *, DBT *), int (*)(DB_ENV *, db_pgno_t, void *, DBT *)));
+ int (*memp_stat) __P((DB_ENV *,
+ DB_MPOOL_STAT **, DB_MPOOL_FSTAT ***, u_int32_t));
+ int (*memp_stat_print) __P((DB_ENV *, u_int32_t));
+ int (*memp_sync) __P((DB_ENV *, DB_LSN *));
+ int (*memp_trickle) __P((DB_ENV *, int, int *));
+ int (*mutex_alloc) __P((DB_ENV *, u_int32_t, db_mutex_t *));
+ int (*mutex_free) __P((DB_ENV *, db_mutex_t));
+ int (*mutex_get_align) __P((DB_ENV *, u_int32_t *));
+ int (*mutex_get_increment) __P((DB_ENV *, u_int32_t *));
+ int (*mutex_get_max) __P((DB_ENV *, u_int32_t *));
+ int (*mutex_get_tas_spins) __P((DB_ENV *, u_int32_t *));
+ int (*mutex_lock) __P((DB_ENV *, db_mutex_t));
+ int (*mutex_set_align) __P((DB_ENV *, u_int32_t));
+ int (*mutex_set_increment) __P((DB_ENV *, u_int32_t));
+ int (*mutex_set_max) __P((DB_ENV *, u_int32_t));
+ int (*mutex_set_tas_spins) __P((DB_ENV *, u_int32_t));
+ int (*mutex_stat) __P((DB_ENV *, DB_MUTEX_STAT **, u_int32_t));
+ int (*mutex_stat_print) __P((DB_ENV *, u_int32_t));
+ int (*mutex_unlock) __P((DB_ENV *, db_mutex_t));
+ int (*open) __P((DB_ENV *, const char *, u_int32_t, int));
+ int (*remove) __P((DB_ENV *, const char *, u_int32_t));
+ int (*rep_elect) __P((DB_ENV *, u_int32_t, u_int32_t, u_int32_t));
+ int (*rep_flush) __P((DB_ENV *));
+ int (*rep_get_clockskew) __P((DB_ENV *, u_int32_t *, u_int32_t *));
+ int (*rep_get_config) __P((DB_ENV *, u_int32_t, int *));
+ int (*rep_get_limit) __P((DB_ENV *, u_int32_t *, u_int32_t *));
+ int (*rep_get_nsites) __P((DB_ENV *, u_int32_t *));
+ int (*rep_get_priority) __P((DB_ENV *, u_int32_t *));
+ int (*rep_get_request) __P((DB_ENV *, u_int32_t *, u_int32_t *));
+ int (*rep_get_timeout) __P((DB_ENV *, int, u_int32_t *));
+ int (*rep_process_message)
+ __P((DB_ENV *, DBT *, DBT *, int, DB_LSN *));
+ int (*rep_set_clockskew) __P((DB_ENV *, u_int32_t, u_int32_t));
+ int (*rep_set_config) __P((DB_ENV *, u_int32_t, int));
+ int (*rep_set_limit) __P((DB_ENV *, u_int32_t, u_int32_t));
+ int (*rep_set_nsites) __P((DB_ENV *, u_int32_t));
+ int (*rep_set_priority) __P((DB_ENV *, u_int32_t));
+ int (*rep_set_request) __P((DB_ENV *, u_int32_t, u_int32_t));
+ int (*rep_set_timeout) __P((DB_ENV *, int, db_timeout_t));
+ int (*rep_set_transport) __P((DB_ENV *, int, int (*)(DB_ENV *,
+ const DBT *, const DBT *, const DB_LSN *, int, u_int32_t)));
+ int (*rep_start) __P((DB_ENV *, DBT *, u_int32_t));
+ int (*rep_stat) __P((DB_ENV *, DB_REP_STAT **, u_int32_t));
+ int (*rep_stat_print) __P((DB_ENV *, u_int32_t));
+ int (*rep_sync) __P((DB_ENV *, u_int32_t));
+ int (*repmgr_add_remote_site)
+ __P((DB_ENV *, const char *, u_int, int *, u_int32_t));
+ int (*repmgr_get_ack_policy) __P((DB_ENV *, int *));
+ int (*repmgr_set_ack_policy) __P((DB_ENV *, int));
+ int (*repmgr_set_local_site)
+ __P((DB_ENV *, const char *, u_int, u_int32_t));
+ int (*repmgr_site_list)
+ __P((DB_ENV *, u_int *, DB_REPMGR_SITE **));
+ int (*repmgr_start) __P((DB_ENV *, int, u_int32_t));
+ int (*repmgr_stat) __P((DB_ENV *, DB_REPMGR_STAT **, u_int32_t));
+ int (*repmgr_stat_print) __P((DB_ENV *, u_int32_t));
+ int (*set_alloc) __P((DB_ENV *, void *(*)(size_t),
+ void *(*)(void *, size_t), void (*)(void *)));
+ int (*set_app_dispatch)
+ __P((DB_ENV *, int (*)(DB_ENV *, DBT *, DB_LSN *, db_recops)));
+ int (*set_cache_max) __P((DB_ENV *, u_int32_t, u_int32_t));
+ int (*set_cachesize) __P((DB_ENV *, u_int32_t, u_int32_t, int));
+ int (*set_create_dir) __P((DB_ENV *, const char *));
+ int (*set_data_dir) __P((DB_ENV *, const char *));
+ int (*set_encrypt) __P((DB_ENV *, const char *, u_int32_t));
+ void (*set_errcall) __P((DB_ENV *,
+ void (*)(const DB_ENV *, const char *, const char *)));
+ void (*set_errfile) __P((DB_ENV *, FILE *));
+ void (*set_errpfx) __P((DB_ENV *, const char *));
+ int (*set_event_notify)
+ __P((DB_ENV *, void (*)(DB_ENV *, u_int32_t, void *)));
+ int (*set_feedback) __P((DB_ENV *, void (*)(DB_ENV *, int, int)));
+ int (*set_flags) __P((DB_ENV *, u_int32_t, int));
+ int (*set_intermediate_dir_mode) __P((DB_ENV *, const char *));
+ int (*set_isalive) __P((DB_ENV *,
+ int (*)(DB_ENV *, pid_t, db_threadid_t, u_int32_t)));
+ int (*set_lg_bsize) __P((DB_ENV *, u_int32_t));
+ int (*set_lg_dir) __P((DB_ENV *, const char *));
+ int (*set_lg_filemode) __P((DB_ENV *, int));
+ int (*set_lg_max) __P((DB_ENV *, u_int32_t));
+ int (*set_lg_regionmax) __P((DB_ENV *, u_int32_t));
+ int (*set_lk_conflicts) __P((DB_ENV *, u_int8_t *, int));
+ int (*set_lk_detect) __P((DB_ENV *, u_int32_t));
+ int (*set_lk_max_lockers) __P((DB_ENV *, u_int32_t));
+ int (*set_lk_max_locks) __P((DB_ENV *, u_int32_t));
+ int (*set_lk_max_objects) __P((DB_ENV *, u_int32_t));
+ int (*set_lk_partitions) __P((DB_ENV *, u_int32_t));
+ int (*set_mp_max_openfd) __P((DB_ENV *, int));
+ int (*set_mp_max_write) __P((DB_ENV *, int, db_timeout_t));
+ int (*set_mp_mmapsize) __P((DB_ENV *, size_t));
+ int (*set_mp_pagesize) __P((DB_ENV *, u_int32_t));
+ int (*set_mp_tablesize) __P((DB_ENV *, u_int32_t));
+ void (*set_msgcall)
+ __P((DB_ENV *, void (*)(const DB_ENV *, const char *)));
+ void (*set_msgfile) __P((DB_ENV *, FILE *));
+ int (*set_paniccall) __P((DB_ENV *, void (*)(DB_ENV *, int)));
+ int (*set_rpc_server)
+ __P((DB_ENV *, void *, const char *, long, long, u_int32_t));
+ int (*set_shm_key) __P((DB_ENV *, long));
+ int (*set_thread_count) __P((DB_ENV *, u_int32_t));
+ int (*set_thread_id)
+ __P((DB_ENV *, void (*)(DB_ENV *, pid_t *, db_threadid_t *)));
+ int (*set_thread_id_string) __P((DB_ENV *,
+ char *(*)(DB_ENV *, pid_t, db_threadid_t, char *)));
+ int (*set_timeout) __P((DB_ENV *, db_timeout_t, u_int32_t));
+ int (*set_tmp_dir) __P((DB_ENV *, const char *));
+ int (*set_tx_max) __P((DB_ENV *, u_int32_t));
+ int (*set_tx_timestamp) __P((DB_ENV *, time_t *));
+ int (*set_verbose) __P((DB_ENV *, u_int32_t, int));
+ int (*stat_print) __P((DB_ENV *, u_int32_t));
+ int (*txn_begin) __P((DB_ENV *, DB_TXN *, DB_TXN **, u_int32_t));
+ int (*txn_checkpoint) __P((DB_ENV *, u_int32_t, u_int32_t, u_int32_t));
+ int (*txn_recover) __P((DB_ENV *,
+ DB_PREPLIST *, u_int32_t, u_int32_t *, u_int32_t));
+ int (*txn_stat) __P((DB_ENV *, DB_TXN_STAT **, u_int32_t));
+ int (*txn_stat_print) __P((DB_ENV *, u_int32_t));
+ /* DB_ENV PUBLIC HANDLE LIST END */
+
+ /* DB_ENV PRIVATE HANDLE LIST BEGIN */
+ int (*prdbt) __P((DBT *,
+ int, const char *, void *, int (*)(void *, const void *), int));
+ /* DB_ENV PRIVATE HANDLE LIST END */
+};
+
+/*
+ * Dispatch structure for recovery and print routines. Since internal and
+ * external routines take different arguments (ENV versus DB_ENV), we need
+ * something more elaborate than a single pointer and size.
+ */
+struct __db_distab {
+ int (**int_dispatch) __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+ size_t int_size;
+ int (**ext_dispatch) __P((DB_ENV *, DBT *, DB_LSN *, db_recops));
+ size_t ext_size;
+};
+
+#ifndef DB_DBM_HSEARCH
+#define DB_DBM_HSEARCH 0 /* No historic interfaces by default. */
+#endif
+#if DB_DBM_HSEARCH != 0
+/*******************************************************
+ * Dbm/Ndbm historic interfaces.
+ *******************************************************/
+typedef struct __db DBM;
+
+#define DBM_INSERT 0 /* Flags to dbm_store(). */
+#define DBM_REPLACE 1
+
+/*
+ * The DB support for ndbm(3) always appends this suffix to the
+ * file name to avoid overwriting the user's original database.
+ */
+#define DBM_SUFFIX ".db"
+
+#if defined(_XPG4_2)
+typedef struct {
+ char *dptr;
+ size_t dsize;
+} datum;
+#else
+typedef struct {
+ char *dptr;
+ int dsize;
+} datum;
+#endif
+
+/*
+ * Translate NDBM calls into DB calls so that DB doesn't step on the
+ * application's name space.
+ */
+#define dbm_clearerr(a) __db_ndbm_clearerr@DB_VERSION_UNIQUE_NAME@(a)
+#define dbm_close(a) __db_ndbm_close@DB_VERSION_UNIQUE_NAME@(a)
+#define dbm_delete(a, b) __db_ndbm_delete@DB_VERSION_UNIQUE_NAME@(a, b)
+#define dbm_dirfno(a) __db_ndbm_dirfno@DB_VERSION_UNIQUE_NAME@(a)
+#define dbm_error(a) __db_ndbm_error@DB_VERSION_UNIQUE_NAME@(a)
+#define dbm_fetch(a, b) __db_ndbm_fetch@DB_VERSION_UNIQUE_NAME@(a, b)
+#define dbm_firstkey(a) __db_ndbm_firstkey@DB_VERSION_UNIQUE_NAME@(a)
+#define dbm_nextkey(a) __db_ndbm_nextkey@DB_VERSION_UNIQUE_NAME@(a)
+#define dbm_open(a, b, c) __db_ndbm_open@DB_VERSION_UNIQUE_NAME@(a, b, c)
+#define dbm_pagfno(a) __db_ndbm_pagfno@DB_VERSION_UNIQUE_NAME@(a)
+#define dbm_rdonly(a) __db_ndbm_rdonly@DB_VERSION_UNIQUE_NAME@(a)
+#define dbm_store(a, b, c, d) \
+ __db_ndbm_store@DB_VERSION_UNIQUE_NAME@(a, b, c, d)
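+
+/*
+ * Illustrative sketch (not part of the original header): the emulated
+ * ndbm(3) calls above are used exactly like the historic interface; the
+ * library actually opens "names.db" because of DBM_SUFFIX. The file name
+ * is hypothetical.
+ *
+ *	DBM *dbm;
+ *	datum key, val;
+ *
+ *	if ((dbm = dbm_open("names", O_CREAT | O_RDWR, 0644)) != NULL) {
+ *		key.dptr = "fruit";
+ *		key.dsize = 5;
+ *		val.dptr = "apple";
+ *		val.dsize = 5;
+ *		(void)dbm_store(dbm, key, val, DBM_INSERT);
+ *		dbm_close(dbm);
+ *	}
+ */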
+
+/*
+ * Translate DBM calls into DB calls so that DB doesn't step on the
+ * application's name space.
+ *
+ * The global variables dbrdonly, dirf and pagf were not retained when 4BSD
+ * replaced the dbm interface with ndbm, and are not supported here.
+ */
+#define dbminit(a) __db_dbm_init@DB_VERSION_UNIQUE_NAME@(a)
+#define dbmclose __db_dbm_close@DB_VERSION_UNIQUE_NAME@
+#if !defined(__cplusplus)
+#define delete(a) __db_dbm_delete@DB_VERSION_UNIQUE_NAME@(a)
+#endif
+#define fetch(a) __db_dbm_fetch@DB_VERSION_UNIQUE_NAME@(a)
+#define firstkey __db_dbm_firstkey@DB_VERSION_UNIQUE_NAME@
+#define nextkey(a) __db_dbm_nextkey@DB_VERSION_UNIQUE_NAME@(a)
+#define store(a, b) __db_dbm_store@DB_VERSION_UNIQUE_NAME@(a, b)
+
+/*******************************************************
+ * Hsearch historic interface.
+ *******************************************************/
+typedef enum {
+ FIND, ENTER
+} ACTION;
+
+typedef struct entry {
+ char *key;
+ char *data;
+} ENTRY;
+
+#define hcreate(a) __db_hcreate@DB_VERSION_UNIQUE_NAME@(a)
+#define hdestroy __db_hdestroy@DB_VERSION_UNIQUE_NAME@
+#define hsearch(a, b) __db_hsearch@DB_VERSION_UNIQUE_NAME@(a, b)
+
+#endif /* DB_DBM_HSEARCH */
+
+#if defined(__cplusplus)
+}
+#endif
+
+@platform_footer@
+#endif /* !_DB_H_ */
diff --git a/db-4.8.30/dbinc/db_185.in b/db-4.8.30/dbinc/db_185.in
new file mode 100644
index 0000000..d3da455
--- /dev/null
+++ b/db-4.8.30/dbinc/db_185.in
@@ -0,0 +1,176 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996-2009 Oracle. All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_185_H_
+#define _DB_185_H_
+
+#include <sys/types.h>
+
+#include <limits.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * XXX
+ * Handle function prototypes and the keyword "const". This steps on name
+ * space that DB doesn't control, but all of the other solutions are worse.
+ */
+#undef __P
+#if defined(__STDC__) || defined(__cplusplus)
+#define __P(protos) protos /* ANSI C prototypes */
+#else
+#define const
+#define __P(protos) () /* K&R C preprocessor */
+#endif
+
+#define RET_ERROR -1 /* Return values. */
+#define RET_SUCCESS 0
+#define RET_SPECIAL 1
+
+#ifndef __BIT_TYPES_DEFINED__
+#define __BIT_TYPES_DEFINED__
+@u_int8_decl@
+@int16_decl@
+@u_int16_decl@
+@int32_decl@
+@u_int32_decl@
+#endif
+
+/*
+ * XXX
+ * SGI/IRIX already has a pgno_t.
+ */
+#ifdef __sgi
+#define pgno_t db_pgno_t
+#endif
+
+#define MAX_PAGE_NUMBER 0xffffffff /* >= # of pages in a file */
+typedef u_int32_t pgno_t;
+#define MAX_PAGE_OFFSET 65535 /* >= # of bytes in a page */
+typedef u_int16_t indx_t;
+#define MAX_REC_NUMBER 0xffffffff /* >= # of records in a tree */
+typedef u_int32_t recno_t;
+
+/* Key/data structure -- a Data-Base Thang. */
+typedef struct {
+ void *data; /* data */
+ size_t size; /* data length */
+} DBT;
+
+/* Routine flags. */
+#define R_CURSOR 1 /* del, put, seq */
+#define __R_UNUSED 2 /* UNUSED */
+#define R_FIRST 3 /* seq */
+#define R_IAFTER 4 /* put (RECNO) */
+#define R_IBEFORE 5 /* put (RECNO) */
+#define R_LAST 6 /* seq (BTREE, RECNO) */
+#define R_NEXT 7 /* seq */
+#define R_NOOVERWRITE 8 /* put */
+#define R_PREV 9 /* seq (BTREE, RECNO) */
+#define R_SETCURSOR 10 /* put (RECNO) */
+#define R_RECNOSYNC 11 /* sync (RECNO) */
+
+typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
+
+/* Access method description structure. */
+typedef struct __db {
+ DBTYPE type; /* Underlying db type. */
+ int (*close) __P((struct __db *));
+ int (*del) __P((const struct __db *, const DBT *, u_int));
+ int (*get) __P((const struct __db *, const DBT *, DBT *, u_int));
+ int (*put) __P((const struct __db *, DBT *, const DBT *, u_int));
+ int (*seq) __P((const struct __db *, DBT *, DBT *, u_int));
+ int (*sync) __P((const struct __db *, u_int));
+ void *internal; /* Access method private. */
+ int (*fd) __P((const struct __db *));
+} DB;
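+
+/*
+ * Illustrative sketch (not part of the original header): the classic
+ * db-1.85 access pattern through this structure; dbopen() is redefined
+ * below to call into the compatibility layer. The file name is
+ * hypothetical.
+ *
+ *	DB *dbp;
+ *	DBT key, data;
+ *
+ *	if ((dbp = dbopen("test.db",
+ *	    O_CREAT | O_RDWR, 0644, DB_BTREE, NULL)) != NULL) {
+ *		key.data = "fruit";
+ *		key.size = 5;
+ *		data.data = "apple";
+ *		data.size = 5;
+ *		(void)dbp->put(dbp, &key, &data, 0);
+ *		(void)dbp->close(dbp);
+ *	}
+ */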
+
+#define BTREEMAGIC 0x053162
+#define BTREEVERSION 3
+
+/* Structure used to pass parameters to the btree routines. */
+typedef struct {
+#define R_DUP 0x01 /* duplicate keys */
+ u_int32_t flags;
+ u_int32_t cachesize; /* bytes to cache */
+ u_int32_t maxkeypage; /* maximum keys per page */
+ u_int32_t minkeypage; /* minimum keys per page */
+ u_int32_t psize; /* page size */
+ int (*compare) /* comparison function */
+ __P((const DBT *, const DBT *));
+ size_t (*prefix) /* prefix function */
+ __P((const DBT *, const DBT *));
+ int lorder; /* byte order */
+} BTREEINFO;
+
+#define HASHMAGIC 0x061561
+#define HASHVERSION 2
+
+/* Structure used to pass parameters to the hashing routines. */
+typedef struct {
+ u_int32_t bsize; /* bucket size */
+ u_int32_t ffactor; /* fill factor */
+ u_int32_t nelem; /* number of elements */
+ u_int32_t cachesize; /* bytes to cache */
+ u_int32_t /* hash function */
+ (*hash) __P((const void *, size_t));
+ int lorder; /* byte order */
+} HASHINFO;
+
+/* Structure used to pass parameters to the record routines. */
+typedef struct {
+#define R_FIXEDLEN 0x01 /* fixed-length records */
+#define R_NOKEY 0x02 /* key not required */
+#define R_SNAPSHOT 0x04 /* snapshot the input */
+ u_int32_t flags;
+ u_int32_t cachesize; /* bytes to cache */
+ u_int32_t psize; /* page size */
+ int lorder; /* byte order */
+ size_t reclen; /* record length (fixed-length records) */
+ u_char bval; /* delimiting byte (variable-length records) */
+ char *bfname; /* btree file name */
+} RECNOINFO;
+
+/* Re-define the user's dbopen calls. */
+#define dbopen __db185_open@DB_VERSION_UNIQUE_NAME@
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* !_DB_185_H_ */
diff --git a/db-4.8.30/dbinc/db_am.h b/db-4.8.30/dbinc/db_am.h
new file mode 100644
index 0000000..4b2aa22
--- /dev/null
+++ b/db-4.8.30/dbinc/db_am.h
@@ -0,0 +1,311 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+#ifndef _DB_AM_H_
+#define _DB_AM_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * Temporary for the patch release: define this bit here so it
+ * does not renumber the other bits for DB->open.
+ */
+#define DB_NOERROR 0x10000000
+
+struct __db_foreign_info;
+typedef struct __db_foreign_info DB_FOREIGN_INFO;
+
+/*
+ * Keep track of information for foreign keys. Used to maintain a linked list
+ * of 'primary' DBs which reference this 'foreign' DB.
+ */
+struct __db_foreign_info {
+ DB *dbp;
+ u_int32_t flags;
+ int (*callback) __P((DB *, const DBT *, DBT *, const DBT *, int *));
+
+ /*
+ * List entries for foreign key.
+ *
+ * !!!
+ * Explicit representations of structures from queue.h.
+ * LIST_ENTRY(__db) s_links;
+ */
+ struct {
+ struct __db_foreign_info *le_next;
+ struct __db_foreign_info **le_prev;
+ } f_links;
+};
+
+/*
+ * IS_ENV_AUTO_COMMIT --
+ * Auto-commit test for environment operations: DbEnv::{open,remove,rename}
+ */
+#define IS_ENV_AUTO_COMMIT(env, txn, flags) \
+ (LF_ISSET(DB_AUTO_COMMIT) || ((txn) == NULL && \
+ F_ISSET((env)->dbenv, DB_ENV_AUTO_COMMIT) && \
+ !LF_ISSET(DB_NO_AUTO_COMMIT)))
+
+/*
+ * IS_DB_AUTO_COMMIT --
+ * Auto-commit test for database operations.
+ */
+#define IS_DB_AUTO_COMMIT(dbp, txn) \
+ ((txn) == NULL && F_ISSET((dbp), DB_AM_TXN))
+
+/*
+ * STRIP_AUTO_COMMIT --
+ * Releases after 4.3 no longer require DB operations to specify the
+ * AUTO_COMMIT flag, but the API continues to allow it to be specified.
+ */
+#define STRIP_AUTO_COMMIT(f) FLD_CLR((f), DB_AUTO_COMMIT)
+
+/* DB recovery operation codes. */
+#define DB_ADD_DUP 1
+#define DB_REM_DUP 2
+#define DB_ADD_BIG 3
+#define DB_REM_BIG 4
+#define DB_ADD_PAGE_COMPAT 5 /* Compatibility for 4.2 db_relink */
+#define DB_REM_PAGE_COMPAT 6 /* Compatibility for 4.2 db_relink */
+#define DB_APPEND_BIG 7
+
+/*
+ * Standard initialization and shutdown macros for all recovery functions.
+ */
+#define REC_INTRO(func, ip, do_cursor) do { \
+ argp = NULL; \
+ dbc = NULL; \
+ file_dbp = NULL; \
+ COMPQUIET(mpf, NULL); /* Not all recovery routines use mpf. */\
+ if ((ret = func(env, &file_dbp, \
+ (info != NULL) ? ((DB_TXNHEAD *)info)->td : NULL, \
+ dbtp->data, &argp)) != 0) { \
+ if (ret == DB_DELETED) { \
+ ret = 0; \
+ goto done; \
+ } \
+ goto out; \
+ } \
+ if (do_cursor) { \
+ if ((ret = \
+ __db_cursor(file_dbp, ip, NULL, &dbc, 0)) != 0) \
+ goto out; \
+ F_SET(dbc, DBC_RECOVER); \
+ } \
+ mpf = file_dbp->mpf; \
+} while (0)
+
+#define REC_CLOSE { \
+ int __t_ret; \
+ if (argp != NULL) \
+ __os_free(env, argp); \
+ if (dbc != NULL && \
+ (__t_ret = __dbc_close(dbc)) != 0 && ret == 0) \
+ ret = __t_ret; \
+ } \
+ return (ret)
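+
+/*
+ * Illustrative sketch (not part of the original header): the shape of a
+ * recovery function built from these macros. "__foo_recover",
+ * "__foo_read" and "__foo_print" are hypothetical stand-ins for the
+ * generated routines of a log record type.
+ *
+ *	int
+ *	__foo_recover(env, dbtp, lsnp, op, info)
+ *	{
+ *		__foo_args *argp;
+ *		DB_THREAD_INFO *ip;
+ *		DB *file_dbp;
+ *		DBC *dbc;
+ *		DB_MPOOLFILE *mpf;
+ *		int ret;
+ *
+ *		ip = ((DB_TXNHEAD *)info)->thread_info;
+ *		REC_PRINT(__foo_print);
+ *		REC_INTRO(__foo_read, ip, 1);
+ *
+ *		... redo or undo the change, based on "op" ...
+ *
+ *	done:
+ *	out:	REC_CLOSE;
+ *	}
+ */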
+
+/*
+ * No-op versions of the same macros.
+ */
+#define REC_NOOP_INTRO(func) do { \
+ argp = NULL; \
+ if ((ret = func(env, dbtp->data, &argp)) != 0) \
+ return (ret); \
+} while (0)
+#define REC_NOOP_CLOSE \
+ if (argp != NULL) \
+ __os_free(env, argp); \
+ return (ret)
+
+/*
+ * Macro for reading pages during recovery. In most cases we
+ * want to avoid an error if the page is not found during rollback.
+ */
+#define REC_FGET(mpf, ip, pgno, pagep, cont) \
+ if ((ret = __memp_fget(mpf, \
+ &(pgno), ip, NULL, 0, pagep)) != 0) { \
+ if (ret != DB_PAGE_NOTFOUND) { \
+ ret = __db_pgerr(file_dbp, pgno, ret); \
+ goto out; \
+ } else \
+ goto cont; \
+ }
+#define REC_DIRTY(mpf, ip, priority, pagep) \
+ if ((ret = __memp_dirty(mpf, \
+ pagep, ip, NULL, priority, DB_MPOOL_EDIT)) != 0) { \
+ ret = __db_pgerr(file_dbp, PGNO(*(pagep)), ret); \
+ goto out; \
+ }
+
+/*
+ * Standard debugging macro for all recovery functions.
+ */
+#ifdef DEBUG_RECOVER
+#define REC_PRINT(func) \
+ (void)func(env, dbtp, lsnp, op, info);
+#else
+#define REC_PRINT(func)
+#endif
+
+/*
+ * Actions to __db_lget
+ */
+#define LCK_ALWAYS 1 /* Lock even for off page dup cursors */
+#define LCK_COUPLE 2 /* Lock Couple */
+#define LCK_COUPLE_ALWAYS 3 /* Lock Couple even in txn. */
+#define LCK_DOWNGRADE 4 /* Downgrade the lock. (internal) */
+#define LCK_ROLLBACK 5 /* Lock even if in rollback */
+
+/*
+ * If doing transactions we have to hold the locks associated with a data item
+ * from a page for the entire transaction. However, we don't have to hold the
+ * locks associated with walking the tree. Distinguish between the two so that
+ * we don't tie up the internal pages of the tree longer than necessary.
+ */
+#define __LPUT(dbc, lock) \
+ __ENV_LPUT((dbc)->env, lock)
+
+#define __ENV_LPUT(env, lock) \
+ (LOCK_ISSET(lock) ? __lock_put(env, &(lock)) : 0)
+
+/*
+ * __TLPUT -- transactional lock put
+ * If the lock is valid then
+ * If we are not in a transaction put the lock.
+ * Else if the cursor is doing dirty reads and this was a read then
+ * put the lock.
+ * Else if the db is supporting dirty reads and this is a write then
+ * downgrade it.
+ * Else do nothing.
+ */
+#define __TLPUT(dbc, lock) \
+ (LOCK_ISSET(lock) ? __db_lput(dbc, &(lock)) : 0)
+
+/*
+ * Check whether a database is a primary (that is, has associated secondaries).
+ */
+#define DB_IS_PRIMARY(dbp) (LIST_FIRST(&dbp->s_secondaries) != NULL)
+/*
+ * A database should be required to be readonly if it's been explicitly
+ * specified as such or if we're a client in a replicated environment
+ * and the user did not specify DB_TXN_NOT_DURABLE.
+ */
+#define DB_IS_READONLY(dbp) \
+ (F_ISSET(dbp, DB_AM_RDONLY) || \
+ (IS_REP_CLIENT((dbp)->env) && !F_ISSET((dbp), DB_AM_NOT_DURABLE)))
+
+#ifdef HAVE_COMPRESSION
+/*
+ * Check whether a database is compressed (btree only)
+ */
+#define DB_IS_COMPRESSED(dbp) \
+ (((BTREE *)(dbp)->bt_internal)->bt_compress != NULL)
+#endif
+
+/*
+ * We copy the key out if there's any chance the key in the database is not
+ * the same as the user-specified key. If there is a custom comparator we
+ * return a key, as the user-specified key might be a partial key, containing
+ * only the unique identifier. [#13572] [#15770]
+ *
+ * The test for (flags != 0) is necessary for Db.{get,pget}, but it's not
+ * legal to pass a non-zero flags value to Dbc.{get,pget}.
+ *
+ * We need to split out the hash component, since it is possible to build
+ * without hash support enabled, which would result in a null pointer
+ * access.
+ */
+#ifdef HAVE_HASH
+#define DB_RETURNS_A_KEY_HASH(dbp) \
+ ((HASH *)(dbp)->h_internal)->h_compare != NULL
+#else
+#define DB_RETURNS_A_KEY_HASH(dbp) 0
+#endif
+#define DB_RETURNS_A_KEY(dbp, flags) \
+ (((flags) != 0 && (flags) != DB_GET_BOTH && \
+ (flags) != DB_GET_BOTH_RANGE && (flags) != DB_SET) || \
+ ((BTREE *)(dbp)->bt_internal)->bt_compare != __bam_defcmp ||\
+ DB_RETURNS_A_KEY_HASH(dbp))
+
+/*
+ * For portability, primary keys that are record numbers are stored in
+ * secondaries in the same byte order as the secondary database. As a
+ * consequence, we need to swap the byte order of these keys before attempting
+ * to use them for lookups in the primary. We also need to swap user-supplied
+ * primary keys that are used in secondary lookups (for example, with the
+ * DB_GET_BOTH flag on a secondary get).
+ */
+#include "dbinc/db_swap.h"
+
+#define SWAP_IF_NEEDED(sdbp, pkey) \
+ do { \
+ if (((sdbp)->s_primary->type == DB_QUEUE || \
+ (sdbp)->s_primary->type == DB_RECNO) && \
+ F_ISSET((sdbp), DB_AM_SWAP)) \
+ P_32_SWAP((pkey)->data); \
+ } while (0)
+
+/*
+ * Cursor adjustment:
+ * Return the first DB handle in the sorted ENV list of DB
+ * handles that has a matching file ID.
+ */
+#define FIND_FIRST_DB_MATCH(env, dbp, tdbp) do { \
+ for ((tdbp) = (dbp); \
+ TAILQ_PREV((tdbp), __dblist, dblistlinks) != NULL && \
+ TAILQ_PREV((tdbp), \
+ __dblist, dblistlinks)->adj_fileid == (dbp)->adj_fileid;\
+ (tdbp) = TAILQ_PREV((tdbp), __dblist, dblistlinks)) \
+ ; \
+} while (0)
+
+/*
+ * Macros used to implement a binary search algorithm. Shared between the
+ * btree and hash implementations.
+ */
+#define DB_BINARY_SEARCH_FOR(base, limit, nument, adjust) \
+ for (base = 0, limit = (nument) / (db_indx_t)(adjust); \
+ (limit) != 0; (limit) >>= 1)
+
+#define DB_BINARY_SEARCH_INCR(index, base, limit, adjust) \
+ index = (base) + (((limit) >> 1) * (adjust))
+
+#define DB_BINARY_SEARCH_SHIFT_BASE(index, base, limit, adjust) do { \
+ base = (index) + (adjust); \
+ --(limit); \
+} while (0)
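+
+/*
+ * Illustrative sketch (not part of the original header): how the three
+ * macros cooperate in a search loop, in the style of the btree search
+ * code; "nument", "compare" and "page_item" are stand-ins.
+ *
+ *	DB_BINARY_SEARCH_FOR(base, limit, nument, adjust) {
+ *		DB_BINARY_SEARCH_INCR(indx, base, limit, adjust);
+ *		if ((cmp = compare(key, page_item(indx))) == 0)
+ *			goto found;
+ *		if (cmp > 0)
+ *			DB_BINARY_SEARCH_SHIFT_BASE(indx, base,
+ *			    limit, adjust);
+ *	}
+ */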
+
+/*
+ * Sequence macros, shared between sequence.c and seq_stat.c
+ */
+#define SEQ_IS_OPEN(seq) ((seq)->seq_key.data != NULL)
+
+#define SEQ_ILLEGAL_AFTER_OPEN(seq, name) \
+ if (SEQ_IS_OPEN(seq)) \
+ return (__db_mi_open((seq)->seq_dbp->env, name, 1));
+
+#define SEQ_ILLEGAL_BEFORE_OPEN(seq, name) \
+ if (!SEQ_IS_OPEN(seq)) \
+ return (__db_mi_open((seq)->seq_dbp->env, name, 0));
+
+/*
+ * Flags to __db_chk_meta.
+ */
+#define DB_CHK_META 0x01 /* Checksum the meta page. */
+#define DB_CHK_NOLSN 0x02 /* Don't check the LSN. */
+
+#if defined(__cplusplus)
+}
+#endif
+
+#include "dbinc/db_dispatch.h"
+#include "dbinc_auto/db_auto.h"
+#include "dbinc_auto/crdel_auto.h"
+#include "dbinc_auto/db_ext.h"
+#endif /* !_DB_AM_H_ */
diff --git a/db-4.8.30/dbinc/db_cxx.in b/db-4.8.30/dbinc/db_cxx.in
new file mode 100644
index 0000000..0d0fd12
--- /dev/null
+++ b/db-4.8.30/dbinc/db_cxx.in
@@ -0,0 +1,1365 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_CXX_H_
+#define _DB_CXX_H_
+//
+// C++ assumptions:
+//
+// To ensure portability to many platforms, both new and old, we make
+// few assumptions about the C++ compiler and library. For example,
+// we do not expect STL, templates or namespaces to be available. The
+// "newest" C++ feature used is exceptions, which are used liberally
+// to transmit error information. Even the use of exceptions can be
+// disabled at runtime; to do so, use the DB_CXX_NO_EXCEPTIONS flag
+// with the DbEnv or Db constructor.
+//
+// C++ naming conventions:
+//
+// - All top level class names start with Db.
+// - All class members start with a lower-case letter.
+// - All private data members are suffixed with underscore.
+// - Use underscores to divide names into multiple words.
+// - Simple data accessors are named with get_ or set_ prefix.
+// - All method names are taken from names of functions in the C
+// layer of db (usually by dropping a prefix like "db_").
+// These methods have the same argument types and order,
+// other than dropping the explicit arg that acts as "this".
+//
+// As a rule, each DbFoo object has exactly one underlying DB_FOO struct
+// (defined in db.h) associated with it. In some cases, we inherit directly
+// from the DB_FOO structure to make this relationship explicit. Often,
+// the underlying C layer allocates and deallocates these structures, so
+// there is no easy way to add any data to the DbFoo class. When you see
+// a comment about whether data is permitted to be added, this is what
+// is going on. Of course, if we need to add data to such C++ classes
+// in the future, we will arrange to have an indirect pointer to the
+// DB_FOO struct (as some of the classes already have).
+//
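+// Illustrative sketch (not part of the original header): because of the
+// one-to-one DbFoo/DB_FOO relationship, a C++ handle can always drop down
+// to the C layer, and back:
+//
+//	Db db(NULL, 0);
+//	DB *c_handle = db.get_DB();		// underlying db.h struct
+//	Db *same = Db::get_Db(c_handle);	// back to the C++ wrapper
+//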
+
+////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////
+//
+// Forward declarations
+//
+
+#include <stdarg.h>
+
+@cxx_have_stdheaders@
+#ifdef HAVE_CXX_STDHEADERS
+#include <iostream>
+#include <exception>
+#define __DB_STD(x) std::x
+#else
+#include <iostream.h>
+#include <exception.h>
+#define __DB_STD(x) x
+#endif
+
+#include "db.h"
+
+class Db; // forward
+class Dbc; // forward
+class DbEnv; // forward
+class DbInfo; // forward
+class DbLock; // forward
+class DbLogc; // forward
+class DbLsn; // forward
+class DbMpoolFile; // forward
+class DbPreplist; // forward
+class DbSequence; // forward
+class Dbt; // forward
+class DbTxn; // forward
+
+class DbMultipleIterator; // forward
+class DbMultipleKeyDataIterator; // forward
+class DbMultipleRecnoDataIterator; // forward
+class DbMultipleDataIterator; // forward
+
+class DbException; // forward
+class DbDeadlockException; // forward
+class DbLockNotGrantedException; // forward
+class DbMemoryException; // forward
+class DbRepHandleDeadException; // forward
+class DbRunRecoveryException; // forward
+
+////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////
+//
+// Turn off inappropriate compiler warnings
+//
+
+#ifdef _MSC_VER
+
+// These are level 4 warnings that are explicitly disabled.
+// With Visual C++, by default you do not see warnings above level 3
+// unless you use /W4. But we like to compile at the highest warning
+// level to catch other errors.
+//
+// 4201: nameless struct/union
+// triggered by standard include file <winnt.h>
+//
+// 4514: unreferenced inline function has been removed
+// certain include files in MSVC define methods that are not called
+//
+#pragma warning(push)
+#pragma warning(disable: 4201 4514)
+
+#endif
+
+////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////
+//
+// Mechanisms for declaring classes
+//
+
+//
+// Every class defined in this file has an _exported next to the class name.
+// This is needed for WinTel machines so that the class methods can
+// be exported or imported in a DLL as appropriate. Users of the DLL
+// use the define DB_USE_DLL. When the DLL is built, DB_CREATE_DLL
+// must be defined.
+//
+#if defined(_MSC_VER)
+
+# if defined(DB_CREATE_DLL)
+# define _exported __declspec(dllexport) // creator of dll
+# elif defined(DB_USE_DLL)
+# define _exported __declspec(dllimport) // user of dll
+# else
+# define _exported // static lib creator or user
+# endif
+
+#else /* _MSC_VER */
+
+# define _exported
+
+#endif /* _MSC_VER */
+
+// Some interfaces can be customized by allowing users to define
+// callback functions. For performance and logistical reasons, some
+// callback functions must be declared in extern "C" blocks. For others,
+// we allow you to declare the callbacks in C++ or C (or an extern "C"
+// block) as you wish. See the set methods for the callbacks for
+// the choices.
+//
+extern "C" {
+ typedef void * (*db_malloc_fcn_type)
+ (size_t);
+ typedef void * (*db_realloc_fcn_type)
+ (void *, size_t);
+ typedef void (*db_free_fcn_type)
+ (void *);
+ typedef int (*bt_compare_fcn_type) /*C++ version available*/
+ (DB *, const DBT *, const DBT *);
+ typedef size_t (*bt_prefix_fcn_type) /*C++ version available*/
+ (DB *, const DBT *, const DBT *);
+ typedef int (*dup_compare_fcn_type) /*C++ version available*/
+ (DB *, const DBT *, const DBT *);
+ typedef int (*h_compare_fcn_type) /*C++ version available*/
+ (DB *, const DBT *, const DBT *);
+ typedef u_int32_t (*h_hash_fcn_type) /*C++ version available*/
+ (DB *, const void *, u_int32_t);
+ typedef int (*pgin_fcn_type)
+ (DB_ENV *dbenv, db_pgno_t pgno, void *pgaddr, DBT *pgcookie);
+ typedef int (*pgout_fcn_type)
+ (DB_ENV *dbenv, db_pgno_t pgno, void *pgaddr, DBT *pgcookie);
+}
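+
+// Illustrative sketch (not part of the original header): a custom btree
+// comparator using the C++-style signature accepted by Db::set_bt_compare
+// below. The function name is hypothetical.
+//
+//	int order_by_length(Db *db, const Dbt *a, const Dbt *b)
+//	{
+//		if (a->get_size() != b->get_size())
+//			return (a->get_size() < b->get_size() ? -1 : 1);
+//		return (memcmp(a->get_data(), b->get_data(), a->get_size()));
+//	}
+//
+// Install it with db.set_bt_compare(order_by_length) before Db::open().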
+
+//
+// Represents a database table = a set of keys with associated values.
+//
+class _exported Db
+{
+ friend class DbEnv;
+
+public:
+ Db(DbEnv*, u_int32_t); // Create a Db object.
+ virtual ~Db(); // Calls close() if the user hasn't.
+
+ // These methods exactly match those in the C interface.
+ //
+ virtual int associate(DbTxn *txn, Db *secondary, int (*callback)
+ (Db *, const Dbt *, const Dbt *, Dbt *), u_int32_t flags);
+ virtual int associate_foreign(Db *foreign, int (*callback)
+ (Db *, const Dbt *, Dbt *, const Dbt *, int *), u_int32_t flags);
+ virtual int close(u_int32_t flags);
+ virtual int compact(DbTxn *txnid, Dbt *start,
+ Dbt *stop, DB_COMPACT *c_data, u_int32_t flags, Dbt *end);
+ virtual int cursor(DbTxn *txnid, Dbc **cursorp, u_int32_t flags);
+ virtual int del(DbTxn *txnid, Dbt *key, u_int32_t flags);
+ virtual void err(int, const char *, ...);
+ virtual void errx(const char *, ...);
+ virtual int exists(DbTxn *txnid, Dbt *key, u_int32_t flags);
+ virtual int fd(int *fdp);
+ virtual int get(DbTxn *txnid, Dbt *key, Dbt *data, u_int32_t flags);
+ virtual int get_alloc(
+ db_malloc_fcn_type *, db_realloc_fcn_type *, db_free_fcn_type *);
+ virtual int get_append_recno(int (**)(Db *, Dbt *, db_recno_t));
+ virtual int get_bt_compare(int (**)(Db *, const Dbt *, const Dbt *));
+ virtual int get_bt_compress(
+ int (**)(
+ Db *, const Dbt *, const Dbt *, const Dbt *, const Dbt *, Dbt *),
+ int (**)(Db *, const Dbt *, const Dbt *, Dbt *, Dbt *, Dbt *));
+ virtual int get_bt_minkey(u_int32_t *);
+ virtual int get_bt_prefix(size_t (**)(Db *, const Dbt *, const Dbt *));
+ virtual int get_byteswapped(int *);
+ virtual int get_cachesize(u_int32_t *, u_int32_t *, int *);
+ virtual int get_create_dir(const char **);
+ virtual int get_dbname(const char **, const char **);
+ virtual int get_dup_compare(int (**)(Db *, const Dbt *, const Dbt *));
+ virtual int get_encrypt_flags(u_int32_t *);
+ virtual void get_errcall(
+ void (**)(const DbEnv *, const char *, const char *));
+ virtual void get_errfile(FILE **);
+ virtual void get_errpfx(const char **);
+ virtual int get_feedback(void (**)(Db *, int, int));
+ virtual int get_flags(u_int32_t *);
+ virtual int get_h_compare(int (**)(Db *, const Dbt *, const Dbt *));
+ virtual int get_h_ffactor(u_int32_t *);
+ virtual int get_h_hash(u_int32_t (**)(Db *, const void *, u_int32_t));
+ virtual int get_h_nelem(u_int32_t *);
+ virtual int get_lorder(int *);
+ virtual void get_msgcall(void (**)(const DbEnv *, const char *));
+ virtual void get_msgfile(FILE **);
+ virtual int get_multiple();
+ virtual int get_open_flags(u_int32_t *);
+ virtual int get_pagesize(u_int32_t *);
+ virtual int get_partition_callback(
+ u_int32_t *, u_int32_t (**)(Db *, Dbt *key));
+ virtual int get_partition_dirs(const char ***);
+ virtual int get_partition_keys(u_int32_t *, Dbt **);
+ virtual int get_priority(DB_CACHE_PRIORITY *);
+ virtual int get_q_extentsize(u_int32_t *);
+ virtual int get_re_delim(int *);
+ virtual int get_re_len(u_int32_t *);
+ virtual int get_re_pad(int *);
+ virtual int get_re_source(const char **);
+ virtual int get_transactional();
+ virtual int get_type(DBTYPE *);
+ virtual int join(Dbc **curslist, Dbc **dbcp, u_int32_t flags);
+ virtual int key_range(DbTxn *, Dbt *, DB_KEY_RANGE *, u_int32_t);
+ virtual int open(DbTxn *txnid,
+ const char *, const char *subname, DBTYPE, u_int32_t, int);
+ virtual int pget(DbTxn *txnid,
+ Dbt *key, Dbt *pkey, Dbt *data, u_int32_t flags);
+ virtual int put(DbTxn *, Dbt *, Dbt *, u_int32_t);
+ virtual int remove(const char *, const char *, u_int32_t);
+ virtual int rename(const char *, const char *, const char *, u_int32_t);
+ virtual int set_alloc(
+ db_malloc_fcn_type, db_realloc_fcn_type, db_free_fcn_type);
+ virtual void set_app_private(void *);
+ virtual int set_append_recno(int (*)(Db *, Dbt *, db_recno_t));
+ virtual int set_bt_compare(bt_compare_fcn_type); /*deprecated*/
+ virtual int set_bt_compare(int (*)(Db *, const Dbt *, const Dbt *));
+ virtual int set_bt_compress(
+ int (*)
+ (Db *, const Dbt *, const Dbt *, const Dbt *, const Dbt *, Dbt *),
+ int (*)(Db *, const Dbt *, const Dbt *, Dbt *, Dbt *, Dbt *));
+ virtual int set_bt_minkey(u_int32_t);
+ virtual int set_bt_prefix(bt_prefix_fcn_type); /*deprecated*/
+ virtual int set_bt_prefix(size_t (*)(Db *, const Dbt *, const Dbt *));
+ virtual int set_cachesize(u_int32_t, u_int32_t, int);
+ virtual int set_create_dir(const char *);
+ virtual int set_dup_compare(dup_compare_fcn_type); /*deprecated*/
+ virtual int set_dup_compare(int (*)(Db *, const Dbt *, const Dbt *));
+ virtual int set_encrypt(const char *, u_int32_t);
+ virtual void set_errcall(
+ void (*)(const DbEnv *, const char *, const char *));
+ virtual void set_errfile(FILE *);
+ virtual void set_errpfx(const char *);
+ virtual int set_feedback(void (*)(Db *, int, int));
+ virtual int set_flags(u_int32_t);
+ virtual int set_h_compare(h_compare_fcn_type); /*deprecated*/
+ virtual int set_h_compare(int (*)(Db *, const Dbt *, const Dbt *));
+ virtual int set_h_ffactor(u_int32_t);
+ virtual int set_h_hash(h_hash_fcn_type); /*deprecated*/
+ virtual int set_h_hash(u_int32_t (*)(Db *, const void *, u_int32_t));
+ virtual int set_h_nelem(u_int32_t);
+ virtual int set_lorder(int);
+ virtual void set_msgcall(void (*)(const DbEnv *, const char *));
+ virtual void set_msgfile(FILE *);
+ virtual int set_pagesize(u_int32_t);
+ virtual int set_paniccall(void (*)(DbEnv *, int));
+ virtual int set_partition(
+ u_int32_t, Dbt *, u_int32_t (*)(Db *, Dbt *));
+ virtual int set_partition_dirs(const char **);
+ virtual int set_priority(DB_CACHE_PRIORITY);
+ virtual int set_q_extentsize(u_int32_t);
+ virtual int set_re_delim(int);
+ virtual int set_re_len(u_int32_t);
+ virtual int set_re_pad(int);
+ virtual int set_re_source(const char *);
+ virtual int sort_multiple(Dbt *, Dbt *, u_int32_t);
+ virtual int stat(DbTxn *, void *sp, u_int32_t flags);
+ virtual int stat_print(u_int32_t flags);
+ virtual int sync(u_int32_t flags);
+ virtual int truncate(DbTxn *, u_int32_t *, u_int32_t);
+ virtual int upgrade(const char *name, u_int32_t flags);
+ virtual int verify(
+ const char *, const char *, __DB_STD(ostream) *, u_int32_t);
+
+ // These additional methods are not in the C interface, and
+ // are only available for C++.
+ //
+ virtual void *get_app_private() const;
+ virtual __DB_STD(ostream) *get_error_stream();
+ virtual void set_error_stream(__DB_STD(ostream) *);
+ virtual __DB_STD(ostream) *get_message_stream();
+ virtual void set_message_stream(__DB_STD(ostream) *);
+
+ virtual DbEnv *get_env();
+ virtual DbMpoolFile *get_mpf();
+
+ virtual ENV *get_ENV()
+ {
+ return imp_->env;
+ }
+
+ virtual DB *get_DB()
+ {
+ return imp_;
+ }
+
+ virtual const DB *get_const_DB() const
+ {
+ return imp_;
+ }
+
+ static Db* get_Db(DB *db)
+ {
+ return (Db *)db->api_internal;
+ }
+
+ static const Db* get_const_Db(const DB *db)
+ {
+ return (const Db *)db->api_internal;
+ }
+
+ u_int32_t get_create_flags() const
+ {
+ return construct_flags_;
+ }
+
+private:
+ // no copying
+ Db(const Db &);
+ Db &operator = (const Db &);
+
+ void cleanup();
+ int initialize();
+ int error_policy();
+
+ // instance data
+ DB *imp_;
+ DbEnv *dbenv_;
+ DbMpoolFile *mpf_;
+ int construct_error_;
+ u_int32_t flags_;
+ u_int32_t construct_flags_;
+
+public:
+ // These are public only because they need to be called
+ // via C callback functions. They should never be used by
+ // external users of this class.
+ //
+ int (*append_recno_callback_)(Db *, Dbt *, db_recno_t);
+ int (*associate_callback_)(Db *, const Dbt *, const Dbt *, Dbt *);
+ int (*associate_foreign_callback_)
+ (Db *, const Dbt *, Dbt *, const Dbt *, int *);
+ int (*bt_compare_callback_)(Db *, const Dbt *, const Dbt *);
+ int (*bt_compress_callback_)(
+ Db *, const Dbt *, const Dbt *, const Dbt *, const Dbt *, Dbt *);
+ int (*bt_decompress_callback_)(
+ Db *, const Dbt *, const Dbt *, Dbt *, Dbt *, Dbt *);
+ size_t (*bt_prefix_callback_)(Db *, const Dbt *, const Dbt *);
+ u_int32_t (*db_partition_callback_)(Db *, Dbt *);
+ int (*dup_compare_callback_)(Db *, const Dbt *, const Dbt *);
+ void (*feedback_callback_)(Db *, int, int);
+ int (*h_compare_callback_)(Db *, const Dbt *, const Dbt *);
+ u_int32_t (*h_hash_callback_)(Db *, const void *, u_int32_t);
+};
+
+//
+// Cursor
+//
+class _exported Dbc : protected DBC
+{
+ friend class Db;
+
+public:
+ int close();
+ int cmp(Dbc *other_csr, int *result, u_int32_t flags);
+ int count(db_recno_t *countp, u_int32_t flags);
+ int del(u_int32_t flags);
+ int dup(Dbc** cursorp, u_int32_t flags);
+ int get(Dbt* key, Dbt *data, u_int32_t flags);
+ int get_priority(DB_CACHE_PRIORITY *priorityp);
+ int pget(Dbt* key, Dbt* pkey, Dbt *data, u_int32_t flags);
+ int put(Dbt* key, Dbt *data, u_int32_t flags);
+ int set_priority(DB_CACHE_PRIORITY priority);
+
+private:
+ // No data is permitted in this class (see comment at top)
+
+ // Note: use Db::cursor() to get pointers to a Dbc,
+ // and call Dbc::close() rather than delete to release them.
+ //
+ Dbc();
+ ~Dbc();
+
+ // no copying
+ Dbc(const Dbc &);
+ Dbc &operator = (const Dbc &);
+};
+
+//
+// Berkeley DB environment class. Provides functions for opening databases.
+// Users of this library can use this class as a starting point for
+// developing a DB application: derive an application class from this
+// one and add application control logic.
+//
+// Note that if you use the default constructor, you must explicitly
+// call appinit() before any other db activity (e.g. opening files).
+//
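+// Illustrative sketch (not part of the original header): the usual
+// sequence, assuming a transactional application and a hypothetical
+// home directory.
+//
+//	DbEnv env(0);
+//	env.set_cachesize(0, 4 * 1024 * 1024, 1);
+//	env.open("/some/home", DB_CREATE | DB_INIT_MPOOL |
+//	    DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG, 0);
+//	...
+//	env.close(0);
+//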
+class _exported DbEnv
+{
+ friend class Db;
+ friend class DbLock;
+ friend class DbMpoolFile;
+
+public:
+ // After using this constructor, you can set any needed
+ // parameters for the environment using the set_* methods.
+ // Then call open() to finish initializing the environment
+ // and attaching it to underlying files.
+ //
+ DbEnv(u_int32_t flags);
+
+ virtual ~DbEnv();
+
+ // These methods match those in the C interface.
+ //
+ virtual int add_data_dir(const char *);
+ virtual int cdsgroup_begin(DbTxn **tid);
+ virtual int close(u_int32_t);
+ virtual int dbremove(DbTxn *txn, const char *name, const char *subdb,
+ u_int32_t flags);
+ virtual int dbrename(DbTxn *txn, const char *name, const char *subdb,
+ const char *newname, u_int32_t flags);
+ virtual void err(int, const char *, ...);
+ virtual void errx(const char *, ...);
+ virtual int failchk(u_int32_t);
+ virtual int fileid_reset(const char *, u_int32_t);
+ virtual int get_alloc(db_malloc_fcn_type *, db_realloc_fcn_type *,
+ db_free_fcn_type *);
+ virtual void *get_app_private() const;
+ virtual int get_home(const char **);
+ virtual int get_open_flags(u_int32_t *);
+ virtual int open(const char *, u_int32_t, int);
+ virtual int remove(const char *, u_int32_t);
+ virtual int stat_print(u_int32_t flags);
+
+ virtual int set_alloc(db_malloc_fcn_type, db_realloc_fcn_type,
+ db_free_fcn_type);
+ virtual void set_app_private(void *);
+ virtual int get_cachesize(u_int32_t *, u_int32_t *, int *);
+ virtual int set_cachesize(u_int32_t, u_int32_t, int);
+ virtual int get_cache_max(u_int32_t *, u_int32_t *);
+ virtual int set_cache_max(u_int32_t, u_int32_t);
+ virtual int get_create_dir(const char **);
+ virtual int set_create_dir(const char *);
+ virtual int get_data_dirs(const char ***);
+ virtual int set_data_dir(const char *);
+ virtual int get_encrypt_flags(u_int32_t *);
+ virtual int get_intermediate_dir_mode(const char **);
+ virtual int set_intermediate_dir_mode(const char *);
+ virtual int get_isalive(
+ int (**)(DbEnv *, pid_t, db_threadid_t, u_int32_t));
+ virtual int set_isalive(
+ int (*)(DbEnv *, pid_t, db_threadid_t, u_int32_t));
+ virtual int set_encrypt(const char *, u_int32_t);
+ virtual void get_errcall(
+ void (**)(const DbEnv *, const char *, const char *));
+ virtual void set_errcall(
+ void (*)(const DbEnv *, const char *, const char *));
+ virtual void get_errfile(FILE **);
+ virtual void set_errfile(FILE *);
+ virtual void get_errpfx(const char **);
+ virtual void set_errpfx(const char *);
+ virtual int set_event_notify(void (*)(DbEnv *, u_int32_t, void *));
+ virtual int get_flags(u_int32_t *);
+ virtual int set_flags(u_int32_t, int);
+ virtual bool is_bigendian();
+ virtual int lsn_reset(const char *, u_int32_t);
+ virtual int get_feedback(void (**)(DbEnv *, int, int));
+ virtual int set_feedback(void (*)(DbEnv *, int, int));
+ virtual int get_lg_bsize(u_int32_t *);
+ virtual int set_lg_bsize(u_int32_t);
+ virtual int get_lg_dir(const char **);
+ virtual int set_lg_dir(const char *);
+ virtual int get_lg_filemode(int *);
+ virtual int set_lg_filemode(int);
+ virtual int get_lg_max(u_int32_t *);
+ virtual int set_lg_max(u_int32_t);
+ virtual int get_lg_regionmax(u_int32_t *);
+ virtual int set_lg_regionmax(u_int32_t);
+ virtual int get_lk_conflicts(const u_int8_t **, int *);
+ virtual int set_lk_conflicts(u_int8_t *, int);
+ virtual int get_lk_detect(u_int32_t *);
+ virtual int set_lk_detect(u_int32_t);
+ virtual int get_lk_max_lockers(u_int32_t *);
+ virtual int set_lk_max_lockers(u_int32_t);
+ virtual int get_lk_max_locks(u_int32_t *);
+ virtual int set_lk_max_locks(u_int32_t);
+ virtual int get_lk_max_objects(u_int32_t *);
+ virtual int set_lk_max_objects(u_int32_t);
+ virtual int get_lk_partitions(u_int32_t *);
+ virtual int set_lk_partitions(u_int32_t);
+ virtual int get_mp_mmapsize(size_t *);
+ virtual int set_mp_mmapsize(size_t);
+ virtual int get_mp_max_openfd(int *);
+ virtual int set_mp_max_openfd(int);
+ virtual int get_mp_max_write(int *, db_timeout_t *);
+ virtual int set_mp_max_write(int, db_timeout_t);
+ virtual int get_mp_pagesize(u_int32_t *);
+ virtual int set_mp_pagesize(u_int32_t);
+ virtual int get_mp_tablesize(u_int32_t *);
+ virtual int set_mp_tablesize(u_int32_t);
+ virtual void get_msgcall(void (**)(const DbEnv *, const char *));
+ virtual void set_msgcall(void (*)(const DbEnv *, const char *));
+ virtual void get_msgfile(FILE **);
+ virtual void set_msgfile(FILE *);
+ virtual int set_paniccall(void (*)(DbEnv *, int));
+ virtual int set_rpc_server(void *, char *, long, long, u_int32_t);
+ virtual int get_shm_key(long *);
+ virtual int set_shm_key(long);
+ virtual int get_timeout(db_timeout_t *, u_int32_t);
+ virtual int set_timeout(db_timeout_t, u_int32_t);
+ virtual int get_tmp_dir(const char **);
+ virtual int set_tmp_dir(const char *);
+ virtual int get_tx_max(u_int32_t *);
+ virtual int set_tx_max(u_int32_t);
+ virtual int get_app_dispatch(
+ int (**)(DbEnv *, Dbt *, DbLsn *, db_recops));
+ virtual int set_app_dispatch(int (*)(DbEnv *,
+ Dbt *, DbLsn *, db_recops));
+ virtual int get_tx_timestamp(time_t *);
+ virtual int set_tx_timestamp(time_t *);
+ virtual int get_verbose(u_int32_t which, int *);
+ virtual int set_verbose(u_int32_t which, int);
+
+ // Version information. A static method so it can be obtained anytime.
+ //
+ static char *version(int *major, int *minor, int *patch);
+
+ // Convert DB errors to strings
+ static char *strerror(int);
+
+ // If an error is detected and the error call function
+ // or stream is set, a message is dispatched or printed.
+ // If a prefix is set, each message is prefixed.
+ //
+ // You can use set_errcall() or set_errfile() above to control
+ // error functionality. Alternatively, you can call
+ // set_error_stream() to force all errors to a C++ stream.
+ // It is unwise to mix these approaches.
+ //
+ virtual __DB_STD(ostream) *get_error_stream();
+ virtual void set_error_stream(__DB_STD(ostream) *);
+ virtual __DB_STD(ostream) *get_message_stream();
+ virtual void set_message_stream(__DB_STD(ostream) *);
+
+	// Used internally.
+ static void runtime_error(DbEnv *dbenv, const char *caller, int err,
+ int error_policy);
+ static void runtime_error_dbt(DbEnv *dbenv, const char *caller, Dbt *dbt,
+ int error_policy);
+ static void runtime_error_lock_get(DbEnv *dbenv, const char *caller,
+ int err, db_lockop_t op, db_lockmode_t mode,
+ Dbt *obj, DbLock lock, int index,
+ int error_policy);
+
+ // Lock functions
+ //
+ virtual int lock_detect(u_int32_t flags, u_int32_t atype, int *aborted);
+ virtual int lock_get(u_int32_t locker, u_int32_t flags, Dbt *obj,
+ db_lockmode_t lock_mode, DbLock *lock);
+ virtual int lock_id(u_int32_t *idp);
+ virtual int lock_id_free(u_int32_t id);
+ virtual int lock_put(DbLock *lock);
+ virtual int lock_stat(DB_LOCK_STAT **statp, u_int32_t flags);
+ virtual int lock_stat_print(u_int32_t flags);
+ virtual int lock_vec(u_int32_t locker, u_int32_t flags,
+ DB_LOCKREQ list[], int nlist, DB_LOCKREQ **elistp);
+
+ // Log functions
+ //
+ virtual int log_archive(char **list[], u_int32_t flags);
+ static int log_compare(const DbLsn *lsn0, const DbLsn *lsn1);
+ virtual int log_cursor(DbLogc **cursorp, u_int32_t flags);
+ virtual int log_file(DbLsn *lsn, char *namep, size_t len);
+ virtual int log_flush(const DbLsn *lsn);
+ virtual int log_get_config(u_int32_t, int *);
+ virtual int log_put(DbLsn *lsn, const Dbt *data, u_int32_t flags);
+ virtual int log_printf(DbTxn *, const char *, ...);
+ virtual int log_set_config(u_int32_t, int);
+ virtual int log_stat(DB_LOG_STAT **spp, u_int32_t flags);
+ virtual int log_stat_print(u_int32_t flags);
+
+ // Mpool functions
+ //
+ virtual int memp_fcreate(DbMpoolFile **dbmfp, u_int32_t flags);
+ virtual int memp_register(int ftype,
+ pgin_fcn_type pgin_fcn,
+ pgout_fcn_type pgout_fcn);
+ virtual int memp_stat(DB_MPOOL_STAT
+ **gsp, DB_MPOOL_FSTAT ***fsp, u_int32_t flags);
+ virtual int memp_stat_print(u_int32_t flags);
+ virtual int memp_sync(DbLsn *lsn);
+ virtual int memp_trickle(int pct, int *nwrotep);
+
+	// Mutex functions
+ //
+ virtual int mutex_alloc(u_int32_t, db_mutex_t *);
+ virtual int mutex_free(db_mutex_t);
+ virtual int mutex_get_align(u_int32_t *);
+ virtual int mutex_get_increment(u_int32_t *);
+ virtual int mutex_get_max(u_int32_t *);
+ virtual int mutex_get_tas_spins(u_int32_t *);
+ virtual int mutex_lock(db_mutex_t);
+ virtual int mutex_set_align(u_int32_t);
+ virtual int mutex_set_increment(u_int32_t);
+ virtual int mutex_set_max(u_int32_t);
+ virtual int mutex_set_tas_spins(u_int32_t);
+ virtual int mutex_stat(DB_MUTEX_STAT **, u_int32_t);
+ virtual int mutex_stat_print(u_int32_t);
+ virtual int mutex_unlock(db_mutex_t);
+
+ // Transaction functions
+ //
+ virtual int txn_begin(DbTxn *pid, DbTxn **tid, u_int32_t flags);
+ virtual int txn_checkpoint(u_int32_t kbyte, u_int32_t min,
+ u_int32_t flags);
+ virtual int txn_recover(DbPreplist *preplist, u_int32_t count,
+ u_int32_t *retp, u_int32_t flags);
+ virtual int txn_stat(DB_TXN_STAT **statp, u_int32_t flags);
+ virtual int txn_stat_print(u_int32_t flags);
+
+ // Replication functions
+ //
+ virtual int rep_elect(u_int32_t, u_int32_t, u_int32_t);
+ virtual int rep_flush();
+ virtual int rep_process_message(Dbt *, Dbt *, int, DbLsn *);
+ virtual int rep_start(Dbt *, u_int32_t);
+ virtual int rep_stat(DB_REP_STAT **statp, u_int32_t flags);
+ virtual int rep_stat_print(u_int32_t flags);
+ virtual int rep_get_clockskew(u_int32_t *, u_int32_t *);
+ virtual int rep_set_clockskew(u_int32_t, u_int32_t);
+ virtual int rep_get_limit(u_int32_t *, u_int32_t *);
+ virtual int rep_set_limit(u_int32_t, u_int32_t);
+ virtual int rep_set_transport(int, int (*)(DbEnv *,
+ const Dbt *, const Dbt *, const DbLsn *, int, u_int32_t));
+ virtual int rep_set_request(u_int32_t, u_int32_t);
+ virtual int rep_get_request(u_int32_t *, u_int32_t *);
+ virtual int get_thread_count(u_int32_t *);
+ virtual int set_thread_count(u_int32_t);
+ virtual int get_thread_id_fn(
+ void (**)(DbEnv *, pid_t *, db_threadid_t *));
+ virtual int set_thread_id(void (*)(DbEnv *, pid_t *, db_threadid_t *));
+ virtual int get_thread_id_string_fn(
+ char *(**)(DbEnv *, pid_t, db_threadid_t, char *));
+ virtual int set_thread_id_string(char *(*)(DbEnv *,
+ pid_t, db_threadid_t, char *));
+ virtual int rep_set_config(u_int32_t, int);
+ virtual int rep_get_config(u_int32_t, int *);
+ virtual int rep_sync(u_int32_t flags);
+
+ // Advanced replication functions
+ //
+ virtual int rep_get_nsites(u_int32_t *n);
+ virtual int rep_set_nsites(u_int32_t n);
+ virtual int rep_get_priority(u_int32_t *priorityp);
+ virtual int rep_set_priority(u_int32_t priority);
+ virtual int rep_get_timeout(int which, db_timeout_t *timeout);
+ virtual int rep_set_timeout(int which, db_timeout_t timeout);
+ virtual int repmgr_add_remote_site(const char * host, u_int16_t port,
+ int *eidp, u_int32_t flags);
+ virtual int repmgr_get_ack_policy(int *policy);
+ virtual int repmgr_set_ack_policy(int policy);
+ virtual int repmgr_set_local_site(const char * host, u_int16_t port,
+ u_int32_t flags);
+ virtual int repmgr_site_list(u_int *countp, DB_REPMGR_SITE **listp);
+ virtual int repmgr_start(int nthreads, u_int32_t flags);
+ virtual int repmgr_stat(DB_REPMGR_STAT **statp, u_int32_t flags);
+ virtual int repmgr_stat_print(u_int32_t flags);
+
+ // Conversion functions
+ //
+ virtual ENV *get_ENV()
+ {
+ return imp_->env;
+ }
+
+ virtual DB_ENV *get_DB_ENV()
+ {
+ return imp_;
+ }
+
+ virtual const DB_ENV *get_const_DB_ENV() const
+ {
+ return imp_;
+ }
+
+ static DbEnv* get_DbEnv(DB_ENV *dbenv)
+ {
+ return dbenv ? (DbEnv *)dbenv->api1_internal : 0;
+ }
+
+ static const DbEnv* get_const_DbEnv(const DB_ENV *dbenv)
+ {
+ return dbenv ? (const DbEnv *)dbenv->api1_internal : 0;
+ }
+
+ u_int32_t get_create_flags() const
+ {
+ return construct_flags_;
+ }
+
+ // For internal use only.
+ static DbEnv* wrap_DB_ENV(DB_ENV *dbenv);
+
+ // These are public only because they need to be called
+ // via C functions. They should never be called by users
+ // of this class.
+ //
+ static int _app_dispatch_intercept(DB_ENV *dbenv, DBT *dbt, DB_LSN *lsn,
+ db_recops op);
+ static void _paniccall_intercept(DB_ENV *dbenv, int errval);
+ static void _feedback_intercept(DB_ENV *dbenv, int opcode, int pct);
+ static void _event_func_intercept(DB_ENV *dbenv, u_int32_t, void *);
+ static int _isalive_intercept(DB_ENV *dbenv, pid_t pid,
+ db_threadid_t thrid, u_int32_t flags);
+ static int _rep_send_intercept(DB_ENV *dbenv, const DBT *cntrl,
+ const DBT *data, const DB_LSN *lsn, int id, u_int32_t flags);
+ static void _stream_error_function(const DB_ENV *dbenv,
+ const char *prefix, const char *message);
+ static void _stream_message_function(const DB_ENV *dbenv,
+ const char *message);
+ static void _thread_id_intercept(DB_ENV *dbenv, pid_t *pidp,
+ db_threadid_t *thridp);
+ static char *_thread_id_string_intercept(DB_ENV *dbenv, pid_t pid,
+ db_threadid_t thrid, char *buf);
+
+private:
+ void cleanup();
+ int initialize(DB_ENV *dbenv);
+ int error_policy();
+
+ // For internal use only.
+ DbEnv(DB_ENV *, u_int32_t flags);
+
+ // no copying
+ DbEnv(const DbEnv &);
+ void operator = (const DbEnv &);
+
+ // instance data
+ DB_ENV *imp_;
+ int construct_error_;
+ u_int32_t construct_flags_;
+ __DB_STD(ostream) *error_stream_;
+ __DB_STD(ostream) *message_stream_;
+
+ int (*app_dispatch_callback_)(DbEnv *, Dbt *, DbLsn *, db_recops);
+ int (*isalive_callback_)(DbEnv *, pid_t, db_threadid_t, u_int32_t);
+ void (*error_callback_)(const DbEnv *, const char *, const char *);
+ void (*feedback_callback_)(DbEnv *, int, int);
+ void (*message_callback_)(const DbEnv *, const char *);
+ void (*paniccall_callback_)(DbEnv *, int);
+ void (*event_func_callback_)(DbEnv *, u_int32_t, void *);
+ int (*rep_send_callback_)(DbEnv *, const Dbt *, const Dbt *,
+ const DbLsn *, int, u_int32_t);
+ void (*thread_id_callback_)(DbEnv *, pid_t *, db_threadid_t *);
+ char *(*thread_id_string_callback_)(DbEnv *, pid_t, db_threadid_t,
+ char *);
+};
+
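+// For illustration, a typical transactional environment open using the
+// methods above might look like the following sketch (the home directory
+// and flag set here are assumptions, not requirements):
+//
+//	DbEnv env(0);
+//	env.open("/var/tmp/dbhome", DB_CREATE | DB_INIT_MPOOL |
+//	    DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_TXN, 0);
+//	DbTxn *txn = NULL;
+//	env.txn_begin(NULL, &txn, 0);
+//	// ... operate on Db handles opened in this environment ...
+//	txn->commit(0);
+//	env.close(0);
+//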
+//
+// Lock
+//
+class _exported DbLock
+{
+ friend class DbEnv;
+
+public:
+ DbLock();
+ DbLock(const DbLock &);
+ DbLock &operator = (const DbLock &);
+
+protected:
+ // We can add data to this class if needed
+ // since its contained class is not allocated by db.
+ // (see comment at top)
+
+ DbLock(DB_LOCK);
+ DB_LOCK lock_;
+};
+
+//
+// Log cursor
+//
+class _exported DbLogc : protected DB_LOGC
+{
+ friend class DbEnv;
+
+public:
+ int close(u_int32_t _flags);
+ int get(DbLsn *lsn, Dbt *data, u_int32_t _flags);
+ int version(u_int32_t *versionp, u_int32_t _flags);
+
+private:
+ // No data is permitted in this class (see comment at top)
+
+ // Note: use Db::cursor() to get pointers to a Dbc,
+ // and call Dbc::close() rather than delete to release them.
+ //
+ DbLogc();
+ ~DbLogc();
+
+ // no copying
+ DbLogc(const Dbc &);
+ DbLogc &operator = (const Dbc &);
+};
+
+//
+// Log sequence number
+//
+class _exported DbLsn : public DB_LSN
+{
+ friend class DbEnv; // friendship needed to cast to base class
+ friend class DbLogc; // friendship needed to cast to base class
+};
+
+//
+// Memory pool file
+//
+class _exported DbMpoolFile
+{
+ friend class DbEnv;
+ friend class Db;
+
+public:
+ int close(u_int32_t flags);
+ int get(db_pgno_t *pgnoaddr, DbTxn *txn, u_int32_t flags, void *pagep);
+ int get_clear_len(u_int32_t *len);
+ int get_fileid(u_int8_t *fileid);
+ int get_flags(u_int32_t *flagsp);
+ int get_ftype(int *ftype);
+ int get_last_pgno(db_pgno_t *pgnop);
+ int get_lsn_offset(int32_t *offsetp);
+ int get_maxsize(u_int32_t *gbytes, u_int32_t *bytes);
+ int get_pgcookie(DBT *dbt);
+ int get_priority(DB_CACHE_PRIORITY *priorityp);
+ int get_transactional(void);
+ int open(const char *file, u_int32_t flags, int mode, size_t pagesize);
+ int put(void *pgaddr, DB_CACHE_PRIORITY priority, u_int32_t flags);
+ int set_clear_len(u_int32_t len);
+ int set_fileid(u_int8_t *fileid);
+ int set_flags(u_int32_t flags, int onoff);
+ int set_ftype(int ftype);
+ int set_lsn_offset(int32_t offset);
+ int set_maxsize(u_int32_t gbytes, u_int32_t bytes);
+ int set_pgcookie(DBT *dbt);
+ int set_priority(DB_CACHE_PRIORITY priority);
+ int sync();
+
+ virtual DB_MPOOLFILE *get_DB_MPOOLFILE()
+ {
+ return imp_;
+ }
+
+ virtual const DB_MPOOLFILE *get_const_DB_MPOOLFILE() const
+ {
+ return imp_;
+ }
+
+private:
+ DB_MPOOLFILE *imp_;
+
+ // We can add data to this class if needed
+ // since it is implemented via a pointer.
+ // (see comment at top)
+
+ // Note: use DbEnv::memp_fcreate() to get pointers to a DbMpoolFile,
+ // and call DbMpoolFile::close() rather than delete to release them.
+ //
+ DbMpoolFile();
+
+ // Shut g++ up.
+protected:
+ virtual ~DbMpoolFile();
+
+private:
+ // no copying
+ DbMpoolFile(const DbMpoolFile &);
+ void operator = (const DbMpoolFile &);
+};
+
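+// A hedged sketch of direct mpool file use through the methods above;
+// the file name, page size and flags are illustrative assumptions, and
+// "env" is a DbEnv opened with DB_INIT_MPOOL:
+//
+//	DbMpoolFile *mfp;
+//	env.memp_fcreate(&mfp, 0);
+//	mfp->open("scratch.mpf", DB_CREATE, 0, 4096);
+//	db_pgno_t pgno = 0;
+//	void *page;
+//	mfp->get(&pgno, NULL, DB_MPOOL_CREATE, &page);
+//	// ... read/write the 4096-byte page ...
+//	mfp->put(page, DB_PRIORITY_UNCHANGED, 0);
+//	mfp->close(0);
+//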
+//
+// This is filled in and returned by the DbEnv::txn_recover() method.
+//
+class _exported DbPreplist
+{
+public:
+ DbTxn *txn;
+ u_int8_t gid[DB_GID_SIZE];
+};
+
+//
+// A sequence record in a database
+//
+class _exported DbSequence
+{
+public:
+ DbSequence(Db *db, u_int32_t flags);
+ virtual ~DbSequence();
+
+ int open(DbTxn *txnid, Dbt *key, u_int32_t flags);
+ int initial_value(db_seq_t value);
+ int close(u_int32_t flags);
+ int remove(DbTxn *txnid, u_int32_t flags);
+ int stat(DB_SEQUENCE_STAT **sp, u_int32_t flags);
+ int stat_print(u_int32_t flags);
+
+ int get(DbTxn *txnid, int32_t delta, db_seq_t *retp, u_int32_t flags);
+ int get_cachesize(int32_t *sizep);
+ int set_cachesize(int32_t size);
+ int get_flags(u_int32_t *flagsp);
+ int set_flags(u_int32_t flags);
+ int get_range(db_seq_t *minp, db_seq_t *maxp);
+ int set_range(db_seq_t min, db_seq_t max);
+
+ Db *get_db();
+ Dbt *get_key();
+
+ virtual DB_SEQUENCE *get_DB_SEQUENCE()
+ {
+ return imp_;
+ }
+
+ virtual const DB_SEQUENCE *get_const_DB_SEQUENCE() const
+ {
+ return imp_;
+ }
+
+ static DbSequence* get_DbSequence(DB_SEQUENCE *seq)
+ {
+ return (DbSequence *)seq->api_internal;
+ }
+
+ static const DbSequence* get_const_DbSequence(const DB_SEQUENCE *seq)
+ {
+ return (const DbSequence *)seq->api_internal;
+ }
+
+ // For internal use only.
+ static DbSequence* wrap_DB_SEQUENCE(DB_SEQUENCE *seq);
+
+private:
+ DbSequence(DB_SEQUENCE *seq);
+ // no copying
+ DbSequence(const DbSequence &);
+ DbSequence &operator = (const DbSequence &);
+
+ DB_SEQUENCE *imp_;
+ DBT key_;
+};
+
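+// A short usage sketch for the sequence interface above; the backing
+// database handle "db" and the key name are illustrative assumptions:
+//
+//	DbSequence seq(db, 0);
+//	Dbt key((void *)"my_seq", (u_int32_t)sizeof("my_seq"));
+//	seq.initial_value(1);
+//	seq.open(NULL, &key, DB_CREATE);
+//	db_seq_t next;
+//	seq.get(NULL, 1, &next, 0);	// atomically fetch-and-add
+//	seq.close(0);
+//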
+//
+// Transaction
+//
+class _exported DbTxn
+{
+ friend class DbEnv;
+
+public:
+ int abort();
+ int commit(u_int32_t flags);
+ int discard(u_int32_t flags);
+ u_int32_t id();
+ int get_name(const char **namep);
+ int prepare(u_int8_t *gid);
+ int set_name(const char *name);
+ int set_timeout(db_timeout_t timeout, u_int32_t flags);
+
+ virtual DB_TXN *get_DB_TXN()
+ {
+ return imp_;
+ }
+
+ virtual const DB_TXN *get_const_DB_TXN() const
+ {
+ return imp_;
+ }
+
+ static DbTxn* get_DbTxn(DB_TXN *txn)
+ {
+ return (DbTxn *)txn->api_internal;
+ }
+
+ static const DbTxn* get_const_DbTxn(const DB_TXN *txn)
+ {
+ return (const DbTxn *)txn->api_internal;
+ }
+
+ // For internal use only.
+ static DbTxn* wrap_DB_TXN(DB_TXN *txn);
+ void remove_child_txn(DbTxn *kid);
+ void add_child_txn(DbTxn *kid);
+
+ void set_parent(DbTxn *ptxn)
+ {
+ parent_txn_ = ptxn;
+ }
+
+private:
+ DB_TXN *imp_;
+
+	// We use a TAILQ to store this object's kid DbTxn objects, and
+	// each kid has a "parent_txn_" pointer back to this DbTxn object.
+ //
+ // If imp_ has a parent transaction which is not wrapped by DbTxn
+ // class, parent_txn_ will be NULL since we don't need to maintain
+ // this parent-kid relationship. This relationship only helps to
+ // delete unresolved kids when the parent is resolved.
+ DbTxn *parent_txn_;
+
+ // We can add data to this class if needed
+ // since it is implemented via a pointer.
+ // (see comment at top)
+
+ // Note: use DbEnv::txn_begin() to get pointers to a DbTxn,
+	// and call DbTxn::abort() or DbTxn::commit() rather than
+ // delete to release them.
+ //
+ DbTxn(DbTxn *ptxn);
+ // For internal use only.
+ DbTxn(DB_TXN *txn, DbTxn *ptxn);
+ virtual ~DbTxn();
+
+ // no copying
+ DbTxn(const DbTxn &);
+ void operator = (const DbTxn &);
+
+ /*
+ * !!!
+ * Explicit representations of structures from queue.h.
+ * TAILQ_HEAD(__children, DbTxn) children;
+ */
+ struct __children {
+ DbTxn *tqh_first;
+ DbTxn **tqh_last;
+ } children;
+
+ /*
+ * !!!
+ * Explicit representations of structures from queue.h.
+ * TAILQ_ENTRY(DbTxn) child_entry;
+ */
+ struct {
+ DbTxn *tqe_next;
+ DbTxn **tqe_prev;
+ } child_entry;
+};
+
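+// A sketch of the parent/child tracking kept by this class, assuming
+// "env" is an open transactional DbEnv (error handling omitted):
+//
+//	DbTxn *parent = NULL, *child = NULL;
+//	env.txn_begin(NULL, &parent, 0);
+//	env.txn_begin(parent, &child, 0);	// child joins parent's kid list
+//	child->commit(0);			// resolves and unlinks the kid
+//	parent->commit(0);
+//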
+//
+// A chunk of data, maybe a key or value.
+//
+class _exported Dbt : private DBT
+{
+ friend class Db;
+ friend class Dbc;
+ friend class DbEnv;
+ friend class DbLogc;
+ friend class DbSequence;
+
+public:
+ // key/data
+ void *get_data() const { return data; }
+ void set_data(void *value) { data = value; }
+
+ // key/data length
+ u_int32_t get_size() const { return size; }
+ void set_size(u_int32_t value) { size = value; }
+
+ // RO: length of user buffer.
+ u_int32_t get_ulen() const { return ulen; }
+ void set_ulen(u_int32_t value) { ulen = value; }
+
+ // RO: get/put record length.
+ u_int32_t get_dlen() const { return dlen; }
+ void set_dlen(u_int32_t value) { dlen = value; }
+
+ // RO: get/put record offset.
+ u_int32_t get_doff() const { return doff; }
+ void set_doff(u_int32_t value) { doff = value; }
+
+ // flags
+ u_int32_t get_flags() const { return flags; }
+ void set_flags(u_int32_t value) { flags = value; }
+
+ // Conversion functions
+ DBT *get_DBT() { return (DBT *)this; }
+ const DBT *get_const_DBT() const { return (const DBT *)this; }
+
+ static Dbt* get_Dbt(DBT *dbt) { return (Dbt *)dbt; }
+ static const Dbt* get_const_Dbt(const DBT *dbt)
+ { return (const Dbt *)dbt; }
+
+ Dbt(void *data, u_int32_t size);
+ Dbt();
+ ~Dbt();
+ Dbt(const Dbt &);
+ Dbt &operator = (const Dbt &);
+
+private:
+ // Note: no extra data appears in this class (other than
+ // inherited from DBT) since we need DBT and Dbt objects
+	// to have interchangeable pointers.
+ //
+ // When subclassing this class, remember that callback
+ // methods like bt_compare, bt_prefix, dup_compare may
+ // internally manufacture DBT objects (which later are
+ // cast to Dbt), so such callbacks might receive objects
+ // not of your subclassed type.
+};
+
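+// For illustration, constructing a key/data pair with this class for a
+// simple put (the Db handle "db" and the payload are assumptions):
+//
+//	const char *k = "fruit", *v = "apple";
+//	Dbt key((void *)k, (u_int32_t)strlen(k) + 1);
+//	Dbt data((void *)v, (u_int32_t)strlen(v) + 1);
+//	db->put(NULL, &key, &data, 0);
+//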
+////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////
+//
+// multiple key/data/recno iterator classes
+//
+
+// DbMultipleIterator is a shared private base class for the three types
+// of bulk-return iterators; it should never be instantiated directly,
+// but it handles the functionality shared by its subclasses.
+class _exported DbMultipleIterator
+{
+public:
+ DbMultipleIterator(const Dbt &dbt);
+protected:
+ u_int8_t *data_;
+ u_int32_t *p_;
+};
+
+class _exported DbMultipleKeyDataIterator : private DbMultipleIterator
+{
+public:
+ DbMultipleKeyDataIterator(const Dbt &dbt) : DbMultipleIterator(dbt) {}
+ bool next(Dbt &key, Dbt &data);
+};
+
+class _exported DbMultipleRecnoDataIterator : private DbMultipleIterator
+{
+public:
+ DbMultipleRecnoDataIterator(const Dbt &dbt) : DbMultipleIterator(dbt) {}
+ bool next(db_recno_t &recno, Dbt &data);
+};
+
+class _exported DbMultipleDataIterator : private DbMultipleIterator
+{
+public:
+ DbMultipleDataIterator(const Dbt &dbt) : DbMultipleIterator(dbt) {}
+ bool next(Dbt &data);
+};
+
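+// A sketch of draining a bulk buffer with one of the iterators above;
+// the cursor "dbc" and the buffer size are illustrative assumptions:
+//
+//	char buf[64 * 1024];
+//	Dbt key, data;
+//	data.set_data(buf);
+//	data.set_ulen(sizeof(buf));
+//	data.set_flags(DB_DBT_USERMEM);
+//	if (dbc->get(&key, &data, DB_MULTIPLE_KEY | DB_NEXT) == 0) {
+//		DbMultipleKeyDataIterator it(data);
+//		Dbt k, d;
+//		while (it.next(k, d)) {
+//			// process one key/data pair per iteration
+//		}
+//	}
+//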
+////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////
+//
+// multiple key/data/recno builder classes
+//
+
+// DbMultipleBuilder is a shared private base class for the three types
+// of bulk buffer builders; it should never be instantiated directly,
+// but it handles the functionality shared by its subclasses.
+class _exported DbMultipleBuilder
+{
+public:
+ DbMultipleBuilder(Dbt &dbt);
+protected:
+ Dbt &dbt_;
+ void *p_;
+};
+
+class _exported DbMultipleDataBuilder : DbMultipleBuilder
+{
+public:
+ DbMultipleDataBuilder(Dbt &dbt) : DbMultipleBuilder(dbt) {}
+ bool append(void *dbuf, size_t dlen);
+ bool reserve(void *&ddest, size_t dlen);
+};
+
+class _exported DbMultipleKeyDataBuilder : DbMultipleBuilder
+{
+public:
+ DbMultipleKeyDataBuilder(Dbt &dbt) : DbMultipleBuilder(dbt) {}
+ bool append(void *kbuf, size_t klen, void *dbuf, size_t dlen);
+ bool reserve(void *&kdest, size_t klen, void *&ddest, size_t dlen);
+};
+
+class _exported DbMultipleRecnoDataBuilder
+{
+public:
+ DbMultipleRecnoDataBuilder(Dbt &dbt);
+ bool append(db_recno_t recno, void *dbuf, size_t dlen);
+ bool reserve(db_recno_t recno, void *&ddest, size_t dlen);
+protected:
+ Dbt &dbt_;
+ void *p_;
+};
+
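+// A corresponding sketch for filling a buffer before a bulk put; the
+// DB_DBT_BULK/DB_MULTIPLE_KEY combination follows the 4.8 bulk update
+// interface, and the buffer size and payloads are illustrative
+// assumptions:
+//
+//	char buf[64 * 1024];
+//	Dbt bulk(buf, sizeof(buf));
+//	bulk.set_ulen(sizeof(buf));
+//	bulk.set_flags(DB_DBT_USERMEM | DB_DBT_BULK);
+//	DbMultipleKeyDataBuilder b(bulk);
+//	if (!b.append((void *)"k1", 3, (void *)"v1", 3))
+//		return;			// buffer full
+//	Dbt unused;			// ignored with DB_MULTIPLE_KEY
+//	db->put(NULL, &bulk, &unused, DB_MULTIPLE_KEY);
+//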
+////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////
+//
+// Exception classes
+//
+
+// Almost any error in the DB library throws a DbException.
+// Every exception should be considered an abnormality
+// (e.g. bug, misuse of DB, file system error).
+//
+class _exported DbException : public __DB_STD(exception)
+{
+public:
+ virtual ~DbException() throw();
+ DbException(int err);
+ DbException(const char *description);
+ DbException(const char *description, int err);
+ DbException(const char *prefix, const char *description, int err);
+ int get_errno() const;
+ virtual const char *what() const throw();
+ DbEnv *get_env() const;
+ void set_env(DbEnv *dbenv);
+
+ DbException(const DbException &);
+ DbException &operator = (const DbException &);
+
+private:
+ void describe(const char *prefix, const char *description);
+
+ char *what_;
+ int err_; // errno
+ DbEnv *dbenv_;
+};
+
+//
+// A specific sort of exception that occurs when
+// an operation is aborted to resolve a deadlock.
+//
+class _exported DbDeadlockException : public DbException
+{
+public:
+ virtual ~DbDeadlockException() throw();
+ DbDeadlockException(const char *description);
+
+ DbDeadlockException(const DbDeadlockException &);
+ DbDeadlockException &operator = (const DbDeadlockException &);
+};
+
+//
+// A specific sort of exception that occurs when
+// a lock is not granted, e.g. by lock_get or lock_vec.
+// Note that the Dbt returned by get_obj() is only valid as long
+// as the Dbt used in the offending call.
+//
+class _exported DbLockNotGrantedException : public DbException
+{
+public:
+ virtual ~DbLockNotGrantedException() throw();
+ DbLockNotGrantedException(const char *prefix, db_lockop_t op,
+ db_lockmode_t mode, const Dbt *obj, const DbLock lock, int index);
+ DbLockNotGrantedException(const char *description);
+
+ DbLockNotGrantedException(const DbLockNotGrantedException &);
+ DbLockNotGrantedException &operator =
+ (const DbLockNotGrantedException &);
+
+ db_lockop_t get_op() const;
+ db_lockmode_t get_mode() const;
+ const Dbt* get_obj() const;
+ DbLock *get_lock() const;
+ int get_index() const;
+
+private:
+ db_lockop_t op_;
+ db_lockmode_t mode_;
+ const Dbt *obj_;
+ DbLock *lock_;
+ int index_;
+};
+
+//
+// A specific sort of exception that occurs when
+// user declared memory is insufficient in a Dbt.
+//
+class _exported DbMemoryException : public DbException
+{
+public:
+ virtual ~DbMemoryException() throw();
+ DbMemoryException(Dbt *dbt);
+ DbMemoryException(const char *prefix, Dbt *dbt);
+
+ DbMemoryException(const DbMemoryException &);
+ DbMemoryException &operator = (const DbMemoryException &);
+
+ Dbt *get_dbt() const;
+private:
+ Dbt *dbt_;
+};
+
+//
+// A specific sort of exception that occurs when a change of replication
+// master requires that all handles be re-opened.
+//
+class _exported DbRepHandleDeadException : public DbException
+{
+public:
+ virtual ~DbRepHandleDeadException() throw();
+ DbRepHandleDeadException(const char *description);
+
+ DbRepHandleDeadException(const DbRepHandleDeadException &);
+ DbRepHandleDeadException &operator = (const DbRepHandleDeadException &);
+};
+
+//
+// A specific sort of exception that occurs when
+// recovery is required before continuing DB activity.
+//
+class _exported DbRunRecoveryException : public DbException
+{
+public:
+ virtual ~DbRunRecoveryException() throw();
+ DbRunRecoveryException(const char *description);
+
+ DbRunRecoveryException(const DbRunRecoveryException &);
+ DbRunRecoveryException &operator = (const DbRunRecoveryException &);
+};
+
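+// Taken together, callers usually wrap DB work in a try block and key
+// their recovery action off the subclass; a hedged sketch (the handles
+// and Dbts are assumed to exist):
+//
+//	try {
+//		db->put(txn, &key, &data, 0);
+//	} catch (DbDeadlockException &) {
+//		txn->abort();		// then retry the transaction
+//	} catch (DbRunRecoveryException &) {
+//		// close all handles and run recovery before continuing
+//	} catch (DbException &e) {
+//		fprintf(stderr, "db: %s\n", e.what());
+//	}
+//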
+////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////
+//
+// Restore default compiler warnings
+//
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+#endif /* !_DB_CXX_H_ */
diff --git a/db-4.8.30/dbinc/db_dispatch.h b/db-4.8.30/dbinc/db_dispatch.h
new file mode 100644
index 0000000..91f83e6
--- /dev/null
+++ b/db-4.8.30/dbinc/db_dispatch.h
@@ -0,0 +1,97 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996-2009 Oracle. All rights reserved.
+ */
+/*
+ * Copyright (c) 1995, 1996
+ * The President and Fellows of Harvard University. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_DISPATCH_H_
+#define _DB_DISPATCH_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * Declarations and typedefs for the list of transaction IDs used during
+ * recovery. This is a generic list used to pass along whatever information
+ * we need during recovery.
+ */
+typedef enum {
+ TXNLIST_DELETE,
+ TXNLIST_LSN,
+ TXNLIST_TXNID
+} db_txnlist_type;
+
+#define DB_TXNLIST_MASK(hp, n) ((n) % (hp)->nslots)
+struct __db_txnhead {
+ void *td; /* If abort, the detail for the txn. */
+ DB_THREAD_INFO *thread_info; /* Thread information. */
+ u_int32_t maxid; /* Maximum transaction id. */
+ DB_LSN maxlsn; /* Maximum commit lsn. */
+ DB_LSN ckplsn; /* LSN of last retained checkpoint. */
+ DB_LSN trunc_lsn; /* Lsn to which we are going to truncate;
+ * make sure we abort anyone after this. */
+ u_int32_t generation; /* Current generation number. */
+ u_int32_t gen_alloc; /* Number of generations allocated. */
+ struct {
+ u_int32_t generation;
+ u_int32_t txn_min;
+ u_int32_t txn_max;
+ } *gen_array; /* Array of txnids associated with a gen. */
+ u_int nslots;
+ LIST_HEAD(__db_headlink, __db_txnlist) head[1];
+};
+
+#define DB_LSN_STACK_SIZE 4
+struct __db_txnlist {
+ db_txnlist_type type;
+ LIST_ENTRY(__db_txnlist) links;
+ union {
+ struct {
+ u_int32_t txnid;
+ u_int32_t generation;
+ u_int32_t status;
+ } t;
+ struct {
+ u_int32_t stack_size;
+ u_int32_t stack_indx;
+ DB_LSN *lsn_stack;
+ } l;
+ } u;
+};
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* !_DB_DISPATCH_H_ */
diff --git a/db-4.8.30/dbinc/db_int.in b/db-4.8.30/dbinc/db_int.in
new file mode 100644
index 0000000..744f9cf
--- /dev/null
+++ b/db-4.8.30/dbinc/db_int.in
@@ -0,0 +1,933 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_INT_H_
+#define _DB_INT_H_
+
+/*******************************************************
+ * Berkeley DB ANSI/POSIX include files.
+ *******************************************************/
+#ifdef HAVE_SYSTEM_INCLUDE_FILES
+#include <sys/types.h>
+#ifdef DIAG_MVCC
+#include <sys/mman.h>
+#endif
+#include <sys/stat.h>
+
+#if defined(__INCLUDE_SELECT_H)
+#ifdef HAVE_SYS_SELECT_H
+#include <sys/select.h>
+#endif
+#ifdef HAVE_VXWORKS
+#include <selectLib.h>
+#endif
+#endif
+
+#if TIME_WITH_SYS_TIME
+#include <sys/time.h>
+#include <time.h>
+#else
+#if HAVE_SYS_TIME_H
+#include <sys/time.h>
+#else
+#include <time.h>
+#endif
+#endif
+
+#ifdef HAVE_VXWORKS
+#include <net/uio.h>
+#else
+#include <sys/uio.h>
+#endif
+
+#if defined(__INCLUDE_NETWORKING)
+#ifdef HAVE_SYS_SOCKET_H
+#include <sys/socket.h>
+#endif
+#include <netinet/in.h>
+#include <netdb.h>
+#include <arpa/inet.h>
+#endif
+
+#if defined(STDC_HEADERS) || defined(__cplusplus)
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#if defined(__INCLUDE_DIRECTORY)
+#if HAVE_DIRENT_H
+# include <dirent.h>
+# define NAMLEN(dirent) strlen((dirent)->d_name)
+#else
+# define dirent direct
+# define NAMLEN(dirent) (dirent)->d_namlen
+# if HAVE_SYS_NDIR_H
+# include <sys/ndir.h>
+# endif
+# if HAVE_SYS_DIR_H
+# include <sys/dir.h>
+# endif
+# if HAVE_NDIR_H
+# include <ndir.h>
+# endif
+#endif
+#endif /* __INCLUDE_DIRECTORY */
+
+#endif /* !HAVE_SYSTEM_INCLUDE_FILES */
+
+#ifdef DB_WIN32
+#include "dbinc/win_db.h"
+#endif
+
+#include "db.h"
+#include "clib_port.h"
+
+#include "dbinc/queue.h"
+#include "dbinc/shqueue.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*******************************************************
+ * Forward structure declarations.
+ *******************************************************/
+struct __db_reginfo_t; typedef struct __db_reginfo_t REGINFO;
+struct __db_txnhead; typedef struct __db_txnhead DB_TXNHEAD;
+struct __db_txnlist; typedef struct __db_txnlist DB_TXNLIST;
+struct __vrfy_childinfo;typedef struct __vrfy_childinfo VRFY_CHILDINFO;
+struct __vrfy_dbinfo; typedef struct __vrfy_dbinfo VRFY_DBINFO;
+struct __vrfy_pageinfo; typedef struct __vrfy_pageinfo VRFY_PAGEINFO;
+
+typedef SH_TAILQ_HEAD(__hash_head) DB_HASHTAB;
+
+/*******************************************************
+ * General purpose constants and macros.
+ *******************************************************/
+#undef FALSE
+#define FALSE 0
+#undef TRUE
+#define TRUE (!FALSE)
+
+#define MEGABYTE 1048576
+#define GIGABYTE 1073741824
+
+#define NS_PER_MS 1000000 /* Nanoseconds in a millisecond */
+#define NS_PER_US 1000 /* Nanoseconds in a microsecond */
+#define NS_PER_SEC 1000000000 /* Nanoseconds in a second */
+#define US_PER_MS 1000 /* Microseconds in a millisecond */
+#define US_PER_SEC 1000000 /* Microseconds in a second */
+#define MS_PER_SEC 1000 /* Milliseconds in a second */
+
+#define RECNO_OOB 0 /* Illegal record number. */
+
+/* Test for a power-of-two (tests true for zero, which doesn't matter here). */
+#define POWER_OF_TWO(x) (((x) & ((x) - 1)) == 0)
+
+/* Test for valid page sizes. */
+#define DB_MIN_PGSIZE 0x000200 /* Minimum page size (512). */
+#define DB_MAX_PGSIZE 0x010000 /* Maximum page size (65536). */
+#define IS_VALID_PAGESIZE(x) \
+ (POWER_OF_TWO(x) && (x) >= DB_MIN_PGSIZE && ((x) <= DB_MAX_PGSIZE))
+
+/* Minimum number of pages cached, by default. */
+#define DB_MINPAGECACHE 16
+
+/*
+ * If we are unable to determine the underlying filesystem block size, use
+ * 8K on the grounds that most OS's use less than 8K for a VM page size.
+ */
+#define DB_DEF_IOSIZE (8 * 1024)
+
+/* Align an integer to a specific boundary. */
+#undef DB_ALIGN
+#define DB_ALIGN(v, bound) \
+ (((v) + (bound) - 1) & ~(((uintmax_t)(bound)) - 1))
+
+/* Increment a pointer to a specific boundary. */
+#undef ALIGNP_INC
+#define ALIGNP_INC(p, bound) \
+ (void *)(((uintptr_t)(p) + (bound) - 1) & ~(((uintptr_t)(bound)) - 1))
+
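+/*
+ * For example (illustrative values): DB_ALIGN(13, 8) rounds 13 up to the
+ * next 8-byte boundary, 16, and ALIGNP_INC does the same for pointers:
+ *
+ *	size_t n = DB_ALIGN(13, 8);			n == 16
+ *	void *q = ALIGNP_INC((char *)0x1001, 4);	q == (void *)0x1004
+ */
+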
+/*
+ * Print an address as a u_long (a u_long is the largest type we can print
+ * portably). Most 64-bit systems have made longs 64-bits, so this should
+ * work.
+ */
+#define P_TO_ULONG(p) ((u_long)(uintptr_t)(p))
+
+/*
+ * Convert a pointer to a small integral value.
+ *
+ * The (u_int16_t)(uintptr_t) cast avoids warnings: the (uintptr_t) cast
+ * converts the value to an integral type, and the (u_int16_t) cast converts
+ * it to a small integral type so we don't get complaints when we assign the
+ * final result to an integral type smaller than uintptr_t.
+ */
+#define P_TO_UINT32(p) ((u_int32_t)(uintptr_t)(p))
+#define P_TO_UINT16(p) ((u_int16_t)(uintptr_t)(p))
+
+/*
+ * There are several on-page structures that are declared to have a number of
+ * fields followed by a variable length array of items. The structure size
+ * without including the variable length array or the address of the first of
+ * those elements can be found using SSZ.
+ *
+ * This macro can also be used to find the offset of a structure element in a
+ * structure. This is used in various places to copy structure elements from
+ * unaligned memory references, e.g., pointers into a packed page.
+ *
+ * There are two versions because compilers object if you take the address of
+ * an array.
+ */
+#undef SSZ
+#define SSZ(name, field) P_TO_UINT16(&(((name *)0)->field))
+
+#undef SSZA
+#define SSZA(name, field) P_TO_UINT16(&(((name *)0)->field[0]))
+
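+/*
+ * For example, with an illustrative on-page structure
+ *
+ *	typedef struct { u_int32_t len; u_int8_t data[1]; } HDR;
+ *
+ * SSZ(HDR, len) is the byte offset of "len" (like offsetof()), and
+ * SSZA(HDR, data) is the offset of data[0], i.e. the structure size
+ * without the variable length array.
+ */
+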
+/* Structure used to print flag values. */
+typedef struct __fn {
+ u_int32_t mask; /* Flag value. */
+ const char *name; /* Flag name. */
+} FN;
+
+/* Set, clear and test flags. */
+#define FLD_CLR(fld, f) (fld) &= ~(f)
+#define FLD_ISSET(fld, f) ((fld) & (f))
+#define FLD_SET(fld, f) (fld) |= (f)
+#define F_CLR(p, f) (p)->flags &= ~(f)
+#define F_ISSET(p, f) ((p)->flags & (f))
+#define F_SET(p, f) (p)->flags |= (f)
+#define LF_CLR(f) ((flags) &= ~(f))
+#define LF_ISSET(f) ((flags) & (f))
+#define LF_SET(f) ((flags) |= (f))
+
+/*
+ * Calculate a percentage. The values can overflow 32-bit integer arithmetic
+ * so we use floating point.
+ *
+ * When calculating a bytes-vs-page size percentage, we're getting the inverse
+ * of the percentage in all cases, that is, we want 100 minus the percentage we
+ * calculate.
+ */
+#define DB_PCT(v, total) \
+ ((int)((total) == 0 ? 0 : ((double)(v) * 100) / (total)))
+#define DB_PCT_PG(v, total, pgsize) \
+ ((int)((total) == 0 ? 0 : \
+ 100 - ((double)(v) * 100) / (((double)total) * (pgsize))))
+
+/*
+ * Statistics updates touch shared memory and so are expensive -- don't
+ * update the values unless we're going to display the results.
+ */
+#undef STAT
+#ifdef HAVE_STATISTICS
+#define STAT(x) x
+#else
+#define STAT(x)
+#endif
+
+/*
+ * Structure used for callback message aggregation.
+ *
+ * Display values in XXX_stat_print calls.
+ */
+typedef struct __db_msgbuf {
+ char *buf; /* Heap allocated buffer. */
+ char *cur; /* Current end of message. */
+ size_t len; /* Allocated length of buffer. */
+} DB_MSGBUF;
+#define DB_MSGBUF_INIT(a) do { \
+ (a)->buf = (a)->cur = NULL; \
+ (a)->len = 0; \
+} while (0)
+#define DB_MSGBUF_FLUSH(env, a) do { \
+ if ((a)->buf != NULL) { \
+ if ((a)->cur != (a)->buf) \
+ __db_msg(env, "%s", (a)->buf); \
+ __os_free(env, (a)->buf); \
+ DB_MSGBUF_INIT(a); \
+ } \
+} while (0)
+#define STAT_FMT(msg, fmt, type, v) do { \
+ DB_MSGBUF __mb; \
+ DB_MSGBUF_INIT(&__mb); \
+ __db_msgadd(env, &__mb, fmt, (type)(v)); \
+ __db_msgadd(env, &__mb, "\t%s", msg); \
+ DB_MSGBUF_FLUSH(env, &__mb); \
+} while (0)
+#define STAT_HEX(msg, v) \
+ __db_msg(env, "%#lx\t%s", (u_long)(v), msg)
+#define STAT_ISSET(msg, p) \
+ __db_msg(env, "%sSet\t%s", (p) == NULL ? "!" : " ", msg)
+#define STAT_LONG(msg, v) \
+ __db_msg(env, "%ld\t%s", (long)(v), msg)
+#define STAT_LSN(msg, lsnp) \
+ __db_msg(env, "%lu/%lu\t%s", \
+ (u_long)(lsnp)->file, (u_long)(lsnp)->offset, msg)
+#define STAT_POINTER(msg, v) \
+ __db_msg(env, "%#lx\t%s", P_TO_ULONG(v), msg)
+#define STAT_STRING(msg, p) do { \
+ const char *__p = p; /* p may be a function call. */ \
+ __db_msg(env, "%s\t%s", __p == NULL ? "!Set" : __p, msg); \
+} while (0)
+#define STAT_ULONG(msg, v) \
+ __db_msg(env, "%lu\t%s", (u_long)(v), msg)
+
+/*
+ * There are quite a few places in Berkeley DB where we want to initialize
+ * a DBT from a string or other random pointer type, using a length typed
+ * to size_t in most cases. This macro avoids a lot of casting. The macro
+ * comes in two flavors because we often want to clear the DBT first.
+ */
+#define DB_SET_DBT(dbt, d, s) do { \
+ (dbt).data = (void *)(d); \
+ (dbt).size = (u_int32_t)(s); \
+} while (0)
+#define DB_INIT_DBT(dbt, d, s) do { \
+ memset(&(dbt), 0, sizeof(dbt)); \
+ DB_SET_DBT(dbt, d, s); \
+} while (0)
+
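+/*
+ * For example, pointing a key DBT at a string constant (the variable
+ * names here are illustrative):
+ *
+ *	DBT key;
+ *	DB_INIT_DBT(key, "fruit", sizeof("fruit"));
+ */
+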
+/*******************************************************
+ * API return values
+ *******************************************************/
+/*
+ * Return values that are OK for each different call. Most calls have a
+ * standard 'a return of 0 is the only OK value', but for some, like
+ * db->get, DB_NOTFOUND is also an expected return value rather than an error.
+ */
+#define DB_RETOK_STD(ret) ((ret) == 0)
+#define DB_RETOK_DBCDEL(ret) ((ret) == 0 || (ret) == DB_KEYEMPTY || \
+ (ret) == DB_NOTFOUND)
+#define DB_RETOK_DBCGET(ret) ((ret) == 0 || (ret) == DB_KEYEMPTY || \
+ (ret) == DB_NOTFOUND)
+#define DB_RETOK_DBCPUT(ret) ((ret) == 0 || (ret) == DB_KEYEXIST || \
+ (ret) == DB_NOTFOUND)
+#define DB_RETOK_DBDEL(ret) DB_RETOK_DBCDEL(ret)
+#define DB_RETOK_DBGET(ret) DB_RETOK_DBCGET(ret)
+#define DB_RETOK_DBPUT(ret) ((ret) == 0 || (ret) == DB_KEYEXIST)
+#define DB_RETOK_EXISTS(ret) DB_RETOK_DBCGET(ret)
+#define DB_RETOK_LGGET(ret) ((ret) == 0 || (ret) == DB_NOTFOUND)
+#define DB_RETOK_MPGET(ret) ((ret) == 0 || (ret) == DB_PAGE_NOTFOUND)
+#define DB_RETOK_REPPMSG(ret) ((ret) == 0 || \
+ (ret) == DB_REP_IGNORE || \
+ (ret) == DB_REP_ISPERM || \
+ (ret) == DB_REP_NEWMASTER || \
+ (ret) == DB_REP_NEWSITE || \
+ (ret) == DB_REP_NOTPERM)
+#define DB_RETOK_REPMGR_START(ret) ((ret) == 0 || (ret) == DB_REP_IGNORE)
+
+/* Find a reasonable operation-not-supported error. */
+#ifdef EOPNOTSUPP
+#define DB_OPNOTSUP EOPNOTSUPP
+#else
+#ifdef ENOTSUP
+#define DB_OPNOTSUP ENOTSUP
+#else
+#define DB_OPNOTSUP EINVAL
+#endif
+#endif
+
+/*******************************************************
+ * Files.
+ *******************************************************/
+/*
+ * We use 1024 as the maximum path length. It's too hard to figure out what
+ * the real path length is, as it was traditionally stored in <sys/param.h>,
+ * and that file isn't always available.
+ */
+#define DB_MAXPATHLEN 1024
+
+#define PATH_DOT "." /* Current working directory. */
+ /* Path separator character(s). */
+#define PATH_SEPARATOR "@PATH_SEPARATOR@"
+
+/*******************************************************
+ * Environment.
+ *******************************************************/
+/* Type passed to __db_appname(). */
+typedef enum {
+ DB_APP_NONE=0, /* No type (region). */
+ DB_APP_DATA, /* Data file. */
+ DB_APP_LOG, /* Log file. */
+ DB_APP_TMP, /* Temporary file. */
+ DB_APP_RECOVER /* We are in recovery. */
+} APPNAME;
+
+/*
+ * A set of macros to check if various functionality has been configured.
+ *
+ * ALIVE_ON The is_alive function is configured.
+ * CDB_LOCKING CDB product locking.
+ * CRYPTO_ON Security has been configured.
+ * LOCKING_ON Locking has been configured.
+ * LOGGING_ON Logging has been configured.
+ * MUTEX_ON Mutexes have been configured.
+ * MPOOL_ON Memory pool has been configured.
+ * REP_ON Replication has been configured.
+ * RPC_ON RPC has been configured.
+ * TXN_ON Transactions have been configured.
+ *
+ * REP_ON is more complex than most: if the BDB library was compiled without
+ * replication support, ENV->rep_handle will be NULL; if the BDB library has
+ * replication support, but it was not configured, the region reference will
+ * be NULL.
+ */
+#define ALIVE_ON(env) ((env)->dbenv->is_alive != NULL)
+#define CDB_LOCKING(env) F_ISSET(env, ENV_CDB)
+#define CRYPTO_ON(env) ((env)->crypto_handle != NULL)
+#define LOCKING_ON(env) ((env)->lk_handle != NULL)
+#define LOGGING_ON(env) ((env)->lg_handle != NULL)
+#define MPOOL_ON(env) ((env)->mp_handle != NULL)
+#define MUTEX_ON(env) ((env)->mutex_handle != NULL)
+#define REP_ON(env) \
+ ((env)->rep_handle != NULL && (env)->rep_handle->region != NULL)
+#define RPC_ON(dbenv) ((dbenv)->cl_handle != NULL)
+#define TXN_ON(env) ((env)->tx_handle != NULL)
+
+/*
+ * STD_LOCKING Standard locking, that is, locking was configured and CDB
+ * was not. We do not do locking in off-page duplicate trees,
+ * so we check for that in the cursor first.
+ */
+#define STD_LOCKING(dbc) \
+ (!F_ISSET(dbc, DBC_OPD) && \
+ !CDB_LOCKING((dbc)->env) && LOCKING_ON((dbc)->env))
+
+/*
+ * IS_RECOVERING: The system is running recovery.
+ */
+#define IS_RECOVERING(env) \
+ (LOGGING_ON(env) && F_ISSET((env)->lg_handle, DBLOG_RECOVER))
+
+/* Initialization methods are often illegal before/after open is called. */
+#define ENV_ILLEGAL_AFTER_OPEN(env, name) \
+ if (F_ISSET((env), ENV_OPEN_CALLED)) \
+ return (__db_mi_open(env, name, 1));
+#define ENV_ILLEGAL_BEFORE_OPEN(env, name) \
+ if (!F_ISSET((env), ENV_OPEN_CALLED)) \
+ return (__db_mi_open(env, name, 0));
+
+/* We're not actually user hostile, honest. */
+#define ENV_REQUIRES_CONFIG(env, handle, i, flags) \
+ if (handle == NULL) \
+ return (__env_not_config(env, i, flags));
+#define ENV_REQUIRES_CONFIG_XX(env, handle, i, flags) \
+ if ((env)->handle->region == NULL) \
+ return (__env_not_config(env, i, flags));
+#define ENV_NOT_CONFIGURED(env, handle, i, flags) \
+ if (F_ISSET((env), ENV_OPEN_CALLED)) \
+ ENV_REQUIRES_CONFIG(env, handle, i, flags)
+
+#define ENV_ENTER(env, ip) do { \
+ int __ret; \
+ PANIC_CHECK(env); \
+ if ((env)->thr_hashtab == NULL) \
+ ip = NULL; \
+ else { \
+ if ((__ret = \
+ __env_set_state(env, &(ip), THREAD_ACTIVE)) != 0) \
+ return (__ret); \
+ } \
+} while (0)
+
+#define FAILCHK_THREAD(env, ip) do { \
+ if ((ip) != NULL) \
+ (ip)->dbth_state = THREAD_FAILCHK; \
+} while (0)
+
+#define ENV_GET_THREAD_INFO(env, ip) ENV_ENTER(env, ip)
+
+#ifdef DIAGNOSTIC
+#define ENV_LEAVE(env, ip) do { \
+ if ((ip) != NULL) { \
+ DB_ASSERT(env, ((ip)->dbth_state == THREAD_ACTIVE || \
+ (ip)->dbth_state == THREAD_FAILCHK)); \
+ (ip)->dbth_state = THREAD_OUT; \
+ } \
+} while (0)
+#else
+#define ENV_LEAVE(env, ip) do { \
+ if ((ip) != NULL) \
+ (ip)->dbth_state = THREAD_OUT; \
+} while (0)
+#endif
+#ifdef DIAGNOSTIC
+#define CHECK_THREAD(env) do { \
+ if ((env)->thr_hashtab != NULL) \
+ (void)__env_set_state(env, NULL, THREAD_VERIFY); \
+} while (0)
+#ifdef HAVE_STATISTICS
+#define CHECK_MTX_THREAD(env, mtx) do { \
+ if (mtx->alloc_id != MTX_MUTEX_REGION && \
+ mtx->alloc_id != MTX_ENV_REGION && \
+ mtx->alloc_id != MTX_APPLICATION) \
+ CHECK_THREAD(env); \
+} while (0)
+#else
+#define CHECK_MTX_THREAD(env, mtx)
+#endif
+#else
+#define CHECK_THREAD(env)
+#define CHECK_MTX_THREAD(env, mtx)
+#endif
+
+typedef enum {
+ THREAD_SLOT_NOT_IN_USE=0,
+ THREAD_OUT,
+ THREAD_ACTIVE,
+ THREAD_BLOCKED,
+ THREAD_BLOCKED_DEAD,
+ THREAD_FAILCHK,
+ THREAD_VERIFY
+} DB_THREAD_STATE;
+
+typedef struct __pin_list {
+ roff_t b_ref; /* offset to buffer. */
+ int region; /* region containing buffer. */
+} PIN_LIST;
+#define PINMAX 4
+
+struct __db_thread_info {
+ pid_t dbth_pid;
+ db_threadid_t dbth_tid;
+ DB_THREAD_STATE dbth_state;
+ SH_TAILQ_ENTRY dbth_links;
+ /*
+ * The following fields track which buffers this thread of
+ * control has pinned in the mpool buffer cache.
+ */
+ u_int16_t dbth_pincount; /* Number of pins for this thread. */
+ u_int16_t dbth_pinmax; /* Number of slots allocated. */
+ roff_t dbth_pinlist; /* List of pins. */
+ PIN_LIST dbth_pinarray[PINMAX]; /* Initial array of slots. */
+};
+
+typedef struct __env_thread_info {
+ u_int32_t thr_count;
+ u_int32_t thr_max;
+ u_int32_t thr_nbucket;
+ roff_t thr_hashoff;
+} THREAD_INFO;
+
+#define DB_EVENT(env, e, einfo) do { \
+ DB_ENV *__dbenv = (env)->dbenv; \
+ if (__dbenv->db_event_func != NULL) \
+ __dbenv->db_event_func(__dbenv, e, einfo); \
+} while (0)
+
+typedef struct __flag_map {
+ u_int32_t inflag, outflag;
+} FLAG_MAP;
+
+/*
+ * Internal database environment structure.
+ *
+ * This is the private database environment handle. The public environment
+ * handle is the DB_ENV structure. The library owns this structure, the user
+ * owns the DB_ENV structure. The reason there are two structures is because
+ * the user's configuration outlives any particular DB_ENV->open call, and
+ * separate structures allows us to easily discard internal information without
+ * discarding the user's configuration.
+ */
+struct __env {
+ DB_ENV *dbenv; /* Linked DB_ENV structure */
+
+ /*
+ * The ENV structure can be used concurrently, so field access is
+ * protected.
+ */
+ db_mutex_t mtx_env; /* ENV structure mutex */
+
+ /*
+ * Some fields are included in the ENV structure rather than in the
+ * DB_ENV structure because they are only set as arguments to the
+ * DB_ENV->open method. In other words, because of the historic API,
+ * not for any rational reason.
+ *
+ * Arguments to DB_ENV->open.
+ */
+ char *db_home; /* Database home */
+ u_int32_t open_flags; /* Flags */
+ int db_mode; /* Default open permissions */
+
+ pid_t pid_cache; /* Cached process ID */
+
+ DB_FH *lockfhp; /* fcntl(2) locking file handle */
+
+ DB_LOCKER *env_lref; /* Locker in non-threaded handles */
+
+ DB_DISTAB recover_dtab; /* Dispatch table for recover funcs */
+
+ int dir_mode; /* Intermediate directory perms. */
+
+ /* Thread tracking */
+ u_int32_t thr_nbucket; /* Number of hash buckets */
+ DB_HASHTAB *thr_hashtab; /* Hash table of DB_THREAD_INFO */
+
+ /* Mutex allocation */
+ struct {
+ int alloc_id; /* Allocation ID argument */
+ u_int32_t flags; /* Flags argument */
+ } *mutex_iq; /* Initial mutexes queue */
+ u_int mutex_iq_next; /* Count of initial mutexes */
+ u_int mutex_iq_max; /* Maximum initial mutexes */
+
+ /*
+ * List of open DB handles for this ENV, used for cursor
+ * adjustment. Must be protected for multi-threaded support.
+ */
+ db_mutex_t mtx_dblist;
+ int db_ref; /* DB handle reference count */
+ TAILQ_HEAD(__dblist, __db) dblist;
+
+ /*
+ * List of open file handles for this ENV. Must be protected
+ * for multi-threaded support.
+ */
+ TAILQ_HEAD(__fdlist, __fh_t) fdlist;
+
+ db_mutex_t mtx_mt; /* Mersenne Twister mutex */
+ int mti; /* Mersenne Twister index */
+ u_long *mt; /* Mersenne Twister state vector */
+
+ DB_CIPHER *crypto_handle; /* Crypto handle */
+ DB_LOCKTAB *lk_handle; /* Lock handle */
+ DB_LOG *lg_handle; /* Log handle */
+ DB_MPOOL *mp_handle; /* Mpool handle */
+ DB_MUTEXMGR *mutex_handle; /* Mutex handle */
+ DB_REP *rep_handle; /* Replication handle */
+ DB_TXNMGR *tx_handle; /* Txn handle */
+
+ /* Application callback to copy data to/from a custom data source */
+#define DB_USERCOPY_GETDATA 0x0001
+#define DB_USERCOPY_SETDATA 0x0002
+ int (*dbt_usercopy)
+ __P((DBT *, u_int32_t, void *, u_int32_t, u_int32_t));
+
+ REGINFO *reginfo; /* REGINFO structure reference */
+
+#define DB_TEST_ELECTINIT 1 /* after __rep_elect_init */
+#define DB_TEST_ELECTVOTE1 2 /* after sending VOTE1 */
+#define DB_TEST_POSTDESTROY 3 /* after destroy op */
+#define DB_TEST_POSTLOG 4 /* after logging all pages */
+#define DB_TEST_POSTLOGMETA 5 /* after logging meta in btree */
+#define DB_TEST_POSTOPEN 6 /* after __os_open */
+#define DB_TEST_POSTSYNC 7 /* after syncing the log */
+#define DB_TEST_PREDESTROY 8 /* before destroy op */
+#define DB_TEST_PREOPEN 9 /* before __os_open */
+#define DB_TEST_SUBDB_LOCKS 10 /* subdb locking tests */
+ int test_abort; /* Abort value for testing */
+ int test_check; /* Checkpoint value for testing */
+ int test_copy; /* Copy value for testing */
+
+#define ENV_CDB 0x00000001 /* DB_INIT_CDB */
+#define ENV_DBLOCAL 0x00000002 /* Environment for a private DB */
+#define ENV_LITTLEENDIAN 0x00000004 /* Little endian system. */
+#define ENV_LOCKDOWN 0x00000008 /* DB_LOCKDOWN set */
+#define ENV_NO_OUTPUT_SET 0x00000010 /* No output channel set */
+#define ENV_OPEN_CALLED 0x00000020 /* DB_ENV->open called */
+#define ENV_PRIVATE 0x00000040 /* DB_PRIVATE set */
+#define ENV_RECOVER_FATAL 0x00000080 /* Doing fatal recovery in env */
+#define ENV_REF_COUNTED 0x00000100 /* Region references this handle */
+#define ENV_SYSTEM_MEM 0x00000200 /* DB_SYSTEM_MEM set */
+#define ENV_THREAD 0x00000400 /* DB_THREAD set */
+ u_int32_t flags;
+};
+
+/*******************************************************
+ * Database Access Methods.
+ *******************************************************/
+/*
+ * DB_IS_THREADED --
+ * The database handle is free-threaded (was opened with DB_THREAD).
+ */
+#define DB_IS_THREADED(dbp) \
+ ((dbp)->mutex != MUTEX_INVALID)
+
+/* Initialization methods are often illegal before/after open is called. */
+#define DB_ILLEGAL_AFTER_OPEN(dbp, name) \
+ if (F_ISSET((dbp), DB_AM_OPEN_CALLED)) \
+ return (__db_mi_open((dbp)->env, name, 1));
+#define DB_ILLEGAL_BEFORE_OPEN(dbp, name) \
+ if (!F_ISSET((dbp), DB_AM_OPEN_CALLED)) \
+ return (__db_mi_open((dbp)->env, name, 0));
+/* Some initialization methods are illegal if environment isn't local. */
+#define DB_ILLEGAL_IN_ENV(dbp, name) \
+ if (!F_ISSET((dbp)->env, ENV_DBLOCAL)) \
+ return (__db_mi_env((dbp)->env, name));
+#define DB_ILLEGAL_METHOD(dbp, flags) { \
+ int __ret; \
+ if ((__ret = __dbh_am_chk(dbp, flags)) != 0) \
+ return (__ret); \
+}
+
+/*
+ * Common DBC->internal fields. Each access method adds additional fields
+ * to this list, but the initial fields are common.
+ */
+#define __DBC_INTERNAL \
+ DBC *opd; /* Off-page duplicate cursor. */\
+ DBC *pdbc; /* Pointer to parent cursor. */ \
+ \
+ void *page; /* Referenced page. */ \
+ u_int32_t part; /* Partition number. */ \
+ db_pgno_t root; /* Tree root. */ \
+ db_pgno_t pgno; /* Referenced page number. */ \
+ db_indx_t indx; /* Referenced key item index. */\
+ \
+ /* Streaming -- cache last position. */ \
+ db_pgno_t stream_start_pgno; /* Last start pgno. */ \
+ u_int32_t stream_off; /* Current offset. */ \
+ db_pgno_t stream_curr_pgno; /* Current overflow page. */ \
+ \
+ DB_LOCK lock; /* Cursor lock. */ \
+ db_lockmode_t lock_mode; /* Lock mode. */
+
+struct __dbc_internal {
+ __DBC_INTERNAL
+};
+
+/* Actions that __db_master_update can take. */
+typedef enum { MU_REMOVE, MU_RENAME, MU_OPEN } mu_action;
+
+/*
+ * Access-method-common macro for determining whether a cursor
+ * has been initialized.
+ */
+#ifdef HAVE_PARTITION
+#define IS_INITIALIZED(dbc) (DB_IS_PARTITIONED((dbc)->dbp) ? \
+ ((PART_CURSOR *)(dbc)->internal)->sub_cursor != NULL && \
+ ((PART_CURSOR *)(dbc)->internal)->sub_cursor-> \
+ internal->pgno != PGNO_INVALID : \
+ (dbc)->internal->pgno != PGNO_INVALID)
+#else
+#define IS_INITIALIZED(dbc) ((dbc)->internal->pgno != PGNO_INVALID)
+#endif
+
+/* Free the callback-allocated buffer, if necessary, hanging off of a DBT. */
+#define FREE_IF_NEEDED(env, dbt) \
+ if (F_ISSET((dbt), DB_DBT_APPMALLOC)) { \
+ __os_ufree((env), (dbt)->data); \
+ F_CLR((dbt), DB_DBT_APPMALLOC); \
+ }
+
+/*
+ * Use memory belonging to object "owner" to return the results of
+ * any no-DBT-flag get ops on cursor "dbc".
+ */
+#define SET_RET_MEM(dbc, owner) \
+ do { \
+ (dbc)->rskey = &(owner)->my_rskey; \
+ (dbc)->rkey = &(owner)->my_rkey; \
+ (dbc)->rdata = &(owner)->my_rdata; \
+ } while (0)
+
+/* Use the return-data memory src is currently set to use in dest as well. */
+#define COPY_RET_MEM(src, dest) \
+ do { \
+ (dest)->rskey = (src)->rskey; \
+ (dest)->rkey = (src)->rkey; \
+ (dest)->rdata = (src)->rdata; \
+ } while (0)
+
+/* Reset the returned-memory pointers to their defaults. */
+#define RESET_RET_MEM(dbc) \
+ do { \
+ (dbc)->rskey = &(dbc)->my_rskey; \
+ (dbc)->rkey = &(dbc)->my_rkey; \
+ (dbc)->rdata = &(dbc)->my_rdata; \
+ } while (0)
+
+/*******************************************************
+ * Mpool.
+ *******************************************************/
+/*
+ * File types for DB access methods. Negative numbers are reserved to DB.
+ */
+#define DB_FTYPE_SET -1 /* Call pgin/pgout functions. */
+#define DB_FTYPE_NOTSET 0 /* Don't call... */
+#define DB_LSN_OFF_NOTSET -1 /* Not yet set. */
+#define DB_CLEARLEN_NOTSET UINT32_MAX /* Not yet set. */
+
+/* Structure used as the DB pgin/pgout pgcookie. */
+typedef struct __dbpginfo {
+ size_t db_pagesize; /* Underlying page size. */
+ u_int32_t flags; /* Some DB_AM flags needed. */
+ DBTYPE type; /* DB type */
+} DB_PGINFO;
+
+/*******************************************************
+ * Log.
+ *******************************************************/
+/* Initialize an LSN to 'zero'. */
+#define ZERO_LSN(LSN) do { \
+ (LSN).file = 0; \
+ (LSN).offset = 0; \
+} while (0)
+#define IS_ZERO_LSN(LSN) ((LSN).file == 0 && (LSN).offset == 0)
+
+#define IS_INIT_LSN(LSN) ((LSN).file == 1 && (LSN).offset == 0)
+#define INIT_LSN(LSN) do { \
+ (LSN).file = 1; \
+ (LSN).offset = 0; \
+} while (0)
+
+#define MAX_LSN(LSN) do { \
+ (LSN).file = UINT32_MAX; \
+ (LSN).offset = UINT32_MAX; \
+} while (0)
+#define IS_MAX_LSN(LSN) \
+ ((LSN).file == UINT32_MAX && (LSN).offset == UINT32_MAX)
+
+/* If logging is turned off, smash the lsn. */
+#define LSN_NOT_LOGGED(LSN) do { \
+ (LSN).file = 0; \
+ (LSN).offset = 1; \
+} while (0)
+#define IS_NOT_LOGGED_LSN(LSN) \
+ ((LSN).file == 0 && (LSN).offset == 1)
+
+/*
+ * LOG_COMPARE -- compare two LSNs.
+ */
+
+#define LOG_COMPARE(lsn0, lsn1) \
+ ((lsn0)->file != (lsn1)->file ? \
+ ((lsn0)->file < (lsn1)->file ? -1 : 1) : \
+ ((lsn0)->offset != (lsn1)->offset ? \
+ ((lsn0)->offset < (lsn1)->offset ? -1 : 1) : 0))
+
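+/*
+ * For example, given lsn0 = {1, 100} and lsn1 = {2, 0} (file, offset),
+ * LOG_COMPARE(&lsn0, &lsn1) returns -1: file 1 precedes file 2, so the
+ * offsets are never consulted.
+ */
+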
+/*******************************************************
+ * Txn.
+ *******************************************************/
+#define DB_NONBLOCK(C) ((C)->txn != NULL && F_ISSET((C)->txn, TXN_NOWAIT))
+#define NOWAIT_FLAG(txn) \
+ ((txn) != NULL && F_ISSET((txn), TXN_NOWAIT) ? DB_LOCK_NOWAIT : 0)
+#define IS_REAL_TXN(txn) \
+ ((txn) != NULL && !F_ISSET(txn, TXN_CDSGROUP))
+#define IS_SUBTRANSACTION(txn) \
+ ((txn) != NULL && (txn)->parent != NULL)
+
+/*******************************************************
+ * Crypto.
+ *******************************************************/
+#define DB_IV_BYTES 16 /* Bytes per IV */
+#define DB_MAC_KEY 20 /* Bytes per MAC checksum */
+
+/*******************************************************
+ * Compression
+ *******************************************************/
+#define CMP_INT_SPARE_VAL 0xFC /* Smallest byte value that the integer
+ compression algorithm doesn't use */
+
+/*******************************************************
+ * Secondaries over RPC.
+ *******************************************************/
+#ifdef CONFIG_TEST
+/*
+ * These are flags passed to DB->associate calls by the Tcl API if running
+ * over RPC. The RPC server will mask out these flags before making the real
+ * DB->associate call.
+ *
+ * These flags must coexist with the valid flags to DB->associate (currently
+ * DB_AUTO_COMMIT and DB_CREATE). DB_AUTO_COMMIT is in the group of
+ * high-order shared flags (0xff000000), and DB_CREATE is in the low-order
+ * group (0x00000fff), so we pick a range in between.
+ */
+#define DB_RPC2ND_MASK 0x00f00000 /* Reserved bits. */
+
+#define DB_RPC2ND_REVERSEDATA 0x00100000 /* callback_n(0) _s_reversedata. */
+#define DB_RPC2ND_NOOP 0x00200000 /* callback_n(1) _s_noop */
+#define DB_RPC2ND_CONCATKEYDATA 0x00300000 /* callback_n(2) _s_concatkeydata */
+#define DB_RPC2ND_CONCATDATAKEY 0x00400000 /* callback_n(3) _s_concatdatakey */
+#define DB_RPC2ND_REVERSECONCAT 0x00500000 /* callback_n(4) _s_reverseconcat */
+#define DB_RPC2ND_TRUNCDATA 0x00600000 /* callback_n(5) _s_truncdata */
+#define DB_RPC2ND_CONSTANT 0x00700000 /* callback_n(6) _s_constant */
+#define DB_RPC2ND_GETZIP 0x00800000 /* sj_getzip */
+#define DB_RPC2ND_GETNAME 0x00900000 /* sj_getname */
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+
+/*******************************************************
+ * Remaining general DB includes.
+ *******************************************************/
+@db_int_def@
+
+#include "dbinc/globals.h"
+#include "dbinc/clock.h"
+#include "dbinc/debug.h"
+#include "dbinc/region.h"
+#include "dbinc_auto/env_ext.h"
+#include "dbinc/mutex.h"
+#ifdef HAVE_REPLICATION_THREADS
+#include "dbinc/repmgr.h"
+#endif
+#include "dbinc/rep.h"
+#include "dbinc/os.h"
+#include "dbinc_auto/clib_ext.h"
+#include "dbinc_auto/common_ext.h"
+
+/*******************************************************
+ * Remaining Log.
+ * These need to be defined after the general includes
+ * because they need rep.h from above.
+ *******************************************************/
+/*
+ * Test if the environment is currently logging changes. If we're in recovery
+ * or we're a replication client, we don't need to log changes because they're
+ * already in the log, even though we have a fully functional log system.
+ */
+#define DBENV_LOGGING(env) \
+ (LOGGING_ON(env) && !IS_REP_CLIENT(env) && (!IS_RECOVERING(env)))
+
+/*
+ * Test if we need to log a change. By default, we don't log operations
+ * without associated transactions. If DIAGNOSTIC, DEBUG_ROP or DEBUG_WOP
+ * is defined we log them anyway: those modes want log records for
+ * read/write operations, and when debugging, more information is better.
+ *
+ * The DBC_RECOVER flag is set when we're in abort, as well as during recovery;
+ * thus DBC_LOGGING may be false for a particular dbc even when DBENV_LOGGING
+ * is true.
+ *
+ * We explicitly use LOGGING_ON/IS_REP_CLIENT here because we don't want to pull
+ * in the log headers, which IS_RECOVERING (and thus DBENV_LOGGING) rely on, and
+ * because DBC_RECOVER should be set anytime IS_RECOVERING would be true.
+ *
+ * If we're not in recovery (a master doing an abort, or a client applying
+ * a txn), then a client's only path through here is on an internal
+ * operation, and a master's only path through here is a transactional
+ * operation. Detect if either is not the case.
+ */
+#if defined(DIAGNOSTIC) || defined(DEBUG_ROP) || defined(DEBUG_WOP)
+#define DBC_LOGGING(dbc) __dbc_logging(dbc)
+#else
+#define DBC_LOGGING(dbc) \
+ ((dbc)->txn != NULL && LOGGING_ON((dbc)->env) && \
+ !F_ISSET((dbc), DBC_RECOVER) && !IS_REP_CLIENT((dbc)->env))
+#endif
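+
+/*
+ * The typical calling pattern in the access methods (sketch; the generated
+ * log function and its trailing arguments are stand-ins for any __*_log
+ * routine):
+ *
+ *	if (DBC_LOGGING(dbc)) {
+ *		if ((ret = __bam_cdel_log(dbp, dbc->txn, &lsn, 0, ...)) != 0)
+ *			goto err;
+ *	} else
+ *		LSN_NOT_LOGGED(lsn);
+ */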
+
+#endif /* !_DB_INT_H_ */
diff --git a/db-4.8.30/dbinc/db_join.h b/db-4.8.30/dbinc/db_join.h
new file mode 100644
index 0000000..06bab08
--- /dev/null
+++ b/db-4.8.30/dbinc/db_join.h
@@ -0,0 +1,37 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1998-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_JOIN_H_
+#define _DB_JOIN_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * Joins use a join cursor that is similar to a regular DB cursor except
+ * that it only supports c_get and c_close functionality. Also, it does
+ * not support the full range of flags for get.
+ */
+typedef struct __join_cursor {
+ u_int8_t *j_exhausted; /* Array of flags; is cursor i exhausted? */
+ DBC **j_curslist; /* Array of cursors in the join: constant. */
+ DBC **j_fdupcurs; /* Cursors w/ first instances of current dup. */
+ DBC **j_workcurs; /* Scratch cursor copies to muck with. */
+ DB *j_primary; /* Primary dbp. */
+ DBT j_key; /* Used to do lookups. */
+ DBT j_rdata; /* Memory used for data return. */
+ u_int32_t j_ncurs; /* How many cursors do we have? */
+#define JOIN_RETRY 0x01 /* Error on primary get; re-return same key. */
+ u_int32_t flags;
+} JOIN_CURSOR;
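+
+/*
+ * Join cursors are created through the public DB->join method; a minimal
+ * sketch (error handling omitted; the caller fills curslist with cursors
+ * positioned in the secondary indices and NULL-terminates the array):
+ *
+ *	DBC *curslist[3], *join_curs;
+ *	DBT key, data;
+ *
+ *	memset(&key, 0, sizeof(key));
+ *	memset(&data, 0, sizeof(data));
+ *	curslist[2] = NULL;
+ *	ret = primary->join(primary, curslist, &join_curs, 0);
+ *	while ((ret = join_curs->c_get(join_curs, &key, &data, 0)) == 0)
+ *		process the matching primary key/data pair;
+ *	ret = join_curs->c_close(join_curs);
+ */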
+
+#if defined(__cplusplus)
+}
+#endif
+#endif /* !_DB_JOIN_H_ */
diff --git a/db-4.8.30/dbinc/db_page.h b/db-4.8.30/dbinc/db_page.h
new file mode 100644
index 0000000..45d06c9
--- /dev/null
+++ b/db-4.8.30/dbinc/db_page.h
@@ -0,0 +1,672 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_PAGE_H_
+#define _DB_PAGE_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * DB page formats.
+ *
+ * !!!
+ * This implementation requires that values within the following structures
+ * NOT be padded -- note, ANSI C permits random padding within structures.
+ * If your compiler pads randomly you can just forget ever making DB run on
+ * your system. In addition, no data type can require larger alignment than
+ * its own size, e.g., a 4-byte data element may not require 8-byte alignment.
+ *
+ * Note that key/data lengths are often stored in db_indx_t's -- this is
+ * not accidental, nor does it limit the key/data size. If the key/data
+ * item fits on a page, it's guaranteed to be small enough to fit into a
+ * db_indx_t, and storing it in one saves space.
+ */
+
+#define PGNO_INVALID 0 /* Invalid page number in any database. */
+#define PGNO_BASE_MD 0 /* Base database: metadata page number. */
+
+/* Page types. */
+#define P_INVALID 0 /* Invalid page type. */
+#define __P_DUPLICATE 1 /* Duplicate. DEPRECATED in 3.1 */
+#define P_HASH_UNSORTED 2 /* Hash pages created pre 4.6. DEPRECATED */
+#define P_IBTREE 3 /* Btree internal. */
+#define P_IRECNO 4 /* Recno internal. */
+#define P_LBTREE 5 /* Btree leaf. */
+#define P_LRECNO 6 /* Recno leaf. */
+#define P_OVERFLOW 7 /* Overflow. */
+#define P_HASHMETA 8 /* Hash metadata page. */
+#define P_BTREEMETA 9 /* Btree metadata page. */
+#define P_QAMMETA 10 /* Queue metadata page. */
+#define P_QAMDATA 11 /* Queue data page. */
+#define P_LDUP 12 /* Off-page duplicate leaf. */
+#define P_HASH 13 /* Sorted hash page. */
+#define P_PAGETYPE_MAX 14
+/* Flag to __db_new */
+#define P_DONTEXTEND 0x8000 /* Don't allocate if there are no free pages. */
+
+/*
+ * When we create pages in mpool, we ask mpool to clear some number of bytes
+ * in the header. This number must be at least as big as the regular page
+ * headers and cover enough of the btree and hash meta-data pages to obliterate
+ * the page type.
+ */
+#define DB_PAGE_DB_LEN 32
+#define DB_PAGE_QUEUE_LEN 0
+
+/************************************************************************
+ GENERIC METADATA PAGE HEADER
+ *
+ * !!!
+ * The magic and version numbers have to be in the same place in all versions
+ * of the metadata page as the application may not have upgraded the database.
+ ************************************************************************/
+typedef struct _dbmeta33 {
+ DB_LSN lsn; /* 00-07: LSN. */
+ db_pgno_t pgno; /* 08-11: Current page number. */
+ u_int32_t magic; /* 12-15: Magic number. */
+ u_int32_t version; /* 16-19: Version. */
+ u_int32_t pagesize; /* 20-23: Pagesize. */
+ u_int8_t encrypt_alg; /* 24: Encryption algorithm. */
+ u_int8_t type; /* 25: Page type. */
+#define DBMETA_CHKSUM 0x01
+#define DBMETA_PART_RANGE 0x02
+#define DBMETA_PART_CALLBACK 0x04
+ u_int8_t metaflags; /* 26: Meta-only flags */
+ u_int8_t unused1; /* 27: Unused. */
+ u_int32_t free; /* 28-31: Free list page number. */
+ db_pgno_t last_pgno; /* 32-35: Page number of last page in db. */
+ u_int32_t nparts; /* 36-39: Number of partitions. */
+ u_int32_t key_count; /* 40-43: Cached key count. */
+ u_int32_t record_count; /* 44-47: Cached record count. */
+ u_int32_t flags; /* 48-51: Flags: unique to each AM. */
+ /* 52-71: Unique file ID. */
+ u_int8_t uid[DB_FILE_ID_LEN];
+} DBMETA33, DBMETA;
+
+/************************************************************************
+ BTREE METADATA PAGE LAYOUT
+ ************************************************************************/
+typedef struct _btmeta33 {
+#define BTM_DUP 0x001 /* Duplicates. */
+#define BTM_RECNO 0x002 /* Recno tree. */
+#define BTM_RECNUM 0x004 /* Btree: maintain record count. */
+#define BTM_FIXEDLEN 0x008 /* Recno: fixed length records. */
+#define BTM_RENUMBER 0x010 /* Recno: renumber on insert/delete. */
+#define BTM_SUBDB 0x020 /* Subdatabases. */
+#define BTM_DUPSORT 0x040 /* Duplicates are sorted. */
+#define BTM_COMPRESS 0x080 /* Compressed. */
+#define BTM_MASK 0x0ff
+ DBMETA dbmeta; /* 00-71: Generic meta-data header. */
+
+ u_int32_t unused1; /* 72-75: Unused space. */
+ u_int32_t minkey; /* 76-79: Btree: Minkey. */
+ u_int32_t re_len; /* 80-83: Recno: fixed-length record length. */
+ u_int32_t re_pad; /* 84-87: Recno: fixed-length record pad. */
+ u_int32_t root; /* 88-91: Root page. */
+ u_int32_t unused2[92]; /* 92-459: Unused space. */
+ u_int32_t crypto_magic; /* 460-463: Crypto magic number */
+ u_int32_t trash[3]; /* 464-475: Trash space - Do not use */
+ u_int8_t iv[DB_IV_BYTES]; /* 476-495: Crypto IV */
+ u_int8_t chksum[DB_MAC_KEY]; /* 496-511: Page chksum */
+
+ /*
+ * Minimum page size is 512.
+ */
+} BTMETA33, BTMETA;
+
+/************************************************************************
+ HASH METADATA PAGE LAYOUT
+ ************************************************************************/
+typedef struct _hashmeta33 {
+#define DB_HASH_DUP 0x01 /* Duplicates. */
+#define DB_HASH_SUBDB 0x02 /* Subdatabases. */
+#define DB_HASH_DUPSORT 0x04 /* Duplicates are sorted. */
+ DBMETA dbmeta; /* 00-71: Generic meta-data page header. */
+
+ u_int32_t max_bucket; /* 72-75: ID of Maximum bucket in use */
+ u_int32_t high_mask; /* 76-79: Modulo mask into table */
+ u_int32_t low_mask; /* 80-83: Modulo mask into table lower half */
+ u_int32_t ffactor; /* 84-87: Fill factor */
+ u_int32_t nelem; /* 88-91: Number of keys in hash table */
+ u_int32_t h_charkey; /* 92-95: Value of hash(CHARKEY) */
+#define NCACHED 32 /* number of spare points */
+ /* 96-223: Spare pages for overflow */
+ u_int32_t spares[NCACHED];
+ u_int32_t unused[59]; /* 224-459: Unused space */
+ u_int32_t crypto_magic; /* 460-463: Crypto magic number */
+ u_int32_t trash[3]; /* 464-475: Trash space - Do not use */
+ u_int8_t iv[DB_IV_BYTES]; /* 476-495: Crypto IV */
+ u_int8_t chksum[DB_MAC_KEY]; /* 496-511: Page chksum */
+
+ /*
+ * Minimum page size is 512.
+ */
+} HMETA33, HMETA;
+
+/************************************************************************
+ QUEUE METADATA PAGE LAYOUT
+ ************************************************************************/
+/*
+ * QAM metadata page structure.
+ */
+typedef struct _qmeta33 {
+ DBMETA dbmeta; /* 00-71: Generic meta-data header. */
+
+ u_int32_t first_recno; /* 72-75: First not deleted record. */
+ u_int32_t cur_recno; /* 76-79: Next recno to be allocated. */
+ u_int32_t re_len; /* 80-83: Fixed-length record length. */
+ u_int32_t re_pad; /* 84-87: Fixed-length record pad. */
+ u_int32_t rec_page; /* 88-91: Records Per Page. */
+ u_int32_t page_ext; /* 92-95: Pages per extent */
+
+ u_int32_t unused[91]; /* 96-459: Unused space */
+ u_int32_t crypto_magic; /* 460-463: Crypto magic number */
+ u_int32_t trash[3]; /* 464-475: Trash space - Do not use */
+ u_int8_t iv[DB_IV_BYTES]; /* 476-495: Crypto IV */
+ u_int8_t chksum[DB_MAC_KEY]; /* 496-511: Page chksum */
+ /*
+ * Minimum page size is 512.
+ */
+} QMETA33, QMETA;
+
+/*
+ * DBMETASIZE is a constant used by __db_file_setup and DB->verify
+ * as a buffer size: it is guaranteed to be larger than any possible
+ * metadata structure and no larger than any disk sector.
+ */
+#define DBMETASIZE 512
+
+/************************************************************************
+ BTREE/HASH MAIN PAGE LAYOUT
+ ************************************************************************/
+/*
+ * +-----------------------------------+
+ * | lsn | pgno | prev pgno |
+ * +-----------------------------------+
+ * | next pgno | entries | hf offset |
+ * +-----------------------------------+
+ * | level | type | chksum |
+ * +-----------------------------------+
+ * | iv | index | free --> |
+ * +-----------+-----------------------+
+ * | F R E E A R E A |
+ * +-----------------------------------+
+ * | <-- free | item |
+ * +-----------------------------------+
+ * | item | item | item |
+ * +-----------------------------------+
+ *
+ * sizeof(PAGE) == 26 bytes + possibly 20 bytes of checksum and possibly
+ * 16 bytes of IV (+ 2 bytes for alignment), and the following indices
+ * are guaranteed to be two-byte aligned. If we aren't doing crypto or
+ * checksumming the bytes are reclaimed for data storage.
+ *
+ * For hash and btree leaf pages, index items are paired, e.g., inp[0] is the
+ * key for inp[1]'s data. All other types of pages only contain single items.
+ */
+typedef struct __pg_chksum {
+ u_int8_t unused[2]; /* 26-27: For alignment */
+ u_int8_t chksum[4]; /* 28-31: Checksum */
+} PG_CHKSUM;
+
+typedef struct __pg_crypto {
+ u_int8_t unused[2]; /* 26-27: For alignment */
+ u_int8_t chksum[DB_MAC_KEY]; /* 28-47: Checksum */
+ u_int8_t iv[DB_IV_BYTES]; /* 48-63: IV */
+ /* !!!
+ * Must be 16-byte aligned for crypto
+ */
+} PG_CRYPTO;
+
+typedef struct _db_page {
+ DB_LSN lsn; /* 00-07: Log sequence number. */
+ db_pgno_t pgno; /* 08-11: Current page number. */
+ db_pgno_t prev_pgno; /* 12-15: Previous page number. */
+ db_pgno_t next_pgno; /* 16-19: Next page number. */
+ db_indx_t entries; /* 20-21: Number of items on the page. */
+ db_indx_t hf_offset; /* 22-23: High free byte page offset. */
+
+ /*
+ * The btree levels are numbered from the leaf to the root, starting
+ * with 1, so the leaf is level 1, its parent is level 2, and so on.
+ * We maintain this level on all btree pages, but the only place that
+ * we actually need it is on the root page. It would not be difficult
+ * to hide the byte on the root page once it becomes an internal page,
+ * so we could get this byte back if we needed it for something else.
+ */
+#define LEAFLEVEL 1
+#define MAXBTREELEVEL 255
+ u_int8_t level; /* 24: Btree tree level. */
+ u_int8_t type; /* 25: Page type. */
+} PAGE;
+
+/*
+ * With many compilers sizeof(PAGE) == 28, while SIZEOF_PAGE == 26.
+ * We add other things directly after the page header and need
+ * SIZEOF_PAGE: many compilers pad sizeof(PAGE) out to the next
+ * 4-byte boundary.
+ */
+#define SIZEOF_PAGE 26
+/*
+ * !!!
+ * DB_AM_ENCRYPT always implies DB_AM_CHKSUM so that must come first.
+ */
+#define P_INP(dbp, pg) \
+ ((db_indx_t *)((u_int8_t *)(pg) + SIZEOF_PAGE + \
+ (F_ISSET((dbp), DB_AM_ENCRYPT) ? sizeof(PG_CRYPTO) : \
+ (F_ISSET((dbp), DB_AM_CHKSUM) ? sizeof(PG_CHKSUM) : 0))))
+
+#define P_IV(dbp, pg) \
+ (F_ISSET((dbp), DB_AM_ENCRYPT) ? ((u_int8_t *)(pg) + \
+ SIZEOF_PAGE + SSZA(PG_CRYPTO, iv)) \
+ : NULL)
+
+#define P_CHKSUM(dbp, pg) \
+ (F_ISSET((dbp), DB_AM_ENCRYPT) ? ((u_int8_t *)(pg) + \
+ SIZEOF_PAGE + SSZA(PG_CRYPTO, chksum)) : \
+ (F_ISSET((dbp), DB_AM_CHKSUM) ? ((u_int8_t *)(pg) + \
+ SIZEOF_PAGE + SSZA(PG_CHKSUM, chksum)) \
+ : NULL))
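+
+/*
+ * Sketch of how the index array is used: the db_indx_t slots P_INP locates
+ * hold byte offsets from the start of the page, so item indx on a page is
+ * found with (this is exactly what P_ENTRY, below, does):
+ *
+ *	db_indx_t *inp;
+ *
+ *	inp = P_INP(dbp, pagep);
+ *	item = (u_int8_t *)pagep + inp[indx];
+ */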
+
+/* PAGE element macros. */
+#define LSN(p) (((PAGE *)p)->lsn)
+#define PGNO(p) (((PAGE *)p)->pgno)
+#define PREV_PGNO(p) (((PAGE *)p)->prev_pgno)
+#define NEXT_PGNO(p) (((PAGE *)p)->next_pgno)
+#define NUM_ENT(p) (((PAGE *)p)->entries)
+#define HOFFSET(p) (((PAGE *)p)->hf_offset)
+#define LEVEL(p) (((PAGE *)p)->level)
+#define TYPE(p) (((PAGE *)p)->type)
+
+/************************************************************************
+ QUEUE MAIN PAGE LAYOUT
+ ************************************************************************/
+/*
+ * Sizes of the QPAGE structure below, used to reclaim space when not
+ * doing crypto or checksumming. If you change QPAGE you MUST adjust
+ * these sizes too.
+ */
+#define QPAGE_NORMAL 28
+#define QPAGE_CHKSUM 48
+#define QPAGE_SEC 64
+
+typedef struct _qpage {
+ DB_LSN lsn; /* 00-07: Log sequence number. */
+ db_pgno_t pgno; /* 08-11: Current page number. */
+ u_int32_t unused0[3]; /* 12-23: Unused. */
+ u_int8_t unused1[1]; /* 24: Unused. */
+ u_int8_t type; /* 25: Page type. */
+ u_int8_t unused2[2]; /* 26-27: Unused. */
+ u_int8_t chksum[DB_MAC_KEY]; /* 28-47: Checksum */
+ u_int8_t iv[DB_IV_BYTES]; /* 48-63: IV */
+} QPAGE;
+
+#define QPAGE_SZ(dbp) \
+ (F_ISSET((dbp), DB_AM_ENCRYPT) ? QPAGE_SEC : \
+ F_ISSET((dbp), DB_AM_CHKSUM) ? QPAGE_CHKSUM : QPAGE_NORMAL)
+/*
+ * !!!
+ * The next_pgno and prev_pgno fields are not maintained for btree and recno
+ * internal pages. Doing so only provides a minor performance improvement,
+ * it's hard to do when deleting internal pages, and it increases the chance
+ * of deadlock during deletes and splits because we have to re-link pages at
+ * more than the leaf level.
+ *
+ * !!!
+ * The btree/recno access method needs db_recno_t bytes of space on the root
+ * page to specify how many records are stored in the tree. (The alternative
+ * is to store the number of records in the meta-data page, which will create
+ * a second hot spot in trees being actively modified, or recalculate it from
+ * the BINTERNAL fields on each access.) Overload the PREV_PGNO field.
+ */
+#define RE_NREC(p) \
+ ((TYPE(p) == P_IBTREE || TYPE(p) == P_IRECNO) ? PREV_PGNO(p) : \
+ (db_pgno_t)(TYPE(p) == P_LBTREE ? NUM_ENT(p) / 2 : NUM_ENT(p)))
+#define RE_NREC_ADJ(p, adj) \
+ PREV_PGNO(p) += adj;
+#define RE_NREC_SET(p, num) \
+ PREV_PGNO(p) = (num);
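+
+/*
+ * For example (sketch): the record count of an entire tree is read off
+ * its root page, and adjusted as records are added or removed beneath an
+ * internal page ip:
+ *
+ *	nrecs = RE_NREC(rootp);
+ *	RE_NREC_ADJ(ip, 1);
+ */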
+
+/*
+ * Initialize a page.
+ *
+ * !!!
+ * Don't modify the page's LSN, code depends on it being unchanged after a
+ * P_INIT call.
+ */
+#define P_INIT(pg, pg_size, n, pg_prev, pg_next, btl, pg_type) do { \
+ PGNO(pg) = (n); \
+ PREV_PGNO(pg) = (pg_prev); \
+ NEXT_PGNO(pg) = (pg_next); \
+ NUM_ENT(pg) = (0); \
+ HOFFSET(pg) = (db_indx_t)(pg_size); \
+ LEVEL(pg) = (btl); \
+ TYPE(pg) = (pg_type); \
+} while (0)
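+
+/*
+ * For example (sketch), a newly allocated btree leaf page is set up with:
+ *
+ *	P_INIT(pagep, dbp->pgsize, pgno, PGNO_INVALID, PGNO_INVALID,
+ *	    LEAFLEVEL, P_LBTREE);
+ */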
+
+/* Page header length (offset to first index). */
+#define P_OVERHEAD(dbp) P_TO_UINT16(P_INP(dbp, 0))
+
+/* First free byte. */
+#define LOFFSET(dbp, pg) \
+ (P_OVERHEAD(dbp) + NUM_ENT(pg) * sizeof(db_indx_t))
+
+/* Free space on a regular page. */
+#define P_FREESPACE(dbp, pg) (HOFFSET(pg) - LOFFSET(dbp, pg))
+
+/* Get a pointer to the bytes at a specific index. */
+#define P_ENTRY(dbp, pg, indx) ((u_int8_t *)pg + P_INP(dbp, pg)[indx])
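+
+/*
+ * A typical fit test before adding a key/data pair to a btree leaf page
+ * (sketch; BKEYDATA_PSIZE, defined below, is an item's size plus its
+ * index slot):
+ *
+ *	if (P_FREESPACE(dbp, pagep) >=
+ *	    BKEYDATA_PSIZE(key->size) + BKEYDATA_PSIZE(data->size))
+ *		the pair fits on this page;
+ */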
+
+/************************************************************************
+ OVERFLOW PAGE LAYOUT
+ ************************************************************************/
+
+/*
+ * Overflow items are referenced by HOFFPAGE and BOVERFLOW structures, which
+ * store a page number (the first page of the overflow item) and a length
+ * (the total length of the overflow item). The overflow item consists of
+ * some number of overflow pages, linked by the next_pgno field of the page.
+ * A next_pgno field of PGNO_INVALID flags the end of the overflow item.
+ *
+ * Overflow page overloads:
+ * The amount of overflow data stored on each page is stored in the
+ * hf_offset field.
+ *
+ * The implementation reference counts overflow items as it's possible
+ * for them to be promoted onto btree internal pages. The reference
+ * count is stored in the entries field.
+ */
+#define OV_LEN(p) (((PAGE *)p)->hf_offset)
+#define OV_REF(p) (((PAGE *)p)->entries)
+
+/* Maximum number of bytes that you can put on an overflow page. */
+#define P_MAXSPACE(dbp, psize) ((psize) - P_OVERHEAD(dbp))
+
+/* Free space on an overflow page. */
+#define P_OVFLSPACE(dbp, psize, pg) (P_MAXSPACE(dbp, psize) - HOFFSET(pg))
+
+/************************************************************************
+ HASH PAGE LAYOUT
+ ************************************************************************/
+
+/* Each index references a group of bytes on the page. */
+#define H_KEYDATA 1 /* Key/data item. */
+#define H_DUPLICATE 2 /* Duplicate key/data item. */
+#define H_OFFPAGE 3 /* Overflow key/data item. */
+#define H_OFFDUP 4 /* Overflow page of duplicates. */
+
+/*
+ * !!!
+ * Items on hash pages are (potentially) unaligned, so we can never cast the
+ * (page + offset) pointer to an HKEYDATA, HOFFPAGE or HOFFDUP structure, as
+ * we do with B+tree on-page structures. Because we frequently want the type
+ * field, and because it requires no alignment and sits in the same location
+ * in all three structures, there's a pair of macros.
+ */
+#define HPAGE_PTYPE(p) (*(u_int8_t *)p)
+#define HPAGE_TYPE(dbp, pg, indx) (*P_ENTRY(dbp, pg, indx))
+
+/*
+ * The first and second types are H_KEYDATA and H_DUPLICATE, represented
+ * by the HKEYDATA structure:
+ *
+ * +-----------------------------------+
+ * | type | key/data ... |
+ * +-----------------------------------+
+ *
+ * For duplicates, the data field encodes duplicate elements in the data
+ * field:
+ *
+ * +---------------------------------------------------------------+
+ * | type | len1 | element1 | len1 | len2 | element2 | len2 |
+ * +---------------------------------------------------------------+
+ *
+ * Thus, by keeping track of the offset in the element, we can do both
+ * backward and forward traversal.
+ */
+typedef struct _hkeydata {
+ u_int8_t type; /* 00: Page type. */
+ u_int8_t data[1]; /* Variable length key/data item. */
+} HKEYDATA;
+#define HKEYDATA_DATA(p) (((u_int8_t *)p) + SSZA(HKEYDATA, data))
+
+/*
+ * The length of any HKEYDATA item. Note that indx is an element index,
+ * not a PAIR index.
+ */
+#define LEN_HITEM(dbp, pg, pgsize, indx) \
+ (((indx) == 0 ? (pgsize) : \
+ (P_INP(dbp, pg)[(indx) - 1])) - (P_INP(dbp, pg)[indx]))
+
+#define LEN_HKEYDATA(dbp, pg, psize, indx) \
+ (db_indx_t)(LEN_HITEM(dbp, pg, psize, indx) - HKEYDATA_SIZE(0))
+
+/*
+ * Page space required to add a new HKEYDATA item to the page, with and
+ * without the index value.
+ */
+#define HKEYDATA_SIZE(len) \
+ ((len) + SSZA(HKEYDATA, data))
+#define HKEYDATA_PSIZE(len) \
+ (HKEYDATA_SIZE(len) + sizeof(db_indx_t))
+
+/* Put a HKEYDATA item at the location referenced by a page entry. */
+#define PUT_HKEYDATA(pe, kd, len, etype) { \
+ ((HKEYDATA *)(pe))->type = etype; \
+ memcpy((u_int8_t *)(pe) + sizeof(u_int8_t), kd, len); \
+}
+
+/*
+ * Macros that describe the page layout in terms of key-data pairs.
+ */
+#define H_NUMPAIRS(pg) (NUM_ENT(pg) / 2)
+#define H_KEYINDEX(indx) (indx)
+#define H_DATAINDEX(indx) ((indx) + 1)
+#define H_PAIRKEY(dbp, pg, indx) P_ENTRY(dbp, pg, H_KEYINDEX(indx))
+#define H_PAIRDATA(dbp, pg, indx) P_ENTRY(dbp, pg, H_DATAINDEX(indx))
+#define H_PAIRSIZE(dbp, pg, psize, indx) \
+ (LEN_HITEM(dbp, pg, psize, H_KEYINDEX(indx)) + \
+ LEN_HITEM(dbp, pg, psize, H_DATAINDEX(indx)))
+#define LEN_HDATA(dbp, p, psize, indx) \
+ LEN_HKEYDATA(dbp, p, psize, H_DATAINDEX(indx))
+#define LEN_HKEY(dbp, p, psize, indx) \
+ LEN_HKEYDATA(dbp, p, psize, H_KEYINDEX(indx))
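+
+/*
+ * Iterating over the pairs on a hash page with these macros (sketch;
+ * P_INDX, defined below, is 2):
+ *
+ *	db_indx_t indx;
+ *
+ *	for (indx = 0; indx < NUM_ENT(pagep); indx += P_INDX) {
+ *		keyp = H_PAIRKEY(dbp, pagep, indx);
+ *		datap = H_PAIRDATA(dbp, pagep, indx);
+ *	}
+ */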
+
+/*
+ * The third type is the H_OFFPAGE, represented by the HOFFPAGE structure:
+ */
+typedef struct _hoffpage {
+ u_int8_t type; /* 00: Page type and delete flag. */
+ u_int8_t unused[3]; /* 01-03: Padding, unused. */
+ db_pgno_t pgno; /* 04-07: Offpage page number. */
+ u_int32_t tlen; /* 08-11: Total length of item. */
+} HOFFPAGE;
+
+#define HOFFPAGE_PGNO(p) (((u_int8_t *)p) + SSZ(HOFFPAGE, pgno))
+#define HOFFPAGE_TLEN(p) (((u_int8_t *)p) + SSZ(HOFFPAGE, tlen))
+
+/*
+ * Page space required to add a new HOFFPAGE item to the page, with and
+ * without the index value.
+ */
+#define HOFFPAGE_SIZE (sizeof(HOFFPAGE))
+#define HOFFPAGE_PSIZE (HOFFPAGE_SIZE + sizeof(db_indx_t))
+
+/*
+ * The fourth type is H_OFFDUP represented by the HOFFDUP structure:
+ */
+typedef struct _hoffdup {
+ u_int8_t type; /* 00: Page type and delete flag. */
+ u_int8_t unused[3]; /* 01-03: Padding, unused. */
+ db_pgno_t pgno; /* 04-07: Offpage page number. */
+} HOFFDUP;
+#define HOFFDUP_PGNO(p) (((u_int8_t *)p) + SSZ(HOFFDUP, pgno))
+
+/*
+ * Page space required to add a new HOFFDUP item to the page, with and
+ * without the index value.
+ */
+#define HOFFDUP_SIZE (sizeof(HOFFDUP))
+
+/************************************************************************
+ BTREE PAGE LAYOUT
+ ************************************************************************/
+
+/* Each index references a group of bytes on the page. */
+#define B_KEYDATA 1 /* Key/data item. */
+#define B_DUPLICATE 2 /* Duplicate key/data item. */
+#define B_OVERFLOW 3 /* Overflow key/data item. */
+
+/*
+ * We have to store a deleted entry flag in the page. The reason is complex,
+ * but the simple version is that we can't delete on-page items referenced by
+ * a cursor -- the return order of subsequent insertions might be wrong. The
+ * delete flag is an overload of the top bit of the type byte.
+ */
+#define B_DELETE (0x80)
+#define B_DCLR(t) (t) &= ~B_DELETE
+#define B_DSET(t) (t) |= B_DELETE
+#define B_DISSET(t) ((t) & B_DELETE)
+
+#define B_TYPE(t) ((t) & ~B_DELETE)
+#define B_TSET(t, type) ((t) = B_TYPE(type))
+#define B_TSET_DELETED(t, type) ((t) = (type) | B_DELETE)
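+
+/*
+ * For example (sketch), to test an on-page item's type ignoring any
+ * pending delete, then mark the item deleted in place (GET_BKEYDATA is
+ * defined below):
+ *
+ *	bk = GET_BKEYDATA(dbp, pagep, indx);
+ *	if (B_TYPE(bk->type) == B_KEYDATA && !B_DISSET(bk->type))
+ *		B_DSET(bk->type);
+ */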
+
+/*
+ * The first type is B_KEYDATA, represented by the BKEYDATA structure:
+ */
+typedef struct _bkeydata {
+ db_indx_t len; /* 00-01: Key/data item length. */
+ u_int8_t type; /* 02: Page type AND DELETE FLAG. */
+ u_int8_t data[1]; /* Variable length key/data item. */
+} BKEYDATA;
+
+/* Get a BKEYDATA item for a specific index. */
+#define GET_BKEYDATA(dbp, pg, indx) \
+ ((BKEYDATA *)P_ENTRY(dbp, pg, indx))
+
+/*
+ * Page space required to add a new BKEYDATA item to the page, with and
+ * without the index value. The (u_int16_t) cast avoids warnings: DB_ALIGN
+ * casts to uintmax_t, and the cast back converts to a small integral type
+ * so we don't get complaints when we assign the final result to an
+ * integral type smaller than uintmax_t.
+ */
+#define BKEYDATA_SIZE(len) \
+ (u_int16_t)DB_ALIGN((len) + SSZA(BKEYDATA, data), sizeof(u_int32_t))
+#define BKEYDATA_PSIZE(len) \
+ (BKEYDATA_SIZE(len) + sizeof(db_indx_t))
+
+/*
+ * The second and third types are B_DUPLICATE and B_OVERFLOW, represented
+ * by the BOVERFLOW structure.
+ */
+typedef struct _boverflow {
+ db_indx_t unused1; /* 00-01: Padding, unused. */
+ u_int8_t type; /* 02: Page type AND DELETE FLAG. */
+ u_int8_t unused2; /* 03: Padding, unused. */
+ db_pgno_t pgno; /* 04-07: Next page number. */
+ u_int32_t tlen; /* 08-11: Total length of item. */
+} BOVERFLOW;
+
+/* Get a BOVERFLOW item for a specific index. */
+#define GET_BOVERFLOW(dbp, pg, indx) \
+ ((BOVERFLOW *)P_ENTRY(dbp, pg, indx))
+
+/*
+ * Page space required to add a new BOVERFLOW item to the page, with and
+ * without the index value.
+ */
+#define BOVERFLOW_SIZE \
+ ((u_int16_t)DB_ALIGN(sizeof(BOVERFLOW), sizeof(u_int32_t)))
+#define BOVERFLOW_PSIZE \
+ (BOVERFLOW_SIZE + sizeof(db_indx_t))
+
+#define BITEM_SIZE(bk) \
+ (B_TYPE((bk)->type) != B_KEYDATA ? BOVERFLOW_SIZE : \
+ BKEYDATA_SIZE((bk)->len))
+
+#define BITEM_PSIZE(bk) \
+ (B_TYPE((bk)->type) != B_KEYDATA ? BOVERFLOW_PSIZE : \
+ BKEYDATA_PSIZE((bk)->len))
+
+/*
+ * Btree leaf and hash page layouts group indices in sets of two, one for the
+ * key and one for the data. Everything else does it in sets of one to save
+ * space. Use the following macros so that it's obvious what's going on.
+ */
+#define O_INDX 1
+#define P_INDX 2
+
+/************************************************************************
+ BTREE INTERNAL PAGE LAYOUT
+ ************************************************************************/
+
+/*
+ * Btree internal entry.
+ */
+typedef struct _binternal {
+ db_indx_t len; /* 00-01: Key/data item length. */
+ u_int8_t type; /* 02: Page type AND DELETE FLAG. */
+ u_int8_t unused; /* 03: Padding, unused. */
+ db_pgno_t pgno; /* 04-07: Page number of referenced page. */
+ db_recno_t nrecs; /* 08-11: Subtree record count. */
+ u_int8_t data[1]; /* Variable length key item. */
+} BINTERNAL;
+
+/* Get a BINTERNAL item for a specific index. */
+#define GET_BINTERNAL(dbp, pg, indx) \
+ ((BINTERNAL *)P_ENTRY(dbp, pg, indx))
+
+/*
+ * Page space required to add a new BINTERNAL item to the page, with and
+ * without the index value.
+ */
+#define BINTERNAL_SIZE(len) \
+ (u_int16_t)DB_ALIGN((len) + SSZA(BINTERNAL, data), sizeof(u_int32_t))
+#define BINTERNAL_PSIZE(len) \
+ (BINTERNAL_SIZE(len) + sizeof(db_indx_t))
+
+/************************************************************************
+ RECNO INTERNAL PAGE LAYOUT
+ ************************************************************************/
+
+/*
+ * The recno internal entry.
+ */
+typedef struct _rinternal {
+ db_pgno_t pgno; /* 00-03: Page number of referenced page. */
+ db_recno_t nrecs; /* 04-07: Subtree record count. */
+} RINTERNAL;
+
+/* Get a RINTERNAL item for a specific index. */
+#define GET_RINTERNAL(dbp, pg, indx) \
+ ((RINTERNAL *)P_ENTRY(dbp, pg, indx))
+
+/*
+ * Page space required to add a new RINTERNAL item to the page, with and
+ * without the index value.
+ */
+#define RINTERNAL_SIZE \
+ (u_int16_t)DB_ALIGN(sizeof(RINTERNAL), sizeof(u_int32_t))
+#define RINTERNAL_PSIZE \
+ (RINTERNAL_SIZE + sizeof(db_indx_t))
+
+typedef struct __pglist {
+ db_pgno_t pgno, next_pgno;
+ DB_LSN lsn;
+} db_pglist_t;
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* !_DB_PAGE_H_ */
diff --git a/db-4.8.30/dbinc/db_swap.h b/db-4.8.30/dbinc/db_swap.h
new file mode 100644
index 0000000..dab657c
--- /dev/null
+++ b/db-4.8.30/dbinc/db_swap.h
@@ -0,0 +1,262 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996-2009 Oracle. All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_SWAP_H_
+#define _DB_SWAP_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * Little endian <==> big endian 64-bit swap macros.
+ * M_64_SWAP swap a memory location
+ *	P_64_COPY	copy potentially unaligned 8 byte quantities
+ * P_64_SWAP swap a referenced memory location
+ */
+#undef M_64_SWAP
+#define M_64_SWAP(a) { \
+ u_int64_t _tmp; \
+ _tmp = (u_int64_t)a; \
+ ((u_int8_t *)&a)[0] = ((u_int8_t *)&_tmp)[7]; \
+ ((u_int8_t *)&a)[1] = ((u_int8_t *)&_tmp)[6]; \
+ ((u_int8_t *)&a)[2] = ((u_int8_t *)&_tmp)[5]; \
+ ((u_int8_t *)&a)[3] = ((u_int8_t *)&_tmp)[4]; \
+ ((u_int8_t *)&a)[4] = ((u_int8_t *)&_tmp)[3]; \
+ ((u_int8_t *)&a)[5] = ((u_int8_t *)&_tmp)[2]; \
+ ((u_int8_t *)&a)[6] = ((u_int8_t *)&_tmp)[1]; \
+ ((u_int8_t *)&a)[7] = ((u_int8_t *)&_tmp)[0]; \
+}
+#undef P_64_COPY
+#define P_64_COPY(a, b) { \
+ ((u_int8_t *)b)[0] = ((u_int8_t *)a)[0]; \
+ ((u_int8_t *)b)[1] = ((u_int8_t *)a)[1]; \
+ ((u_int8_t *)b)[2] = ((u_int8_t *)a)[2]; \
+ ((u_int8_t *)b)[3] = ((u_int8_t *)a)[3]; \
+ ((u_int8_t *)b)[4] = ((u_int8_t *)a)[4]; \
+ ((u_int8_t *)b)[5] = ((u_int8_t *)a)[5]; \
+ ((u_int8_t *)b)[6] = ((u_int8_t *)a)[6]; \
+ ((u_int8_t *)b)[7] = ((u_int8_t *)a)[7]; \
+}
+#undef P_64_SWAP
+#define P_64_SWAP(a) { \
+ u_int64_t _tmp; \
+ P_64_COPY(a, &_tmp); \
+ ((u_int8_t *)a)[0] = ((u_int8_t *)&_tmp)[7]; \
+ ((u_int8_t *)a)[1] = ((u_int8_t *)&_tmp)[6]; \
+ ((u_int8_t *)a)[2] = ((u_int8_t *)&_tmp)[5]; \
+ ((u_int8_t *)a)[3] = ((u_int8_t *)&_tmp)[4]; \
+ ((u_int8_t *)a)[4] = ((u_int8_t *)&_tmp)[3]; \
+ ((u_int8_t *)a)[5] = ((u_int8_t *)&_tmp)[2]; \
+ ((u_int8_t *)a)[6] = ((u_int8_t *)&_tmp)[1]; \
+ ((u_int8_t *)a)[7] = ((u_int8_t *)&_tmp)[0]; \
+}
+
+/*
+ * Little endian <==> big endian 32-bit swap macros.
+ * P_32_COPY copy potentially unaligned 4 byte quantities
+ * P_32_COPYSWAP copy and swap potentially unaligned 4 byte quantities
+ * P_32_SWAP swap a referenced memory location
+ * M_32_SWAP swap a memory location
+ */
+#undef P_32_COPY
+#define P_32_COPY(a, b) do { \
+ ((u_int8_t *)b)[0] = ((u_int8_t *)a)[0]; \
+ ((u_int8_t *)b)[1] = ((u_int8_t *)a)[1]; \
+ ((u_int8_t *)b)[2] = ((u_int8_t *)a)[2]; \
+ ((u_int8_t *)b)[3] = ((u_int8_t *)a)[3]; \
+} while (0)
+#undef P_32_COPYSWAP
+#define P_32_COPYSWAP(a, b) do { \
+ ((u_int8_t *)b)[0] = ((u_int8_t *)a)[3]; \
+ ((u_int8_t *)b)[1] = ((u_int8_t *)a)[2]; \
+ ((u_int8_t *)b)[2] = ((u_int8_t *)a)[1]; \
+ ((u_int8_t *)b)[3] = ((u_int8_t *)a)[0]; \
+} while (0)
+#undef P_32_SWAP
+#define P_32_SWAP(a) do { \
+ u_int32_t _tmp; \
+ P_32_COPY(a, &_tmp); \
+ P_32_COPYSWAP(&_tmp, a); \
+} while (0)
+#undef M_32_SWAP
+#define M_32_SWAP(a) P_32_SWAP(&a)
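+
+/*
+ * Example (sketch): bring a 32-bit field read from a database created on
+ * a machine of the opposite byte order into host order.
+ *
+ *	u_int32_t magic;
+ *
+ *	memcpy(&magic, p, sizeof(u_int32_t));	p may be unaligned
+ *	M_32_SWAP(magic);			now in host byte order
+ */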
+
+/*
+ * Little endian <==> big endian 16-bit swap macros.
+ * P_16_COPY copy potentially unaligned 2 byte quantities
+ * P_16_COPYSWAP copy and swap potentially unaligned 2 byte quantities
+ * P_16_SWAP swap a referenced memory location
+ * M_16_SWAP swap a memory location
+ */
+#undef P_16_COPY
+#define P_16_COPY(a, b) do { \
+ ((u_int8_t *)b)[0] = ((u_int8_t *)a)[0]; \
+ ((u_int8_t *)b)[1] = ((u_int8_t *)a)[1]; \
+} while (0)
+#undef P_16_COPYSWAP
+#define P_16_COPYSWAP(a, b) do { \
+ ((u_int8_t *)b)[0] = ((u_int8_t *)a)[1]; \
+ ((u_int8_t *)b)[1] = ((u_int8_t *)a)[0]; \
+} while (0)
+#undef P_16_SWAP
+#define P_16_SWAP(a) do { \
+ u_int16_t _tmp; \
+ P_16_COPY(a, &_tmp); \
+ P_16_COPYSWAP(&_tmp, a); \
+} while (0)
+#undef M_16_SWAP
+#define M_16_SWAP(a) P_16_SWAP(&a)
+
+#undef SWAP32
+#define SWAP32(p) { \
+ P_32_SWAP(p); \
+ (p) += sizeof(u_int32_t); \
+}
+#undef SWAP16
+#define SWAP16(p) { \
+ P_16_SWAP(p); \
+ (p) += sizeof(u_int16_t); \
+}
+
+/*
+ * Berkeley DB has local versions of htonl() and ntohl() that operate on
+ * pointers to the right size memory locations; the portability magic for
+ * finding the real system functions isn't worth the effort.
+ */
+#undef DB_HTONL_SWAP
+#define DB_HTONL_SWAP(env, p) do { \
+ if (F_ISSET((env), ENV_LITTLEENDIAN)) \
+ P_32_SWAP(p); \
+} while (0)
+#undef DB_NTOHL_SWAP
+#define DB_NTOHL_SWAP(env, p) do { \
+ if (F_ISSET((env), ENV_LITTLEENDIAN)) \
+ P_32_SWAP(p); \
+} while (0)
+
+#undef DB_NTOHL_COPYIN
+#define DB_NTOHL_COPYIN(env, i, p) do { \
+ u_int8_t *tmp; \
+ tmp = (u_int8_t *)&(i); \
+ if (F_ISSET(env, ENV_LITTLEENDIAN)) { \
+ tmp[3] = *p++; \
+ tmp[2] = *p++; \
+ tmp[1] = *p++; \
+ tmp[0] = *p++; \
+ } else { \
+ memcpy(&i, p, sizeof(u_int32_t)); \
+ p = (u_int8_t *)p + sizeof(u_int32_t); \
+ } \
+} while (0)
+
+#undef DB_NTOHS_COPYIN
+#define DB_NTOHS_COPYIN(env, i, p) do { \
+ u_int8_t *tmp; \
+ tmp = (u_int8_t *)&(i); \
+ if (F_ISSET(env, ENV_LITTLEENDIAN)) { \
+ tmp[1] = *p++; \
+ tmp[0] = *p++; \
+ } else { \
+ memcpy(&i, p, sizeof(u_int16_t)); \
+ p = (u_int8_t *)p + sizeof(u_int16_t); \
+ } \
+} while (0)
+
+#undef DB_HTONL_COPYOUT
+#define DB_HTONL_COPYOUT(env, p, i) do { \
+ u_int8_t *tmp; \
+ tmp = (u_int8_t *)p; \
+ if (F_ISSET(env, ENV_LITTLEENDIAN)) { \
+ *tmp++ = ((u_int8_t *)&(i))[3]; \
+ *tmp++ = ((u_int8_t *)&(i))[2]; \
+ *tmp++ = ((u_int8_t *)&(i))[1]; \
+ *tmp++ = ((u_int8_t *)&(i))[0]; \
+ } else \
+ memcpy(p, &i, sizeof(u_int32_t)); \
+ p = (u_int8_t *)p + sizeof(u_int32_t); \
+} while (0)
+
+#undef DB_HTONS_COPYOUT
+#define DB_HTONS_COPYOUT(env, p, i) do { \
+ u_int8_t *tmp; \
+ tmp = (u_int8_t *)p; \
+ if (F_ISSET(env, ENV_LITTLEENDIAN)) { \
+ *tmp++ = ((u_int8_t *)&(i))[1]; \
+ *tmp++ = ((u_int8_t *)&(i))[0]; \
+ } else \
+ memcpy(p, &i, sizeof(u_int16_t)); \
+ p = (u_int8_t *)p + sizeof(u_int16_t); \
+} while (0)
+
+/*
+ * Helper macros for swapped logs. We write logs in little endian format to
+ * minimize disruption on x86 when upgrading from native byte order to
+ * platform-independent logs.
+ */
+#define LOG_SWAPPED(env) !F_ISSET(env, ENV_LITTLEENDIAN)
+
+#define LOGCOPY_32(env, x, p) do { \
+ if (LOG_SWAPPED(env)) \
+ P_32_COPYSWAP((p), (x)); \
+ else \
+ memcpy((x), (p), sizeof(u_int32_t)); \
+} while (0)
+
+#define LOGCOPY_16(env, x, p) do { \
+ if (LOG_SWAPPED(env)) \
+ P_16_COPYSWAP((p), (x)); \
+ else \
+ memcpy((x), (p), sizeof(u_int16_t)); \
+} while (0)
+
+#define LOGCOPY_TOLSN(env, lsnp, p) do { \
+ LOGCOPY_32((env), &(lsnp)->file, (p)); \
+ LOGCOPY_32((env), &(lsnp)->offset, \
+ (u_int8_t *)(p) + sizeof(u_int32_t)); \
+} while (0)
+
+#define LOGCOPY_FROMLSN(env, p, lsnp) do { \
+ LOGCOPY_32((env), (p), &(lsnp)->file); \
+ LOGCOPY_32((env), \
+ (u_int8_t *)(p) + sizeof(u_int32_t), &(lsnp)->offset); \
+} while (0)
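+
+/*
+ * For example (sketch), reading an LSN out of a log record buffer bp,
+ * swapping if the log is not in host byte order:
+ *
+ *	DB_LSN lsn;
+ *
+ *	LOGCOPY_TOLSN(env, &lsn, bp);
+ */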
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* !_DB_SWAP_H_ */
diff --git a/db-4.8.30/dbinc/db_upgrade.h b/db-4.8.30/dbinc/db_upgrade.h
new file mode 100644
index 0000000..b9f1c32
--- /dev/null
+++ b/db-4.8.30/dbinc/db_upgrade.h
@@ -0,0 +1,248 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_UPGRADE_H_
+#define _DB_UPGRADE_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * This file defines the metadata pages from the previous release.
+ * These structures are only used to upgrade old versions of databases.
+ */
+
+/* Structures from the 3.1 release */
+typedef struct _dbmeta31 {
+ DB_LSN lsn; /* 00-07: LSN. */
+ db_pgno_t pgno; /* 08-11: Current page number. */
+ u_int32_t magic; /* 12-15: Magic number. */
+ u_int32_t version; /* 16-19: Version. */
+ u_int32_t pagesize; /* 20-23: Pagesize. */
+ u_int8_t unused1[1]; /* 24: Unused. */
+ u_int8_t type; /* 25: Page type. */
+ u_int8_t unused2[2]; /* 26-27: Unused. */
+ u_int32_t free; /* 28-31: Free list page number. */
+	DB_LSN	  unused3;	/* 32-39: Unused. */
+ u_int32_t key_count; /* 40-43: Cached key count. */
+ u_int32_t record_count; /* 44-47: Cached record count. */
+ u_int32_t flags; /* 48-51: Flags: unique to each AM. */
+ /* 52-71: Unique file ID. */
+ u_int8_t uid[DB_FILE_ID_LEN];
+} DBMETA31;
+
+typedef struct _btmeta31 {
+ DBMETA31 dbmeta; /* 00-71: Generic meta-data header. */
+
+ u_int32_t maxkey; /* 72-75: Btree: Maxkey. */
+ u_int32_t minkey; /* 76-79: Btree: Minkey. */
+ u_int32_t re_len; /* 80-83: Recno: fixed-length record length. */
+ u_int32_t re_pad; /* 84-87: Recno: fixed-length record pad. */
+	u_int32_t	root;		/* 88-91: Root page. */
+
+ /*
+ * Minimum page size is 128.
+ */
+} BTMETA31;
+
+/************************************************************************
+ HASH METADATA PAGE LAYOUT
+ ************************************************************************/
+typedef struct _hashmeta31 {
+ DBMETA31 dbmeta; /* 00-71: Generic meta-data page header. */
+
+ u_int32_t max_bucket; /* 72-75: ID of Maximum bucket in use */
+ u_int32_t high_mask; /* 76-79: Modulo mask into table */
+ u_int32_t low_mask; /* 80-83: Modulo mask into table lower half */
+ u_int32_t ffactor; /* 84-87: Fill factor */
+ u_int32_t nelem; /* 88-91: Number of keys in hash table */
+ u_int32_t h_charkey; /* 92-95: Value of hash(CHARKEY) */
+#define NCACHED 32 /* number of spare points */
+ /* 96-223: Spare pages for overflow */
+ u_int32_t spares[NCACHED];
+
+ /*
+ * Minimum page size is 256.
+ */
+} HMETA31;
+
+/*
+ * QAM metadata page structure.
+ */
+typedef struct _qmeta31 {
+ DBMETA31 dbmeta; /* 00-71: Generic meta-data header. */
+
+ u_int32_t start; /* 72-75: Start offset. */
+ u_int32_t first_recno; /* 76-79: First not deleted record. */
+ u_int32_t cur_recno; /* 80-83: Last recno allocated. */
+ u_int32_t re_len; /* 84-87: Fixed-length record length. */
+ u_int32_t re_pad; /* 88-91: Fixed-length record pad. */
+ u_int32_t rec_page; /* 92-95: Records Per Page. */
+
+ /*
+ * Minimum page size is 128.
+ */
+} QMETA31;
+/* Structures from the 3.2 release */
+typedef struct _qmeta32 {
+ DBMETA31 dbmeta; /* 00-71: Generic meta-data header. */
+
+ u_int32_t first_recno; /* 72-75: First not deleted record. */
+ u_int32_t cur_recno; /* 76-79: Last recno allocated. */
+ u_int32_t re_len; /* 80-83: Fixed-length record length. */
+ u_int32_t re_pad; /* 84-87: Fixed-length record pad. */
+ u_int32_t rec_page; /* 88-91: Records Per Page. */
+ u_int32_t page_ext; /* 92-95: Pages per extent */
+
+ /*
+ * Minimum page size is 128.
+ */
+} QMETA32;
+
+/* Structures from the 3.0 release */
+
+typedef struct _dbmeta30 {
+ DB_LSN lsn; /* 00-07: LSN. */
+ db_pgno_t pgno; /* 08-11: Current page number. */
+ u_int32_t magic; /* 12-15: Magic number. */
+ u_int32_t version; /* 16-19: Version. */
+ u_int32_t pagesize; /* 20-23: Pagesize. */
+ u_int8_t unused1[1]; /* 24: Unused. */
+ u_int8_t type; /* 25: Page type. */
+ u_int8_t unused2[2]; /* 26-27: Unused. */
+ u_int32_t free; /* 28-31: Free list page number. */
+ u_int32_t flags; /* 32-35: Flags: unique to each AM. */
+ /* 36-55: Unique file ID. */
+ u_int8_t uid[DB_FILE_ID_LEN];
+} DBMETA30;
+
+/************************************************************************
+ BTREE METADATA PAGE LAYOUT
+ ************************************************************************/
+typedef struct _btmeta30 {
+ DBMETA30 dbmeta; /* 00-55: Generic meta-data header. */
+
+ u_int32_t maxkey; /* 56-59: Btree: Maxkey. */
+ u_int32_t minkey; /* 60-63: Btree: Minkey. */
+ u_int32_t re_len; /* 64-67: Recno: fixed-length record length. */
+ u_int32_t re_pad; /* 68-71: Recno: fixed-length record pad. */
+ u_int32_t root; /* 72-75: Root page. */
+
+ /*
+ * Minimum page size is 128.
+ */
+} BTMETA30;
+
+/************************************************************************
+ HASH METADATA PAGE LAYOUT
+ ************************************************************************/
+typedef struct _hashmeta30 {
+ DBMETA30 dbmeta; /* 00-55: Generic meta-data page header. */
+
+ u_int32_t max_bucket; /* 56-59: ID of Maximum bucket in use */
+ u_int32_t high_mask; /* 60-63: Modulo mask into table */
+ u_int32_t low_mask; /* 64-67: Modulo mask into table lower half */
+ u_int32_t ffactor; /* 68-71: Fill factor */
+ u_int32_t nelem; /* 72-75: Number of keys in hash table */
+ u_int32_t h_charkey; /* 76-79: Value of hash(CHARKEY) */
+#define NCACHED30 32 /* number of spare points */
+ /* 80-207: Spare pages for overflow */
+ u_int32_t spares[NCACHED30];
+
+ /*
+ * Minimum page size is 256.
+ */
+} HMETA30;
+
+/************************************************************************
+ QUEUE METADATA PAGE LAYOUT
+ ************************************************************************/
+/*
+ * QAM metadata page structure.
+ */
+typedef struct _qmeta30 {
+ DBMETA30 dbmeta; /* 00-55: Generic meta-data header. */
+
+ u_int32_t start; /* 56-59: Start offset. */
+ u_int32_t first_recno; /* 60-63: First not deleted record. */
+ u_int32_t cur_recno; /* 64-67: Last recno allocated. */
+ u_int32_t re_len; /* 68-71: Fixed-length record length. */
+ u_int32_t re_pad; /* 72-75: Fixed-length record pad. */
+ u_int32_t rec_page; /* 76-79: Records Per Page. */
+
+ /*
+ * Minimum page size is 128.
+ */
+} QMETA30;
+
+/* Structures from Release 2.x */
+
+/************************************************************************
+ BTREE METADATA PAGE LAYOUT
+ ************************************************************************/
+
+/*
+ * Btree metadata page layout:
+ */
+typedef struct _btmeta2X {
+ DB_LSN lsn; /* 00-07: LSN. */
+ db_pgno_t pgno; /* 08-11: Current page number. */
+ u_int32_t magic; /* 12-15: Magic number. */
+ u_int32_t version; /* 16-19: Version. */
+ u_int32_t pagesize; /* 20-23: Pagesize. */
+ u_int32_t maxkey; /* 24-27: Btree: Maxkey. */
+ u_int32_t minkey; /* 28-31: Btree: Minkey. */
+ u_int32_t free; /* 32-35: Free list page number. */
+ u_int32_t flags; /* 36-39: Flags. */
+ u_int32_t re_len; /* 40-43: Recno: fixed-length record length. */
+ u_int32_t re_pad; /* 44-47: Recno: fixed-length record pad. */
+ /* 48-67: Unique file ID. */
+ u_int8_t uid[DB_FILE_ID_LEN];
+} BTMETA2X;
+
+/************************************************************************
+ HASH METADATA PAGE LAYOUT
+ ************************************************************************/
+
+/*
+ * Hash metadata page layout:
+ */
+/* Hash Table Information */
+typedef struct hashhdr { /* Disk resident portion */
+ DB_LSN lsn; /* 00-07: LSN of the header page */
+ db_pgno_t pgno; /* 08-11: Page number (btree compatibility). */
+ u_int32_t magic; /* 12-15: Magic NO for hash tables */
+ u_int32_t version; /* 16-19: Version ID */
+ u_int32_t pagesize; /* 20-23: Bucket/Page Size */
+ u_int32_t ovfl_point; /* 24-27: Overflow page allocation location */
+ u_int32_t last_freed; /* 28-31: Last freed overflow page pgno */
+ u_int32_t max_bucket; /* 32-35: ID of Maximum bucket in use */
+ u_int32_t high_mask; /* 36-39: Modulo mask into table */
+ u_int32_t low_mask; /* 40-43: Modulo mask into table lower half */
+ u_int32_t ffactor; /* 44-47: Fill factor */
+ u_int32_t nelem; /* 48-51: Number of keys in hash table */
+ u_int32_t h_charkey; /* 52-55: Value of hash(CHARKEY) */
+ u_int32_t flags; /* 56-59: Allow duplicates. */
+#define NCACHED2X 32 /* number of spare points */
+ /* 60-187: Spare pages for overflow */
+ u_int32_t spares[NCACHED2X];
+ /* 188-207: Unique file ID. */
+ u_int8_t uid[DB_FILE_ID_LEN];
+
+ /*
+ * Minimum page size is 256.
+ */
+} HASHHDR;
+
+#if defined(__cplusplus)
+}
+#endif
+#endif /* !_DB_UPGRADE_H_ */
diff --git a/db-4.8.30/dbinc/db_verify.h b/db-4.8.30/dbinc/db_verify.h
new file mode 100644
index 0000000..6cfd1d8
--- /dev/null
+++ b/db-4.8.30/dbinc/db_verify.h
@@ -0,0 +1,204 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1999-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_VERIFY_H_
+#define _DB_VERIFY_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * Structures and macros for the storage and retrieval of all information
+ * needed for inter-page verification of a database.
+ */
+
+/*
+ * EPRINT is the macro for error printing. Its single argument is the
+ * complete, parenthesized argument list for DB->err.
+ */
+#define EPRINT(x) do { \
+ if (!LF_ISSET(DB_SALVAGE)) \
+ __db_errx x; \
+} while (0)
+
+/* Complain about a totally zeroed page where we don't expect one. */
+#define ZEROPG_ERR_PRINT(dbenv, pgno, str) do { \
+ EPRINT(((dbenv), "Page %lu: %s is of inappropriate type %lu", \
+ (u_long)(pgno), str, (u_long)P_INVALID)); \
+ EPRINT(((dbenv), "Page %lu: totally zeroed page", \
+ (u_long)(pgno))); \
+} while (0)
+
+/*
+ * Note that 0 is, in general, a valid pgno, despite equaling PGNO_INVALID;
+ * we have to test it separately where it's not appropriate. IS_VALID_PGNO
+ * relies on a VRFY_DBINFO * named vdp being in scope at the call site.
+ */
+#define IS_VALID_PGNO(x) ((x) <= vdp->last_pgno)
+
+/*
+ * VRFY_DBINFO is the fundamental structure; it either represents the database
+ * of subdatabases, or the sole database if there are no subdatabases.
+ */
+struct __vrfy_dbinfo {
+ DB_THREAD_INFO *thread_info;
+ /* Info about this database in particular. */
+ DBTYPE type;
+
+ /* List of subdatabase meta pages, if any. */
+ LIST_HEAD(__subdbs, __vrfy_childinfo) subdbs;
+
+ /* File-global info--stores VRFY_PAGEINFOs for each page. */
+ DB *pgdbp;
+
+ /* Child database--stores VRFY_CHILDINFOs of each page. */
+ DB *cdbp;
+
+ /* Page info structures currently in use. */
+ LIST_HEAD(__activepips, __vrfy_pageinfo) activepips;
+
+ /*
+ * DB we use to keep track of which pages are linked somehow
+ * during verification. 0 is the default, "unseen"; 1 is seen.
+ */
+ DB *pgset;
+
+ /*
+ * This is a database we use during salvaging to keep track of which
+ * overflow and dup pages we need to come back to at the end and print
+ * with key "UNKNOWN". Pages which print with a good key get set
+ * to SALVAGE_IGNORE; others get set, as appropriate, to SALVAGE_LDUP,
+ * SALVAGE_LRECNODUP, SALVAGE_OVERFLOW for normal db overflow pages,
+ * and SALVAGE_BTREE, SALVAGE_LRECNO, and SALVAGE_HASH for subdb
+ * pages.
+ */
+#define SALVAGE_INVALID 0
+#define SALVAGE_IGNORE 1
+#define SALVAGE_LDUP 2
+#define SALVAGE_IBTREE 3
+#define SALVAGE_OVERFLOW 4
+#define SALVAGE_LBTREE 5
+#define SALVAGE_HASH 6
+#define SALVAGE_LRECNO 7
+#define SALVAGE_LRECNODUP 8
+ DB *salvage_pages;
+
+ db_pgno_t last_pgno;
+ db_pgno_t meta_last_pgno;
+ db_pgno_t pgs_remaining; /* For dbp->db_feedback(). */
+
+ /*
+ * These are used during __bam_vrfy_subtree to keep track, while
+ * walking up and down the Btree structure, of the prev- and next-page
+ * chain of leaf pages and verify that it's intact. Also, make sure
+ * that this chain contains pages of only one type.
+ */
+ db_pgno_t prev_pgno;
+ db_pgno_t next_pgno;
+ u_int8_t leaf_type;
+
+ /* Queue needs these to verify data pages in the first pass. */
+ u_int32_t re_pad; /* Record pad character. */
+ u_int32_t re_len; /* Record length. */
+ u_int32_t rec_page;
+ u_int32_t page_ext;
+ u_int32_t first_recno;
+ u_int32_t last_recno;
+ int nextents;
+ db_pgno_t *extents;
+
+#define SALVAGE_PRINTABLE 0x01 /* Output printable chars literally. */
+#define SALVAGE_PRINTHEADER 0x02 /* Print the unknown-key header. */
+#define SALVAGE_PRINTFOOTER 0x04 /* Print the unknown-key footer. */
+#define SALVAGE_HASSUBDBS 0x08 /* There are subdatabases to salvage. */
+#define VRFY_LEAFCHAIN_BROKEN 0x10 /* Lost one or more Btree leaf pgs. */
+#define VRFY_QMETA_SET 0x20 /* We've seen a QUEUE meta page and
+ set things up for it. */
+ u_int32_t flags;
+}; /* VRFY_DBINFO */
+
+/*
+ * The amount of state information we need per-page is small enough that
+ * it's not worth the trouble to define separate structures for each
+ * possible type of page, and since we're doing verification with these we
+ * have to be open to the possibility that page N will be of a completely
+ * unexpected type anyway. So we define one structure here with all the
+ * info we need for inter-page verification.
+ */
+struct __vrfy_pageinfo {
+ u_int8_t type;
+ u_int8_t bt_level;
+ u_int8_t unused1;
+ u_int8_t unused2;
+ db_pgno_t pgno;
+ db_pgno_t prev_pgno;
+ db_pgno_t next_pgno;
+
+ /* meta pages */
+ db_pgno_t root;
+ db_pgno_t free; /* Free list head. */
+
+ db_indx_t entries; /* Actual number of entries. */
+ u_int16_t unused;
+ db_recno_t rec_cnt; /* Record count. */
+ u_int32_t re_pad; /* Record pad character. */
+ u_int32_t re_len; /* Record length. */
+ u_int32_t bt_minkey;
+ u_int32_t h_ffactor;
+ u_int32_t h_nelem;
+
+ /* overflow pages */
+ /*
+ * Note that refcount is the refcount for an overflow page; pi_refcount
+ * is this structure's own refcount!
+ */
+ u_int32_t refcount;
+ u_int32_t olen;
+
+#define VRFY_DUPS_UNSORTED 0x0001 /* Have to flag the negative! */
+#define VRFY_HAS_CHKSUM 0x0002
+#define VRFY_HAS_DUPS 0x0004
+#define VRFY_HAS_DUPSORT 0x0008 /* Has the flag set. */
+#define VRFY_HAS_PART_RANGE 0x0010 /* Has the flag set. */
+#define VRFY_HAS_PART_CALLBACK 0x0020 /* Has the flag set. */
+#define VRFY_HAS_RECNUMS 0x0040
+#define VRFY_HAS_SUBDBS 0x0080
+#define VRFY_INCOMPLETE 0x0100 /* Meta or item order checks incomp. */
+#define VRFY_IS_ALLZEROES 0x0200 /* Hash page we haven't touched? */
+#define VRFY_IS_FIXEDLEN 0x0400
+#define VRFY_IS_RECNO 0x0800
+#define VRFY_IS_RRECNO 0x1000
+#define VRFY_OVFL_LEAFSEEN 0x2000
+#define VRFY_HAS_COMPRESS 0x4000
+ u_int32_t flags;
+
+ LIST_ENTRY(__vrfy_pageinfo) links;
+ u_int32_t pi_refcount;
+}; /* VRFY_PAGEINFO */
+
+struct __vrfy_childinfo {
+ /* The following fields are set by the caller of __db_vrfy_childput. */
+ db_pgno_t pgno;
+
+#define V_DUPLICATE 1 /* off-page dup metadata */
+#define V_OVERFLOW 2 /* overflow page */
+#define V_RECNO 3 /* btree internal or leaf page */
+ u_int32_t type;
+ db_recno_t nrecs; /* record count on a btree subtree */
+ u_int32_t tlen; /* ovfl. item total size */
+
+ /* The following field is maintained by __db_vrfy_childput. */
+ u_int32_t refcnt; /* # of times parent points to child. */
+
+ LIST_ENTRY(__vrfy_childinfo) links;
+}; /* VRFY_CHILDINFO */
+
+#if defined(__cplusplus)
+}
+#endif
+#endif /* !_DB_VERIFY_H_ */
diff --git a/db-4.8.30/dbinc/debug.h b/db-4.8.30/dbinc/debug.h
new file mode 100644
index 0000000..1c8cfd7
--- /dev/null
+++ b/db-4.8.30/dbinc/debug.h
@@ -0,0 +1,277 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1998-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_DEBUG_H_
+#define _DB_DEBUG_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * __attribute__ turns on additional error checking in gcc; define it
+ * away for compilers that don't support it (non-gcc, or gcc before 2.5).
+ */
+#if !defined(__GNUC__) || __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 5)
+#define __attribute__(s)
+#endif
+
+/*
+ * When running with #DIAGNOSTIC defined, we smash memory and do memory
+ * guarding with a special byte value.
+ */
+#define CLEAR_BYTE 0xdb
+#define GUARD_BYTE 0xdc
+
+/*
+ * DB assertions.
+ *
+ * Use __STDC__ rather than STDC_HEADERS, the #e construct is ANSI C specific.
+ */
+#if defined(DIAGNOSTIC) && defined(__STDC__)
+#define DB_ASSERT(env, e) \
+ ((e) ? (void)0 : __db_assert(env, #e, __FILE__, __LINE__))
+#else
+#define DB_ASSERT(env, e)
+#endif
+
+/*
+ * "Shut that bloody compiler up!"
+ *
+ * Unused, or not-used-yet variable. We need to write and then read the
+ * variable; some compilers are too bloody clever by half.
+ */
+#define COMPQUIET(n, v) do { \
+ (n) = (v); \
+ (n) = (n); \
+} while (0)
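+
+/*
+ * Typical use (sketch): silence an unused-parameter warning in a
+ * function that ignores its flags argument.
+ *
+ *	COMPQUIET(flags, 0);
+ */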
+
+/*
+ * Purify and other run-time tools complain about uninitialized reads/writes
+ * of structure fields whose only purpose is padding, as well as when heap
+ * memory that was never initialized is written to disk.
+ */
+#ifdef UMRW
+#define UMRW_SET(v) (v) = 0
+#else
+#define UMRW_SET(v)
+#endif
+
+/*
+ * Errors are in one of two areas: a Berkeley DB error, or a system-level
+ * error. We use db_strerror to translate the former and __os_strerror to
+ * translate the latter.
+ */
+typedef enum {
+ DB_ERROR_NOT_SET=0,
+ DB_ERROR_SET=1,
+ DB_ERROR_SYSTEM=2
+} db_error_set_t;
+
+/*
+ * Message handling. Use a macro instead of a function because va_list
+ * references to variadic arguments cannot be reset to the beginning of the
+ * variadic argument list (and then rescanned), by functions other than the
+ * original routine that took the variadic list of arguments.
+ */
+#if defined(STDC_HEADERS) || defined(__cplusplus)
+#define DB_REAL_ERR(dbenv, error, error_set, app_call, fmt) { \
+ va_list __ap; \
+ \
+ /* Call the application's callback function, if specified. */ \
+ va_start(__ap, fmt); \
+ if ((dbenv) != NULL && (dbenv)->db_errcall != NULL) \
+ __db_errcall(dbenv, error, error_set, fmt, __ap); \
+ va_end(__ap); \
+ \
+ /* \
+ * If the application specified a file descriptor, write to it. \
+	 * If we wrote to neither the application's callback routine nor \
+ * its file descriptor, and it's an application error message \
+ * using {DbEnv,Db}.{err,errx} or the application has never \
+ * configured an output channel, default by writing to stderr. \
+ */ \
+ va_start(__ap, fmt); \
+ if ((dbenv) == NULL || \
+ (dbenv)->db_errfile != NULL || \
+ ((dbenv)->db_errcall == NULL && \
+ ((app_call) || F_ISSET((dbenv)->env, ENV_NO_OUTPUT_SET)))) \
+ __db_errfile(dbenv, error, error_set, fmt, __ap); \
+ va_end(__ap); \
+}
+#else
+#define DB_REAL_ERR(dbenv, error, error_set, app_call, fmt) { \
+ va_list __ap; \
+ \
+ /* Call the application's callback function, if specified. */ \
+ va_start(__ap); \
+ if ((dbenv) != NULL && (dbenv)->db_errcall != NULL) \
+ __db_errcall(dbenv, error, error_set, fmt, __ap); \
+ va_end(__ap); \
+ \
+ /* \
+ * If the application specified a file descriptor, write to it. \
+	 * If we wrote to neither the application's callback routine nor \
+ * its file descriptor, and it's an application error message \
+ * using {DbEnv,Db}.{err,errx} or the application has never \
+ * configured an output channel, default by writing to stderr. \
+ */ \
+ va_start(__ap); \
+ if ((dbenv) == NULL || \
+ (dbenv)->db_errfile != NULL || \
+ ((dbenv)->db_errcall == NULL && \
+ ((app_call) || F_ISSET((dbenv)->env, ENV_NO_OUTPUT_SET)))) \
+		    __db_errfile(dbenv, error, error_set, fmt, __ap);	\
+ va_end(__ap); \
+}
+#endif
+#if defined(STDC_HEADERS) || defined(__cplusplus)
+#define DB_REAL_MSG(dbenv, fmt) { \
+ va_list __ap; \
+ \
+ /* Call the application's callback function, if specified. */ \
+ va_start(__ap, fmt); \
+ if ((dbenv) != NULL && (dbenv)->db_msgcall != NULL) \
+ __db_msgcall(dbenv, fmt, __ap); \
+ va_end(__ap); \
+ \
+ /* \
+ * If the application specified a file descriptor, write to it. \
+	 * If we wrote to neither the application's callback routine nor \
+ * its file descriptor, write to stdout. \
+ */ \
+ va_start(__ap, fmt); \
+ if ((dbenv) == NULL || \
+ (dbenv)->db_msgfile != NULL || \
+ (dbenv)->db_msgcall == NULL) { \
+ __db_msgfile(dbenv, fmt, __ap); \
+ } \
+ va_end(__ap); \
+}
+#else
+#define DB_REAL_MSG(dbenv, fmt) { \
+ va_list __ap; \
+ \
+ /* Call the application's callback function, if specified. */ \
+ va_start(__ap); \
+ if ((dbenv) != NULL && (dbenv)->db_msgcall != NULL) \
+ __db_msgcall(dbenv, fmt, __ap); \
+ va_end(__ap); \
+ \
+ /* \
+ * If the application specified a file descriptor, write to it. \
+	 * If we wrote to neither the application's callback routine nor \
+ * its file descriptor, write to stdout. \
+ */ \
+ va_start(__ap); \
+ if ((dbenv) == NULL || \
+ (dbenv)->db_msgfile != NULL || \
+ (dbenv)->db_msgcall == NULL) { \
+ __db_msgfile(dbenv, fmt, __ap); \
+ } \
+ va_end(__ap); \
+}
+#endif
+
+/*
+ * Debugging macro to log operations.
+ * If DEBUG_WOP is defined, log operations that modify the database.
+ * If DEBUG_ROP is defined, log operations that read the database.
+ *
+ *	C cursor (DBC), supplying the env and dbp
+ * T txn
+ * O operation (string)
+ * K key
+ * A data
+ * F flags
+ */
+#define LOG_OP(C, T, O, K, A, F) { \
+ DB_LSN __lsn; \
+ DBT __op; \
+ if (DBC_LOGGING((C))) { \
+ memset(&__op, 0, sizeof(__op)); \
+ __op.data = O; \
+ __op.size = strlen(O) + 1; \
+ (void)__db_debug_log((C)->env, T, &__lsn, 0, \
+ &__op, (C)->dbp->log_filename->id, K, A, F); \
+ } \
+}
+#ifdef DEBUG_ROP
+#define DEBUG_LREAD(C, T, O, K, A, F) LOG_OP(C, T, O, K, A, F)
+#else
+#define DEBUG_LREAD(C, T, O, K, A, F)
+#endif
+#ifdef DEBUG_WOP
+#define DEBUG_LWRITE(C, T, O, K, A, F) LOG_OP(C, T, O, K, A, F)
+#else
+#define DEBUG_LWRITE(C, T, O, K, A, F)
+#endif
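+
+/*
+ * Illustrative call site (hypothetical operation string): an access
+ * method logs a modification before performing it, e.g.
+ *
+ *	DEBUG_LWRITE(dbc, dbc->txn, "bam_put", key, data, flags);
+ *
+ * which compiles to nothing unless DEBUG_WOP was defined at build time.
+ */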
+
+/*
+ * Hook for testing recovery at various places in the create/delete paths.
+ * Hook for testing subdb locks.
+ */
+#if CONFIG_TEST
+#define DB_TEST_SUBLOCKS(env, flags) do { \
+ if ((env)->test_abort == DB_TEST_SUBDB_LOCKS) \
+ (flags) |= DB_LOCK_NOWAIT; \
+} while (0)
+
+#define DB_ENV_TEST_RECOVERY(env, val, ret, name) do { \
+ int __ret; \
+ PANIC_CHECK((env)); \
+ if ((env)->test_copy == (val)) { \
+ /* COPY the FILE */ \
+ if ((__ret = __db_testcopy((env), NULL, (name))) != 0) \
+ (ret) = __env_panic((env), __ret); \
+ } \
+ if ((env)->test_abort == (val)) { \
+ /* ABORT the TXN */ \
+ (env)->test_abort = 0; \
+ (ret) = EINVAL; \
+ goto db_tr_err; \
+ } \
+} while (0)
+
+#define DB_TEST_RECOVERY(dbp, val, ret, name) do { \
+ ENV *__env = (dbp)->env; \
+ int __ret; \
+ PANIC_CHECK(__env); \
+ if (__env->test_copy == (val)) { \
+ /* Copy the file. */ \
+ if (F_ISSET((dbp), \
+ DB_AM_OPEN_CALLED) && (dbp)->mpf != NULL) \
+ (void)__db_sync(dbp); \
+ if ((__ret = \
+ __db_testcopy(__env, (dbp), (name))) != 0) \
+ (ret) = __env_panic(__env, __ret); \
+ } \
+ if (__env->test_abort == (val)) { \
+ /* Abort the transaction. */ \
+ __env->test_abort = 0; \
+ (ret) = EINVAL; \
+ goto db_tr_err; \
+ } \
+} while (0)
+
+#define DB_TEST_RECOVERY_LABEL db_tr_err:
+
+#define DB_TEST_WAIT(env, val) \
+ if ((val) != 0) \
+ __os_yield((env), (u_long)(val), 0)
+#else
+#define DB_TEST_SUBLOCKS(env, flags)
+#define DB_ENV_TEST_RECOVERY(env, val, ret, name)
+#define DB_TEST_RECOVERY(dbp, val, ret, name)
+#define DB_TEST_RECOVERY_LABEL
+#define DB_TEST_WAIT(env, val)
+#endif
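+
+/*
+ * Illustrative pattern (hypothetical function body): create/delete paths
+ * place DB_TEST_RECOVERY at the points of interest and end with the
+ * label the macro jumps to:
+ *
+ *	DB_TEST_RECOVERY(dbp, DB_TEST_PREOPEN, ret, name);
+ *	...
+ *	DB_TEST_RECOVERY_LABEL
+ *	return (ret);
+ *
+ * In non-CONFIG_TEST builds both macros are empty, so no label or goto
+ * is emitted.
+ */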
+
+#if defined(__cplusplus)
+}
+#endif
+#endif /* !_DB_DEBUG_H_ */
diff --git a/db-4.8.30/dbinc/fop.h b/db-4.8.30/dbinc/fop.h
new file mode 100644
index 0000000..69ea61e
--- /dev/null
+++ b/db-4.8.30/dbinc/fop.h
@@ -0,0 +1,32 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2001-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_FOP_H_
+#define _DB_FOP_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define MAKE_INMEM(D) do { \
+ F_SET((D), DB_AM_INMEM); \
+ (void)__memp_set_flags((D)->mpf, DB_MPOOL_NOFILE, 1); \
+} while (0)
+
+#define CLR_INMEM(D) do { \
+ F_CLR((D), DB_AM_INMEM); \
+ (void)__memp_set_flags((D)->mpf, DB_MPOOL_NOFILE, 0); \
+} while (0)
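+
+/*
+ * Illustrative note: MAKE_INMEM(dbp) both sets the access-method flag
+ * and tells mpool not to create a backing file, so a database can be
+ * switched to its in-memory representation with a single macro call.
+ */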
+
+#include "dbinc_auto/fileops_auto.h"
+#include "dbinc_auto/fileops_ext.h"
+
+#if defined(__cplusplus)
+}
+#endif
+#endif /* !_DB_FOP_H_ */
diff --git a/db-4.8.30/dbinc/globals.h b/db-4.8.30/dbinc/globals.h
new file mode 100644
index 0000000..625fdfa
--- /dev/null
+++ b/db-4.8.30/dbinc/globals.h
@@ -0,0 +1,123 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_GLOBALS_H_
+#define _DB_GLOBALS_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*******************************************************
+ * Global variables.
+ *
+ * Held in a single structure to minimize the name-space pollution.
+ *******************************************************/
+#ifdef HAVE_VXWORKS
+#include "semLib.h"
+#endif
+
+typedef struct __db_globals {
+#ifdef HAVE_BREW
+ struct tm ltm; /* BREW localtime structure */
+#endif
+#ifdef HAVE_VXWORKS
+ u_int32_t db_global_init; /* VxWorks: inited */
+ SEM_ID db_global_lock; /* VxWorks: global semaphore */
+#endif
+
+ char *db_line; /* DB display string. */
+
+ char error_buf[40]; /* Error string buffer. */
+
+ int uid_init; /* srand set in UID generator */
+
+ u_long rand_next; /* rand/srand value */
+
+ u_int32_t fid_serial; /* file id counter */
+
+ int db_errno; /* Errno value if not available */
+
+ int (*j_close) __P((int)); /* Underlying OS interface jump table.*/
+ void (*j_dirfree) __P((char **, int));
+ int (*j_dirlist) __P((const char *, char ***, int *));
+ int (*j_exists) __P((const char *, int *));
+ void (*j_free) __P((void *));
+ int (*j_fsync) __P((int));
+ int (*j_ftruncate) __P((int, off_t));
+ int (*j_ioinfo) __P((const char *,
+ int, u_int32_t *, u_int32_t *, u_int32_t *));
+ void *(*j_malloc) __P((size_t));
+ int (*j_file_map) __P((DB_ENV *, char *, size_t, int, void **));
+ int (*j_file_unmap) __P((DB_ENV *, void *));
+ int (*j_open) __P((const char *, int, ...));
+ ssize_t (*j_pread) __P((int, void *, size_t, off_t));
+ ssize_t (*j_pwrite) __P((int, const void *, size_t, off_t));
+ ssize_t (*j_read) __P((int, void *, size_t));
+ void *(*j_realloc) __P((void *, size_t));
+ int (*j_region_map) __P((DB_ENV *, char *, size_t, int *, void **));
+ int (*j_region_unmap) __P((DB_ENV *, void *));
+ int (*j_rename) __P((const char *, const char *));
+ int (*j_seek) __P((int, off_t, int));
+ int (*j_unlink) __P((const char *));
+ ssize_t (*j_write) __P((int, const void *, size_t));
+ int (*j_yield) __P((u_long, u_long));
+} DB_GLOBALS;
+
+#ifdef HAVE_BREW
+#define DB_GLOBAL(v) \
+ ((DB_GLOBALS *)(((BDBApp *)GETAPPINSTANCE())->db_global_values))->v
+#else
+#ifdef DB_INITIALIZE_DB_GLOBALS
+DB_GLOBALS __db_global_values = {
+#ifdef HAVE_VXWORKS
+ 0, /* VxWorks: initialized */
+ NULL, /* VxWorks: global semaphore */
+#endif
+
+ "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=",
+ { 0 },
+ 0,
+ 0,
+ 0,
+ 0,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL
+};
+#else
+extern DB_GLOBALS __db_global_values;
+#endif
+
+#define DB_GLOBAL(v) __db_global_values.v
+#endif /* HAVE_BREW */
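+
+/*
+ * Illustrative use (hypothetical variable "seed"): DB_GLOBAL(v) selects
+ * member v of the process-wide structure, so, e.g.,
+ *
+ *	DB_GLOBAL(rand_next) = seed;
+ *
+ * updates the shared rand/srand state no matter which of the
+ * definitions above is in effect.
+ */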
+
+#if defined(__cplusplus)
+}
+#endif
+#endif /* !_DB_GLOBALS_H_ */
diff --git a/db-4.8.30/dbinc/hash.h b/db-4.8.30/dbinc/hash.h
new file mode 100644
index 0000000..ae3fb2e
--- /dev/null
+++ b/db-4.8.30/dbinc/hash.h
@@ -0,0 +1,169 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996-2009 Oracle. All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994
+ * Margo Seltzer. All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_HASH_H_
+#define _DB_HASH_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* Hash internal structure. */
+typedef struct hash_t {
+ db_pgno_t meta_pgno; /* Page number of the meta data page. */
+ u_int32_t h_ffactor; /* Fill factor. */
+ u_int32_t h_nelem; /* Number of elements. */
+ /* Hash and compare functions. */
+ u_int32_t (*h_hash) __P((DB *, const void *, u_int32_t));
+ int (*h_compare) __P((DB *, const DBT *, const DBT *));
+} HASH;
+
+/* Cursor structure definitions. */
+typedef struct cursor_t {
+ /* struct __dbc_internal */
+ __DBC_INTERNAL
+
+ /* Hash private part */
+
+ /* Per-thread information */
+ DB_LOCK hlock; /* Metadata page lock. */
+ HMETA *hdr; /* Pointer to meta-data page. */
+ PAGE *split_buf; /* Temporary buffer for splits. */
+
+ /* Hash cursor information */
+ db_pgno_t bucket; /* Bucket we are traversing. */
+ db_pgno_t lbucket; /* Bucket for which we are locked. */
+ db_indx_t dup_off; /* Offset within a duplicate set. */
+ db_indx_t dup_len; /* Length of current duplicate. */
+ db_indx_t dup_tlen; /* Total length of duplicate entry. */
+ u_int32_t seek_size; /* Number of bytes we need for add. */
+ db_pgno_t seek_found_page;/* Page on which we can insert. */
+ db_indx_t seek_found_indx;/* Insert position for item. */
+ u_int32_t order; /* Relative order among deleted curs. */
+
+#define H_CONTINUE 0x0001 /* Join--search strictly fwd for data */
+#define H_DELETED 0x0002 /* Cursor item is deleted. */
+#define H_DUPONLY 0x0004 /* Dups only; do not change key. */
+#define H_EXPAND 0x0008 /* Table expanded. */
+#define H_ISDUP 0x0010 /* Cursor is within duplicate set. */
+#define H_NEXT_NODUP 0x0020 /* Get next non-dup entry. */
+#define H_NOMORE 0x0040 /* No more entries in bucket. */
+#define H_OK 0x0080 /* Request succeeded. */
+ u_int32_t flags;
+} HASH_CURSOR;
+
+/* Test string. */
+#define CHARKEY "%$sniglet^&"
+
+/* Overflow management */
+/*
+ * The spares table indicates the page number at which each doubling begins.
+ * From this page number we subtract the number of buckets already allocated
+ * so that the macros below can calculate a bucket's page number with a
+ * simple addition.
+ */
+#define BS_TO_PAGE(bucket, spares) \
+ ((bucket) + (spares)[__db_log2((bucket) + 1)])
+#define BUCKET_TO_PAGE(I, B) (BS_TO_PAGE((B), (I)->hdr->spares))
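+
+/*
+ * Worked example (hypothetical spares values): for bucket 5,
+ * __db_log2(5 + 1) == 3, so with spares[3] == 7 the bucket lives on
+ * page BS_TO_PAGE(5, spares) == 5 + 7 == 12.
+ */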
+
+/* Constraints on how much data goes on a page. */
+
+#define MINFILL 4
+#define ISBIG(I, N) (((N) > ((I)->hdr->dbmeta.pagesize / MINFILL)) ? 1 : 0)
+
+/* Shorthands for accessing structure */
+#define NDX_INVALID 0xFFFF
+#define BUCKET_INVALID 0xFFFFFFFF
+
+/* On-page duplicates are stored as a string of size-data-size triples. */
+#define DUP_SIZE(len) ((len) + 2 * sizeof(db_indx_t))
+
+/* Log messages types (these are subtypes within a record type) */
+#define PAIR_KEYMASK 0x1
+#define PAIR_DATAMASK 0x2
+#define PAIR_DUPMASK 0x4
+#define PAIR_MASK 0xf
+#define PAIR_ISKEYBIG(N) (N & PAIR_KEYMASK)
+#define PAIR_ISDATABIG(N) (N & PAIR_DATAMASK)
+#define PAIR_ISDATADUP(N) (N & PAIR_DUPMASK)
+#define OPCODE_OF(N) (N & ~PAIR_MASK)
+
+#define PUTPAIR 0x20
+#define DELPAIR 0x30
+#define PUTOVFL 0x40
+#define DELOVFL 0x50
+#define HASH_UNUSED1 0x60
+#define HASH_UNUSED2 0x70
+#define SPLITOLD 0x80
+#define SPLITNEW 0x90
+#define SORTPAGE 0x100
+
+/* Flags to control behavior of __ham_del_pair */
+#define HAM_DEL_NO_CURSOR 0x01 /* Don't do any cursor adjustment */
+#define HAM_DEL_NO_RECLAIM 0x02 /* Don't reclaim empty pages */
+/* Just delete on-page items (even if they are references to off-page items). */
+#define HAM_DEL_IGNORE_OFFPAGE 0x04
+
+typedef enum {
+ DB_HAM_CURADJ_DEL = 1,
+ DB_HAM_CURADJ_ADD = 2,
+ DB_HAM_CURADJ_ADDMOD = 3,
+ DB_HAM_CURADJ_DELMOD = 4
+} db_ham_curadj;
+
+typedef enum {
+ DB_HAM_CHGPG = 1,
+ DB_HAM_DELFIRSTPG = 2,
+ DB_HAM_DELMIDPG = 3,
+ DB_HAM_DELLASTPG = 4,
+ DB_HAM_DUP = 5,
+ DB_HAM_SPLIT = 6
+} db_ham_mode;
+
+#if defined(__cplusplus)
+}
+#endif
+
+#include "dbinc_auto/hash_auto.h"
+#include "dbinc_auto/hash_ext.h"
+#include "dbinc/db_am.h"
+#endif /* !_DB_HASH_H_ */
diff --git a/db-4.8.30/dbinc/hmac.h b/db-4.8.30/dbinc/hmac.h
new file mode 100644
index 0000000..c79abbf
--- /dev/null
+++ b/db-4.8.30/dbinc/hmac.h
@@ -0,0 +1,39 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_HMAC_H_
+#define _DB_HMAC_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * Algorithm specific information.
+ */
+/*
+ * SHA1 checksumming
+ */
+typedef struct {
+ u_int32_t state[5];
+ u_int32_t count[2];
+ unsigned char buffer[64];
+} SHA1_CTX;
+
+/*
+ * AES assumes the SHA1 checksumming (also called MAC)
+ */
+#define DB_MAC_MAGIC "mac derivation key magic value"
+#define DB_ENC_MAGIC "encryption and decryption key value magic"
+
+#if defined(__cplusplus)
+}
+#endif
+
+#include "dbinc_auto/hmac_ext.h"
+#endif /* !_DB_HMAC_H_ */
diff --git a/db-4.8.30/dbinc/lock.h b/db-4.8.30/dbinc/lock.h
new file mode 100644
index 0000000..0d00a55
--- /dev/null
+++ b/db-4.8.30/dbinc/lock.h
@@ -0,0 +1,310 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_LOCK_H_
+#define _DB_LOCK_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define DB_LOCK_DEFAULT_N 1000 /* Default # of locks in region. */
+
+/*
+ * The locker id space is divided between the transaction manager and the lock
+ * manager. Lock IDs start at 1 and go to DB_LOCK_MAXID. Txn IDs start at
+ * DB_LOCK_MAXID + 1 and go up to TXN_MAXIMUM.
+ */
+#define DB_LOCK_INVALIDID 0
+#define DB_LOCK_MAXID 0x7fffffff
+
+/*
+ * Out of band value for a lock. Locks contain an offset into a lock region,
+ * so we use an invalid region offset to indicate an invalid or unset lock.
+ */
+#define LOCK_INVALID INVALID_ROFF
+#define LOCK_ISSET(lock) ((lock).off != LOCK_INVALID)
+#define LOCK_INIT(lock) ((lock).off = LOCK_INVALID)
+
+/*
+ * Macro to identify a write lock for the purpose of counting locks
+ * for the NUMWRITES option to deadlock detection.
+ */
+#define IS_WRITELOCK(m) \
+ ((m) == DB_LOCK_WRITE || (m) == DB_LOCK_WWRITE || \
+ (m) == DB_LOCK_IWRITE || (m) == DB_LOCK_IWR)
+
+/*
+ * Macros to lock/unlock the lock region as a whole. Mostly used for
+ * initialization.
+ */
+#define LOCK_REGION_LOCK(env) \
+ MUTEX_LOCK(env, ((DB_LOCKREGION *) \
+ (env)->lk_handle->reginfo.primary)->mtx_region)
+#define LOCK_REGION_UNLOCK(env) \
+ MUTEX_UNLOCK(env, ((DB_LOCKREGION *) \
+ (env)->lk_handle->reginfo.primary)->mtx_region)
+
+/*
+ * DB_LOCKREGION --
+ * The lock shared region.
+ */
+
+typedef struct __db_lockregion {
+ db_mutex_t mtx_region; /* Region mutex. */
+
+ u_int32_t need_dd; /* flag for deadlock detector */
+ u_int32_t detect; /* run dd on every conflict */
+ db_timespec next_timeout; /* next time to expire a lock */
+ db_mutex_t mtx_dd; /* mutex for lock object dd list. */
+ db_mutex_t mtx_lockers; /* mutex for locker allocation. */
+ SH_TAILQ_HEAD(__dobj) dd_objs; /* objects with waiters */
+ /* free locker header */
+ SH_TAILQ_HEAD(__flocker) free_lockers;
+ SH_TAILQ_HEAD(__lkrs) lockers; /* list of lockers */
+
+ db_timeout_t lk_timeout; /* timeout for locks. */
+ db_timeout_t tx_timeout; /* timeout for txns. */
+
+ u_int32_t locker_t_size; /* size of locker hash table */
+ u_int32_t object_t_size; /* size of object hash table */
+ u_int32_t part_t_size; /* number of partitions */
+
+ roff_t conf_off; /* offset of conflicts array */
+ roff_t obj_off; /* offset of object hash table */
+ roff_t part_off; /* offset of partition array */
+ roff_t stat_off; /* offset to object hash stats */
+ roff_t locker_off; /* offset of locker hash table */
+
+ u_int32_t lock_id; /* Current lock(er) id to allocate. */
+ u_int32_t cur_maxid; /* Current max lock(er) id. */
+ u_int32_t nlockers; /* Current number of lockers. */
+ int nmodes; /* Number of modes in conflict table. */
+ DB_LOCK_STAT stat; /* stats about locking. */
+} DB_LOCKREGION;
+
+/*
+ * Since we will store DBTs in shared memory, we need the equivalent of a
+ * DBT that will work in shared memory.
+ */
+typedef struct __sh_dbt {
+ u_int32_t size; /* Byte length. */
+ roff_t off; /* Region offset. */
+} SH_DBT;
+
+#define SH_DBT_PTR(p) ((void *)(((u_int8_t *)(p)) + (p)->off))
+
+/*
+ * Object structures; these live in the object hash table.
+ */
+typedef struct __db_lockobj {
+ u_int32_t indx; /* Hash index of this object. */
+ u_int32_t generation; /* Generation of this object. */
+ SH_DBT lockobj; /* Identifies object locked. */
+ SH_TAILQ_ENTRY links; /* Links for free list or hash list. */
+ SH_TAILQ_ENTRY dd_links; /* Links for dd list. */
+ SH_TAILQ_HEAD(__waitl) waiters; /* List of waiting locks. */
+ SH_TAILQ_HEAD(__holdl) holders; /* List of held locks. */
+ /* Declare room in the object to hold
+ * typical DB lock structures so that
+ * we do not have to allocate them from
+ * shalloc at run-time. */
+ u_int8_t objdata[sizeof(struct __db_ilock)];
+} DB_LOCKOBJ;
+
+/*
+ * Locker structures; these live in the locker hash table.
+ */
+struct __db_locker {
+ u_int32_t id; /* Locker id. */
+
+ pid_t pid; /* Process owning locker ID */
+ db_threadid_t tid; /* Thread owning locker ID */
+
+ u_int32_t dd_id; /* Deadlock detector id. */
+
+ u_int32_t nlocks; /* Number of locks held. */
+ u_int32_t nwrites; /* Number of write locks held. */
+
+ roff_t master_locker; /* Locker of master transaction. */
+ roff_t parent_locker; /* Parent of this child. */
+ SH_LIST_HEAD(_child) child_locker; /* List of descendant txns;
+ only used in a "master"
+ txn. */
+ SH_LIST_ENTRY child_link; /* Links transactions in the family;
+ elements of the child_locker
+ list. */
+ SH_TAILQ_ENTRY links; /* Links for free and hash list. */
+ SH_TAILQ_ENTRY ulinks; /* Links in-use list. */
+ SH_LIST_HEAD(_held) heldby; /* Locks held by this locker. */
+ db_timespec lk_expire; /* When current lock expires. */
+ db_timespec tx_expire; /* When this txn expires. */
+ db_timeout_t lk_timeout; /* How long do we let locks live. */
+
+#define DB_LOCKER_DIRTY 0x0001
+#define DB_LOCKER_INABORT 0x0002
+#define DB_LOCKER_TIMEOUT 0x0004
+ u_int32_t flags;
+};
+
+/*
+ * Map a hash index into a partition.
+ */
+#define LOCK_PART(reg, ndx) (ndx % (reg)->part_t_size)
+
+/*
+ * Structure that contains information about a lock table partition.
+ */
+typedef struct __db_lockpart{
+ db_mutex_t mtx_part; /* mutex for partition*/
+ /* free lock header */
+ SH_TAILQ_HEAD(__flock) free_locks;
+ /* free obj header */
+ SH_TAILQ_HEAD(__fobj) free_objs;
+#ifdef HAVE_STATISTICS
+ DB_LOCK_PSTAT part_stat; /* Partition stats. */
+#endif
+} DB_LOCKPART;
+
+#define FREE_LOCKS(lt, part) ((lt)->part_array[part].free_locks)
+#define FREE_OBJS(lt, part) ((lt)->part_array[part].free_objs)
+
+/*
+ * DB_LOCKTAB --
+ * The primary library lock data structure (i.e., the one referenced
+ * by the environment, as opposed to the internal one laid out in the region.)
+ */
+struct __db_locktab {
+ ENV *env; /* Environment. */
+ REGINFO reginfo; /* Region information. */
+ u_int8_t *conflicts; /* Pointer to conflict matrix. */
+ DB_LOCKPART *part_array; /* Beginning of partition array. */
+#ifdef HAVE_STATISTICS
+ DB_LOCK_HSTAT *obj_stat; /* Object hash stats array. */
+#endif
+ DB_HASHTAB *obj_tab; /* Beginning of object hash table. */
+ DB_HASHTAB *locker_tab; /* Beginning of locker hash table. */
+};
+
+/*
+ * Test for conflicts.
+ *
+ * Cast HELD and WANTED to ints, they are usually db_lockmode_t enums.
+ */
+#define CONFLICTS(T, R, HELD, WANTED) \
+ (T)->conflicts[((int)HELD) * (R)->nmodes + ((int)WANTED)]
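+
+/*
+ * Example (illustrative): CONFLICTS(lt, region, DB_LOCK_READ,
+ * DB_LOCK_WRITE) indexes row DB_LOCK_READ, column DB_LOCK_WRITE of the
+ * nmodes x nmodes matrix; it is non-zero because a requested write lock
+ * conflicts with a held read lock.
+ */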
+
+#define OBJ_LINKS_VALID(L) ((L)->links.stqe_prev != -1)
+
+struct __db_lock {
+ /*
+ * Wait on mutex to wait on lock. You reference your own mutex with
+ * ID 0 and others reference your mutex with ID 1.
+ */
+ db_mutex_t mtx_lock;
+
+ roff_t holder; /* Who holds this lock. */
+ u_int32_t gen; /* Generation count. */
+ SH_TAILQ_ENTRY links; /* Free or holder/waiter list. */
+ SH_LIST_ENTRY locker_links; /* List of locks held by a locker. */
+ u_int32_t refcount; /* Reference count the lock. */
+ db_lockmode_t mode; /* What sort of lock. */
+ roff_t obj; /* Relative offset of object struct. */
+ u_int32_t indx; /* Hash index of this object. */
+ db_status_t status; /* Status of this lock. */
+};
+
+/*
+ * Flag values for __lock_put_internal:
+ * DB_LOCK_DOALL: Unlock all references in this lock (instead of only 1).
+ * DB_LOCK_FREE: Free the lock (used in checklocker).
+ * DB_LOCK_NOPROMOTE: Don't bother running promotion when releasing locks
+ * (used by __lock_put_internal).
+ * DB_LOCK_UNLINK: Remove from the locker links (used in checklocker).
+ * Make sure that these do not conflict with the interface flags because
+ * we pass some of those around.
+ */
+#define DB_LOCK_DOALL 0x010000
+#define DB_LOCK_FREE 0x040000
+#define DB_LOCK_NOPROMOTE 0x080000
+#define DB_LOCK_UNLINK 0x100000
+#define DB_LOCK_NOWAITERS 0x400000
+
+/*
+ * Macros to get/release different types of mutexes.
+ */
+/*
+ * Operations on lock objects must be protected by a mutex, either on their
+ * partition or on the lock region. Lock structures associated with that
+ * object are protected as well. Each partition has a free list of objects
+ * and lock structures protected by that mutex. We want to avoid getting
+ * multiple mutexes, particularly in __lock_vec, when there is only a
+ * single partition. If there is only one partition, then all the calls
+ * to LOCK_SYSTEM_LOCK(UNLOCK) actually acquire(release) a lock system
+ * wide mutex and MUTEX_LOCK(UNLOCK)_PARTITION are no-ops. If the number
+ * of partitions is greater than one, then LOCK_SYSTEM_LOCK(UNLOCK) is a
+ * no-op, and MUTEX_LOCK(UNLOCK)_PARTITION acquire a mutex on a particular
+ * partition of the lock table.
+ */
+#define LOCK_SYSTEM_LOCK(lt, reg) do { \
+ if ((reg)->part_t_size == 1) \
+ MUTEX_LOCK((lt)->env, (reg)->mtx_region); \
+} while (0)
+#define LOCK_SYSTEM_UNLOCK(lt, reg) do { \
+ if ((reg)->part_t_size == 1) \
+ MUTEX_UNLOCK((lt)->env, (reg)->mtx_region); \
+} while (0)
+#define MUTEX_LOCK_PARTITION(lt, reg, p) do { \
+ if ((reg)->part_t_size != 1) \
+ MUTEX_LOCK((lt)->env, (lt)->part_array[p].mtx_part); \
+} while (0)
+#define MUTEX_UNLOCK_PARTITION(lt, reg, p) do { \
+ if ((reg)->part_t_size != 1) \
+ MUTEX_UNLOCK((lt)->env, (lt)->part_array[p].mtx_part); \
+} while (0)
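+
+/*
+ * Illustrative pattern (hypothetical call site): callers bracket object
+ * manipulation with both pairs and rely on exactly one pair being a
+ * no-op for the current partition count:
+ *
+ *	LOCK_SYSTEM_LOCK(lt, region);
+ *	MUTEX_LOCK_PARTITION(lt, region, part);
+ *	... manipulate the object and its lock lists ...
+ *	MUTEX_UNLOCK_PARTITION(lt, region, part);
+ *	LOCK_SYSTEM_UNLOCK(lt, region);
+ */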
+
+#define OBJECT_LOCK(lt, reg, obj, ndx) do { \
+ ndx = __lock_ohash(obj) % (reg)->object_t_size; \
+ MUTEX_LOCK_PARTITION(lt, reg, LOCK_PART(reg, ndx)); \
+} while (0)
+
+#define OBJECT_LOCK_NDX(lt, reg, ndx) \
+ MUTEX_LOCK_PARTITION(lt, reg, LOCK_PART(reg, ndx));
+
+#define OBJECT_UNLOCK(lt, reg, ndx) \
+ MUTEX_UNLOCK_PARTITION(lt, reg, LOCK_PART(reg, ndx));
+
+/*
+ * Protect the object deadlock detector queue and the locker allocation
+ * and active queues
+ */
+#define LOCK_DD(env, region) \
+ MUTEX_LOCK(env, (region)->mtx_dd)
+#define UNLOCK_DD(env, region) \
+ MUTEX_UNLOCK(env, (region)->mtx_dd)
+#define LOCK_LOCKERS(env, region) \
+ MUTEX_LOCK(env, (region)->mtx_lockers)
+#define UNLOCK_LOCKERS(env, region) \
+ MUTEX_UNLOCK(env, (region)->mtx_lockers)
+
+/*
+ * __lock_locker_hash --
+ * Hash function for entering lockers into the locker hash table.
+ * Since these are simply 32-bit unsigned integers at the moment,
+ * just return the locker value.
+ */
+#define __lock_locker_hash(locker) (locker)
+#define LOCKER_HASH(lt, reg, locker, ndx) \
+ ndx = __lock_locker_hash(locker) % (reg)->locker_t_size;
+
+#if defined(__cplusplus)
+}
+#endif
+
+#include "dbinc_auto/lock_ext.h"
+#endif /* !_DB_LOCK_H_ */
diff --git a/db-4.8.30/dbinc/log.h b/db-4.8.30/dbinc/log.h
new file mode 100644
index 0000000..cc397eb
--- /dev/null
+++ b/db-4.8.30/dbinc/log.h
@@ -0,0 +1,448 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_LOG_H_
+#define _DB_LOG_H_
+
+#include "dbinc/db_swap.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*******************************************************
+ * DBREG:
+ * The DB file register code keeps track of open files. It's stored
+ * in the log subsystem's shared region, and so appears in the log.h
+ * header file, but is logically separate.
+ * The dbp may not be open if we are recovering the abort of a create.
+ *******************************************************/
+/*
+ * The per-process table that maps log file-id's to DB structures.
+ */
+typedef struct __db_entry {
+ DB *dbp; /* Open dbp for this file id. */
+ int deleted; /* File was not found during open. */
+} DB_ENTRY;
+
+/*
+ * FNAME --
+ * File name and id.
+ */
+struct __fname {
+ SH_TAILQ_ENTRY q; /* File name queue. */
+
+ pid_t pid; /* Process that owns this. */
+ int32_t id; /* Logging file id. */
+ int32_t old_id; /* Saved logging file id. */
+ DBTYPE s_type; /* Saved DB type. */
+
+ roff_t fname_off; /* File name offset. */
+ roff_t dname_off; /* Database name offset. */
+ db_pgno_t meta_pgno; /* Page number of the meta page. */
+ u_int8_t ufid[DB_FILE_ID_LEN]; /* Unique file id. */
+
+ u_int32_t create_txnid; /*
+ * Txn ID of the DB create, stored so
+ * we can log it at register time.
+ */
+ db_mutex_t mutex; /* mutex from db handle. */
+	/* Number of txns referencing this, plus 1 for the db handle. */
+ u_int32_t txn_ref;
+
+#define DB_FNAME_CLOSED 0x01 /* DBP was closed. */
+#define DB_FNAME_DURABLE 0x02 /* File is durable. */
+#define DB_FNAME_INMEM 0x04 /* File is in memory. */
+#define DB_FNAME_NOTLOGGED 0x08 /* Log of close failed. */
+#define DB_FNAME_RECOVER 0x10 /* File was opened by recovery code. */
+#define DB_FNAME_RESTORED 0x20 /* File may be in restored txn. */
+ u_int32_t flags;
+};
+
+/* File open/close register log record opcodes. */
+#define DBREG_CHKPNT 1 /* Checkpoint: file name/id dump. */
+#define DBREG_CLOSE 2 /* File close. */
+#define DBREG_OPEN 3 /* File open. */
+#define DBREG_PREOPEN 4 /* Open in mpool only. */
+#define DBREG_RCLOSE 5 /* File close after recovery. */
+#define DBREG_REOPEN 6 /* Open for in-memory database. */
+
+/*******************************************************
+ * LOG:
+ * The log subsystem information.
+ *******************************************************/
+struct __hdr; typedef struct __hdr HDR;
+struct __log; typedef struct __log LOG;
+struct __log_persist; typedef struct __log_persist LOGP;
+
+#define LFPREFIX "log." /* Log file name prefix. */
+#define LFNAME "log.%010d" /* Log file name template. */
+#define LFNAME_V1 "log.%05d" /* Log file name template, rev 1. */
+
+#define LG_MAX_DEFAULT (10 * MEGABYTE) /* 10 MB. */
+#define LG_MAX_INMEM (256 * 1024) /* 256 KB. */
+#define LG_BSIZE_INMEM (1 * MEGABYTE) /* 1 MB. */
+
+/*
+ * Allocate a few bytes under a power-of-two value. BDB doesn't care if it's
+ * a power-of-two or not, and requesting slightly under a power-of-two allows
+ * stupid allocators to avoid wasting space.
+ */
+#define LG_BASE_REGION_SIZE (130000) /* 128KB - 1072B */
+#define LG_BSIZE_DEFAULT (32000) /* 32 KB - 768B */
+#define LG_CURSOR_BUF_SIZE (32000) /* 32 KB - 768B */
+
+/*
+ * DB_LOG
+ * Per-process log structure.
+ */
+struct __db_log {
+ /*
+ * These fields need to be protected for multi-threaded support.
+ */
+ db_mutex_t mtx_dbreg; /* Mutex for thread protection. */
+
+ DB_ENTRY *dbentry; /* Recovery file-id mapping. */
+#define DB_GROW_SIZE 64
+ int32_t dbentry_cnt; /* Entries. Grows by DB_GROW_SIZE. */
+
+ /*
+ * These fields are only accessed when the region lock is held, so
+ * they do not have to be protected by the thread lock as well.
+ */
+ u_int32_t lfname; /* Log file "name". */
+ DB_FH *lfhp; /* Log file handle. */
+ time_t lf_timestamp; /* Log file timestamp. */
+
+ u_int8_t *bufp; /* Region buffer. */
+
+ /* These fields are not thread protected. */
+ ENV *env; /* Environment */
+ REGINFO reginfo; /* Region information. */
+
+#define DBLOG_AUTOREMOVE 0x01 /* Autoremove log files. */
+#define DBLOG_DIRECT 0x02 /* Do direct I/O on the log. */
+#define DBLOG_DSYNC 0x04 /* Set OS_DSYNC on the log. */
+#define DBLOG_FORCE_OPEN 0x08 /* Force the DB open even if it appears
+ * to be deleted. */
+#define DBLOG_INMEMORY 0x10 /* Logging is in memory. */
+#define DBLOG_OPENFILES 0x20 /* Prepared files need to be open. */
+#define DBLOG_RECOVER 0x40 /* We are in recovery. */
+#define DBLOG_ZERO 0x80 /* Zero fill the log. */
+ u_int32_t flags;
+};
+
+/*
+ * HDR --
+ * Log record header.
+ */
+struct __hdr {
+ u_int32_t prev; /* Previous offset. */
+ u_int32_t len; /* Current length. */
+ u_int8_t chksum[DB_MAC_KEY]; /* Current checksum. */
+ u_int8_t iv[DB_IV_BYTES]; /* IV */
+ u_int32_t orig_size; /* Original size of log record */
+ /* !!! - 'size' is not written to log, must be last in hdr */
+ size_t size; /* Size of header to use */
+};
+
+/*
+ * LOG_HDR_SUM -- XOR in prev and len
+ *	This helps avoid the race of misreading the log while it
+ *	is being updated.
+ */
+#define LOG_HDR_SUM(crypto, hdr, sum) do { \
+ if (crypto) { \
+ ((u_int32_t *)sum)[0] ^= ((HDR *)hdr)->prev; \
+ ((u_int32_t *)sum)[1] ^= ((HDR *)hdr)->len; \
+ } else { \
+ ((u_int32_t *)sum)[0] ^= \
+ ((HDR *)hdr)->prev ^ ((HDR *)hdr)->len; \
+ } \
+} while (0)
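+
+/*
+ * Illustrative note: because XOR is its own inverse, the same call
+ *
+ *	LOG_HDR_SUM(CRYPTO_ON(env), hdr, chksum);
+ *
+ * folds prev/len into a freshly computed checksum before a record is
+ * written and strips them back out when validating the record on read.
+ */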
+
+/*
+ * We use HDR internally, and then when we write out, we write out
+ * prev, len, and then a 4-byte checksum if normal operation or
+ * a crypto-checksum and IV and original size if running in crypto
+ * mode. We must store the original size in case we pad. Set the
+ * size when we set up the header. We compute a DB_MAC_KEY size
+ * checksum regardless, but we can safely just use the first 4 bytes.
+ */
+#define HDR_NORMAL_SZ 12
+#define	HDR_CRYPTO_SZ	(12 + DB_MAC_KEY + DB_IV_BYTES)
+
+struct __log_persist {
+ u_int32_t magic; /* DB_LOGMAGIC */
+ u_int32_t version; /* DB_LOGVERSION */
+
+ u_int32_t log_size; /* Log file size. */
+ u_int32_t notused; /* Historically the log file mode. */
+};
+
+/* Macros to lock/unlock the log region as a whole. */
+#define LOG_SYSTEM_LOCK(env) \
+ MUTEX_LOCK(env, ((LOG *) \
+ (env)->lg_handle->reginfo.primary)->mtx_region)
+#define LOG_SYSTEM_UNLOCK(env) \
+ MUTEX_UNLOCK(env, ((LOG *) \
+ (env)->lg_handle->reginfo.primary)->mtx_region)
+
+/*
+ * LOG --
+ * Shared log region. One of these is allocated in shared memory,
+ * and describes the log.
+ */
+struct __log {
+ db_mutex_t mtx_region; /* Region mutex. */
+
+ db_mutex_t mtx_filelist; /* Mutex guarding file name list. */
+
+ LOGP persist; /* Persistent information. */
+
+ SH_TAILQ_HEAD(__fq1) fq; /* List of file names. */
+ int32_t fid_max; /* Max fid allocated. */
+ roff_t free_fid_stack; /* Stack of free file ids. */
+ u_int free_fids; /* Height of free fid stack. */
+ u_int free_fids_alloced; /* N free fid slots allocated. */
+
+ /*
+ * The lsn LSN is the file offset that we're about to write and which
+ * we will return to the user.
+ */
+ DB_LSN lsn; /* LSN at current file offset. */
+
+ /*
+ * The f_lsn LSN is the LSN (returned to the user) that "owns" the
+ * first byte of the buffer. If the record associated with the LSN
+ * spans buffers, it may not reflect the physical file location of
+ * the first byte of the buffer.
+ */
+ DB_LSN f_lsn; /* LSN of first byte in the buffer. */
+ size_t b_off; /* Current offset in the buffer. */
+ u_int32_t w_off; /* Current write offset in the file. */
+ u_int32_t len; /* Length of the last record. */
+
+ DB_LSN active_lsn; /* Oldest active LSN in the buffer. */
+ size_t a_off; /* Offset in the buffer of first active
+ file. */
+
+ /*
+ * The s_lsn LSN is the last LSN that we know is on disk, not just
+ * written, but synced. This field is protected by the flush mutex
+ * rather than by the region mutex.
+ */
+ db_mutex_t mtx_flush; /* Mutex guarding flushing. */
+ int in_flush; /* Log flush in progress. */
+ DB_LSN s_lsn; /* LSN of the last sync. */
+
+ DB_LOG_STAT stat; /* Log statistics. */
+
+ /*
+ * This timestamp is updated anytime someone unlinks log
+ * files. This can happen when calling __log_vtruncate
+ * or replication internal init when it unlinks log files.
+ *
+ * The timestamp is used so that other processes that might
+ * have file handles to log files know to close/reopen them
+ * so they're not potentially writing to now-removed files.
+ */
+ time_t timestamp; /* Log trunc timestamp. */
+
+ /*
+ * !!!
+ * NOTE: the next group of fields are NOT protected by the log
+ * region lock. They are protected by REP->mtx_clientdb. If you
+ * need access to both, you must acquire REP->mtx_clientdb
+ * before acquiring the log region lock.
+ *
+ * The waiting_lsn is used by the replication system. It is the
+ * first LSN that we are holding without putting in the log, because
+ * we received one or more log records out of order. Associated with
+ * the waiting_lsn is the number of log records that we still have to
+ * receive before we decide that we should request it again.
+ *
+ * The max_wait_lsn is used to control retransmission in the face
+ * of dropped messages. If we are requesting all records from the
+ * current gap (i.e., chunk of the log that we are missing), then
+ * the max_wait_lsn contains the first LSN that we are known to have
+ * in the __db.rep.db. If we requested only a single record, then
+ * the max_wait_lsn has the LSN of that record we requested.
+ */
+ /* BEGIN fields protected by rep->mtx_clientdb. */
+ DB_LSN waiting_lsn; /* First log record after a gap. */
+ DB_LSN verify_lsn; /* LSN we are waiting to verify. */
+	DB_LSN	  prev_ckp;	/* LSN of ckp preceding verify_lsn. */
+ DB_LSN max_wait_lsn; /* Maximum LSN requested. */
+ DB_LSN max_perm_lsn; /* Maximum PERMANENT LSN processed. */
+ db_timespec max_lease_ts; /* Maximum Lease timestamp seen. */
+ db_timespec wait_ts; /* Time to wait before requesting. */
+ db_timespec rcvd_ts; /* Initial received time to wait. */
+ db_timespec last_ts; /* Last time of insert in temp db. */
+ /*
+ * The ready_lsn is also used by the replication system. It is the
+ * next LSN we expect to receive. It's normally equal to "lsn",
+ * except at the beginning of a log file, at which point it's set
+ * to the LSN of the first record of the new file (after the
+ * header), rather than to 0.
+ */
+ DB_LSN ready_lsn;
+ /*
+ * The bulk_buf is used by replication for bulk transfer. While this
+ * is protected by REP->mtx_clientdb, this doesn't contend with the
+ * above fields because the above are used by clients and the bulk
+ * fields below are used by a master.
+ */
+ roff_t bulk_buf; /* Bulk transfer buffer in region. */
+ uintptr_t bulk_off; /* Current offset into bulk buffer. */
+ u_int32_t bulk_len; /* Length of buffer. */
+ u_int32_t bulk_flags; /* Bulk buffer flags. */
+ /* END fields protected by rep->mtx_clientdb. */
+
+ /*
+ * During initialization, the log system walks forward through the
+ * last log file to find its end. If it runs into a checkpoint
+ * while it's doing so, it caches it here so that the transaction
+ * system doesn't need to walk through the file again on its
+ * initialization.
+ */
+ DB_LSN cached_ckp_lsn;
+
+ u_int32_t regionmax; /* Configured size of the region. */
+
+ roff_t buffer_off; /* Log buffer offset in the region. */
+ u_int32_t buffer_size; /* Log buffer size. */
+
+ u_int32_t log_size; /* Log file's size. */
+ u_int32_t log_nsize; /* Next log file's size. */
+
+ int filemode; /* Log file permissions mode. */
+
+ /*
+ * DB_LOG_AUTOREMOVE and DB_LOG_INMEMORY: not protected by a mutex,
+ * all we care about is if they're zero or non-zero.
+ */
+ int db_log_autoremove;
+ int db_log_inmemory;
+
+ u_int32_t ncommit; /* Number of txns waiting to commit. */
+ DB_LSN t_lsn; /* LSN of first commit */
+ SH_TAILQ_HEAD(__commit) commits;/* list of txns waiting to commit. */
+ SH_TAILQ_HEAD(__free) free_commits;/* free list of commit structs. */
+
+ /*
+ * In-memory logs maintain a list of the start positions of all log
+ * files currently active in the in-memory buffer. This is to make the
+ * lookup from LSN to log buffer offset efficient.
+ */
+ SH_TAILQ_HEAD(__logfile) logfiles;
+ SH_TAILQ_HEAD(__free_logfile) free_logfiles;
+};
+
+/*
+ * __db_commit structure --
+ * One of these is allocated for each transaction waiting to commit.
+ */
+struct __db_commit {
+ db_mutex_t mtx_txnwait; /* Mutex for txn to wait on. */
+ DB_LSN lsn; /* LSN of commit record. */
+ SH_TAILQ_ENTRY links; /* Either on free or waiting list. */
+
+#define DB_COMMIT_FLUSH 0x0001 /* Flush the log when you wake up. */
+ u_int32_t flags;
+};
+
+/*
+ * Check for the proper progression of Log Sequence Numbers.
+ * If we are rolling forward, the LSN on the page must be greater
+ * than or equal to the previous LSN in the log record.
+ * We ignore NOT LOGGED LSNs. The user did an unlogged update.
+ * We should eventually see a log record that matches and continue
+ * forward.
+ * A ZERO LSN implies a page that was allocated prior to the recovery
+ * start point and then truncated later in the log. An allocation of a
+ * page after this page will extend the file, leaving a hole. We want to
+ * ignore this page until it is truncated again.
+ */
+
+#define CHECK_LSN(e, redo, cmp, lsn, prev) \
+ if (DB_REDO(redo) && (cmp) < 0 && \
+ ((!IS_NOT_LOGGED_LSN(*(lsn)) && !IS_ZERO_LSN(*(lsn))) || \
+ IS_REP_CLIENT(e))) { \
+ ret = __db_check_lsn(e, lsn, prev); \
+ goto out; \
+ }
+#define CHECK_ABORT(e, redo, cmp, lsn, prev) \
+ if (redo == DB_TXN_ABORT && (cmp) != 0 && \
+ ((!IS_NOT_LOGGED_LSN(*(lsn)) && !IS_ZERO_LSN(*(lsn))) || \
+ IS_REP_CLIENT(e))) { \
+ ret = __db_check_lsn(e, lsn, prev); \
+ goto out; \
+ }
+
+/*
+ * In-memory logs track the start position of each log file still in the
+ * buffer; the RINGBUF_LEN macro below measures how much data lies between
+ * two ring-buffer offsets (inclusive of start, exclusive of end).
+ */
+struct __db_filestart {
+ u_int32_t file;
+ size_t b_off;
+
+ SH_TAILQ_ENTRY links; /* Either on free or waiting list. */
+};
+
+#define RINGBUF_LEN(lp, start, end) \
+ ((start) < (end) ? \
+ (end) - (start) : (lp)->buffer_size - ((start) - (end)))
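+
+/*
+ * Worked example (illustrative): with buffer_size == 100,
+ * RINGBUF_LEN(lp, 20, 70) == 50, while RINGBUF_LEN(lp, 90, 10) ==
+ * 100 - (90 - 10) == 20, the region having wrapped past the end of
+ * the buffer.
+ */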
+
+/*
+ * Internal macro to set pointer to the begin_lsn for generated
+ * logging routines. If begin_lsn is already set then do nothing.
+ * Return a pointer to the last lsn too.
+ */
+#undef DB_SET_TXN_LSNP
+#define DB_SET_TXN_LSNP(txn, blsnp, llsnp) do { \
+ DB_LSN *__lsnp; \
+ TXN_DETAIL *__td; \
+ __td = (txn)->td; \
+ *(llsnp) = &__td->last_lsn; \
+ while (__td->parent != INVALID_ROFF) \
+ __td = R_ADDR(&(txn)->mgrp->reginfo, __td->parent); \
+ __lsnp = &__td->begin_lsn; \
+ if (IS_ZERO_LSN(*__lsnp)) \
+ *(blsnp) = __lsnp; \
+} while (0)
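+
+/*
+ * Illustrative note: the while loop above climbs from a child txn to the
+ * master of its family, so begin_lsn is always recorded against the
+ * outermost transaction, while last_lsn tracks the txn doing the logging.
+ */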
+
+/*
+ * These are used in __log_backup to determine which LSN in the
+ * checkpoint record to compare and return.
+ */
+#define CKPLSN_CMP 0
+#define LASTCKP_CMP 1
+
+/*
+ * Status codes indicating the validity of a log file examined by
+ * __log_valid().
+ */
+typedef enum {
+ DB_LV_INCOMPLETE,
+ DB_LV_NONEXISTENT,
+ DB_LV_NORMAL,
+ DB_LV_OLD_READABLE,
+ DB_LV_OLD_UNREADABLE
+} logfile_validity;
+
+#if defined(__cplusplus)
+}
+#endif
+
+#include "dbinc_auto/dbreg_auto.h"
+#include "dbinc_auto/dbreg_ext.h"
+#include "dbinc_auto/log_ext.h"
+#endif /* !_DB_LOG_H_ */
diff --git a/db-4.8.30/dbinc/mp.h b/db-4.8.30/dbinc/mp.h
new file mode 100644
index 0000000..4c6f180
--- /dev/null
+++ b/db-4.8.30/dbinc/mp.h
@@ -0,0 +1,647 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_MP_H_
+#define _DB_MP_H_
+
+#include "dbinc/atomic.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+struct __bh; typedef struct __bh BH;
+struct __bh_frozen_p; typedef struct __bh_frozen_p BH_FROZEN_PAGE;
+struct __bh_frozen_a; typedef struct __bh_frozen_a BH_FROZEN_ALLOC;
+struct __db_mpool_hash; typedef struct __db_mpool_hash DB_MPOOL_HASH;
+struct __db_mpreg; typedef struct __db_mpreg DB_MPREG;
+struct __mpool; typedef struct __mpool MPOOL;
+
+ /* We require at least 20KB of cache. */
+#define DB_CACHESIZE_MIN (20 * 1024)
+
+/*
+ * DB_MPOOLFILE initialization methods cannot be called after open is called;
+ * other methods cannot be called before open is called.
+ */
+#define MPF_ILLEGAL_AFTER_OPEN(dbmfp, name) \
+ if (F_ISSET(dbmfp, MP_OPEN_CALLED)) \
+ return (__db_mi_open((dbmfp)->env, name, 1));
+#define MPF_ILLEGAL_BEFORE_OPEN(dbmfp, name) \
+ if (!F_ISSET(dbmfp, MP_OPEN_CALLED)) \
+ return (__db_mi_open((dbmfp)->env, name, 0));
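+
+/*
+ * Illustrative use (hypothetical method body): a pre-open configuration
+ * method guards itself with
+ *
+ *	MPF_ILLEGAL_AFTER_OPEN(dbmfp, "DB_MPOOLFILE->set_clear_len");
+ *
+ * returning an error via __db_mi_open once the handle has been opened.
+ */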
+
+/*
+ * Cache flush operations, plus modifiers.
+ */
+#define DB_SYNC_ALLOC 0x0001 /* Flush for allocation. */
+#define DB_SYNC_CACHE 0x0002 /* Flush entire cache. */
+#define DB_SYNC_CHECKPOINT 0x0004 /* Checkpoint. */
+#define DB_SYNC_FILE 0x0008 /* Flush file. */
+#define DB_SYNC_INTERRUPT_OK 0x0010 /* Allow interrupt and return OK. */
+#define DB_SYNC_QUEUE_EXTENT 0x0020 /* Flush a queue file with extents. */
+#define DB_SYNC_SUPPRESS_WRITE 0x0040 /* Ignore max-write configuration. */
+#define DB_SYNC_TRICKLE 0x0080 /* Trickle sync. */
+
+/*
+ * DB_MPOOL --
+ * Per-process memory pool structure.
+ */
+struct __db_mpool {
+ /* These fields need to be protected for multi-threaded support. */
+ db_mutex_t mutex; /* Thread mutex. */
+
+ /*
+ * DB_MPREG structure for the DB pgin/pgout routines.
+ *
+ * Linked list of application-specified pgin/pgout routines.
+ */
+ DB_MPREG *pg_inout;
+ LIST_HEAD(__db_mpregh, __db_mpreg) dbregq;
+
+ /* List of DB_MPOOLFILE's. */
+ TAILQ_HEAD(__db_mpoolfileh, __db_mpoolfile) dbmfq;
+
+ /*
+ * The env and reginfo fields are not thread protected, as they are
+ * initialized during mpool creation, and not modified again.
+ */
+ ENV *env; /* Enclosing environment. */
+ REGINFO *reginfo; /* Underlying cache regions. */
+};
+
+/*
+ * DB_MPREG --
+ * DB_MPOOL registry of pgin/pgout functions.
+ */
+struct __db_mpreg {
+ LIST_ENTRY(__db_mpreg) q; /* Linked list. */
+
+ int32_t ftype; /* File type. */
+ /* Pgin, pgout routines. */
+ int (*pgin) __P((DB_ENV *, db_pgno_t, void *, DBT *));
+ int (*pgout) __P((DB_ENV *, db_pgno_t, void *, DBT *));
+};
+
+/*
+ * File hashing --
+ * We hash each file to hash bucket based on its fileid
+ * or, in the case of in memory files, its name.
+ */
+
+/* Number of file hash buckets, a small prime number */
+#define MPOOL_FILE_BUCKETS 17
+
+#define FHASH(id, len) __ham_func5(NULL, id, (u_int32_t)(len))
+
+#define FNBUCKET(id, len) \
+ (FHASH(id, len) % MPOOL_FILE_BUCKETS)
+
+/* Macros to lock/unlock the mpool region as a whole. */
+#define MPOOL_SYSTEM_LOCK(env) \
+ MUTEX_LOCK(env, ((MPOOL *) \
+ (env)->mp_handle->reginfo[0].primary)->mtx_region)
+#define MPOOL_SYSTEM_UNLOCK(env) \
+ MUTEX_UNLOCK(env, ((MPOOL *) \
+ (env)->mp_handle->reginfo[0].primary)->mtx_region)
+
+/* Macros to lock/unlock a specific mpool region. */
+#define MPOOL_REGION_LOCK(env, infop) \
+ MUTEX_LOCK(env, ((MPOOL *)(infop)->primary)->mtx_region)
+#define MPOOL_REGION_UNLOCK(env, infop) \
+ MUTEX_UNLOCK(env, ((MPOOL *)(infop)->primary)->mtx_region)
+
+/*
+ * MPOOL --
+ * Shared memory pool region.
+ */
+struct __mpool {
+ /*
+ * The memory pool can be broken up into individual pieces/files.
+	 * There are two reasons for this: first, on Solaris you can allocate
+	 * only a little more than 2GB of memory in a contiguous chunk,
+	 * and I expect to see more systems with similar issues.  Second,
+	 * applications can add / remove pieces to dynamically resize the
+	 * cache.
+	 *
+	 * While this structure is duplicated in each piece of the cache,
+	 * the first of these pieces/files describes the entire pool; the
+	 * subsequent ones describe only their own piece of the cache.
+ */
+ db_mutex_t mtx_region; /* Region mutex. */
+ db_mutex_t mtx_resize; /* Resizing mutex. */
+
+ /*
+ * The lsn field and list of underlying MPOOLFILEs are thread protected
+ * by the region lock.
+ */
+ DB_LSN lsn; /* Maximum checkpoint LSN. */
+
+ /* Configuration information: protected by the region lock. */
+ u_int32_t max_nreg; /* Maximum number of regions. */
+ size_t mp_mmapsize; /* Maximum file size for mmap. */
+ int mp_maxopenfd; /* Maximum open file descriptors. */
+ int mp_maxwrite; /* Maximum buffers to write. */
+ db_timeout_t mp_maxwrite_sleep; /* Sleep after writing max buffers. */
+
+ /*
+ * The number of regions and the total number of hash buckets across
+ * all regions.
+ * These fields are not protected by a mutex because we assume that we
+ * can read a 32-bit value atomically. They are only modified by cache
+ * resizing which holds the mpool resizing mutex to ensure that
+ * resizing is single-threaded. See the comment in mp_resize.c for
+ * more information.
+ */
+ u_int32_t nreg; /* Number of underlying REGIONS. */
+ u_int32_t nbuckets; /* Total number of hash buckets. */
+
+ /*
+ * The regid field is protected by the resize mutex.
+ */
+ roff_t regids; /* Array of underlying REGION Ids. */
+
+ roff_t ftab; /* Hash table of files. */
+
+ /*
+ * The following fields describe the per-cache portion of the region.
+ *
+ * The htab and htab_buckets fields are not thread protected as they
+ * are initialized during mpool creation, and not modified again.
+ *
+ * The last_checked and lru_count fields are thread protected by
+ * the region lock.
+ */
+ roff_t htab; /* Hash table offset. */
+ u_int32_t htab_buckets; /* Number of hash table entries. */
+ u_int32_t last_checked; /* Last bucket checked for free. */
+ u_int32_t lru_count; /* Counter for buffer LRU. */
+ int32_t lru_reset; /* Hash bucket lru reset point. */
+
+ /*
+ * The stat fields are generally not thread protected, and cannot be
+ * trusted. Note that st_pages is an exception, and is always updated
+ * inside a region lock (although it is sometimes read outside of the
+ * region lock).
+ */
+ DB_MPOOL_STAT stat; /* Per-cache mpool statistics. */
+
+ /*
+ * We track page puts so that we can decide when allocation is never
+ * going to succeed. We don't lock the field, all we care about is
+ * if it changes.
+ */
+ u_int32_t put_counter; /* Count of page put calls. */
+
+ /*
+ * Cache flush operations take a long time...
+ *
+ * Some cache flush operations want to ignore the app's configured
+ * max-write parameters (they are trying to quickly shut down an
+ * environment, for example). We can't specify that as an argument
+ * to the cache region functions, because we may decide to ignore
+ * the max-write configuration after the cache operation has begun.
+ * If the variable suppress_maxwrite is set, ignore the application
+ * max-write config.
+ *
+ * We may want to interrupt cache flush operations in high-availability
+ * configurations.
+ */
+#define DB_MEMP_SUPPRESS_WRITE 0x01
+#define DB_MEMP_SYNC_INTERRUPT 0x02
+ u_int32_t config_flags;
+
+ /* Free frozen buffer headers, protected by the region lock. */
+ SH_TAILQ_HEAD(__free_frozen) free_frozen;
+
+ /* Allocated blocks of frozen buffer headers. */
+ SH_TAILQ_HEAD(__alloc_frozen) alloc_frozen;
+};
+
+/*
+ * NREGION --
+ * Select a cache region given the bucket number.
+ */
+#define NREGION(mp, bucket) \
+ ((bucket) / (mp)->htab_buckets)
+
+/*
+ * MP_HASH --
+ * We make the assumption that early pages of the file are more likely
+ * to be retrieved than the later pages, which means the top bits will
+ * be more interesting for hashing as they're less likely to collide.
+ * That said, 512 8K pages represent a 4MB file, so only reasonably
+ * large files will have page numbers with anything other than the bottom 9
+ * bits set. We XOR in the MPOOL offset of the MPOOLFILE that backs the
+ * page, since that should also be unique for the page. We don't want
+ * to do anything very fancy -- speed is more important to us than using
+ * good hashing.
+ *
+ * Since moving to a dynamic hash, which boils down to using some of the
+ * least significant bits of the hash value, we no longer want to use a
+ * simple shift here, because it's likely with a bit shift that mf_offset
+ * will be ignored, and pages from different files end up in the same
+ * hash bucket. Use a nearby prime instead.
+ */
+#define MP_HASH(mf_offset, pgno) \
+ ((((pgno) << 8) ^ (pgno)) ^ (((u_int32_t) mf_offset) * 509))
+
+/*
+ * Inline the calculation of the mask, since we can't reliably store the mask
+ * with the number of buckets in the region.
+ *
+ * This is equivalent to:
+ * mask = (1 << __db_log2(nbuckets)) - 1;
+ */
+#define MP_MASK(nbuckets, mask) do { \
+ for (mask = 1; mask < (nbuckets); mask = (mask << 1) | 1) \
+ ; \
+} while (0)
+
+#define MP_HASH_BUCKET(hash, nbuckets, mask, bucket) do { \
+ (bucket) = (hash) & (mask); \
+ if ((bucket) >= (nbuckets)) \
+ (bucket) &= ((mask) >> 1); \
+} while (0)
+
+#define MP_BUCKET(mf_offset, pgno, nbuckets, bucket) do { \
+ u_int32_t __mask; \
+ MP_MASK(nbuckets, __mask); \
+ MP_HASH_BUCKET(MP_HASH(mf_offset, pgno), nbuckets, \
+ __mask, bucket); \
+} while (0)
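+
+/*
+ * Worked example (illustrative): with nbuckets == 37, MP_MASK computes
+ * mask == 63.  A hash value of 50 gives 50 & 63 == 50, which is >= 37,
+ * so MP_HASH_BUCKET retries with mask >> 1 == 31, yielding bucket
+ * 50 & 31 == 18.
+ */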
+
+/*
+ * MP_GET_REGION --
+ * Select the region for a given page.
+ */
+#define MP_GET_REGION(dbmfp, pgno, infopp, ret) do { \
+ DB_MPOOL *__t_dbmp; \
+ MPOOL *__t_mp; \
+ \
+ __t_dbmp = dbmfp->env->mp_handle; \
+ __t_mp = __t_dbmp->reginfo[0].primary; \
+ if (__t_mp->max_nreg == 1) { \
+ *(infopp) = &__t_dbmp->reginfo[0]; \
+ } else \
+ ret = __memp_get_bucket((dbmfp)->env, \
+ (dbmfp)->mfp, (pgno), (infopp), NULL, NULL); \
+} while (0)
+
+/*
+ * MP_GET_BUCKET --
+ * Select and lock the bucket for a given page.
+ */
+#define MP_GET_BUCKET(env, mfp, pgno, infopp, hp, bucket, ret) do { \
+ DB_MPOOL *__t_dbmp; \
+ MPOOL *__t_mp; \
+ roff_t __t_mf_offset; \
+ \
+ __t_dbmp = (env)->mp_handle; \
+ __t_mp = __t_dbmp->reginfo[0].primary; \
+ if (__t_mp->max_nreg == 1) { \
+ *(infopp) = &__t_dbmp->reginfo[0]; \
+ __t_mf_offset = R_OFFSET(*(infopp), (mfp)); \
+ MP_BUCKET(__t_mf_offset, \
+ (pgno), __t_mp->nbuckets, bucket); \
+ (hp) = R_ADDR(*(infopp), __t_mp->htab); \
+ (hp) = &(hp)[bucket]; \
+ MUTEX_READLOCK(env, (hp)->mtx_hash); \
+ ret = 0; \
+ } else \
+ ret = __memp_get_bucket((env), \
+ (mfp), (pgno), (infopp), &(hp), &(bucket)); \
+} while (0)
+
+struct __db_mpool_hash {
+ db_mutex_t mtx_hash; /* Per-bucket mutex. */
+
+ DB_HASHTAB hash_bucket; /* Head of bucket. */
+
+ db_atomic_t hash_page_dirty;/* Count of dirty pages. */
+
+#ifndef __TEST_DB_NO_STATISTICS
+ u_int32_t hash_io_wait; /* Count of I/O waits. */
+ u_int32_t hash_frozen; /* Count of frozen buffers. */
+ u_int32_t hash_thawed; /* Count of thawed buffers. */
+ u_int32_t hash_frozen_freed;/* Count of freed frozen buffers. */
+#endif
+
+ DB_LSN old_reader; /* Oldest snapshot reader (cached). */
+
+ u_int32_t flags;
+};
+
+/*
+ * The base mpool priority is 1/4th of the name space, or just under 2^30.
+ * When the LRU counter wraps, we shift everybody down to a base-relative
+ * value.
+ */
+#define MPOOL_BASE_DECREMENT (UINT32_MAX - (UINT32_MAX / 4))
+
+/*
+ * Mpool priorities from low to high. Defined in terms of fractions of the
+ * buffers in the pool.
+ */
+#define MPOOL_PRI_VERY_LOW -1 /* Dead duck. Check and set to 0. */
+#define MPOOL_PRI_LOW -2 /* Low. */
+#define MPOOL_PRI_DEFAULT 0 /* No adjustment -- special case.*/
+#define MPOOL_PRI_HIGH 10 /* With the dirty buffers. */
+#define MPOOL_PRI_DIRTY 10 /* Dirty gets a 10% boost. */
+#define MPOOL_PRI_VERY_HIGH 1 /* Add number of buffers in pool. */
+
+/*
+ * MPOOLFILE --
+ * Shared DB_MPOOLFILE information.
+ */
+struct __mpoolfile {
+ db_mutex_t mutex; /* MPOOLFILE mutex. */
+
+ /* Protected by MPOOLFILE mutex. */
+ u_int32_t mpf_cnt; /* Ref count: DB_MPOOLFILEs. */
+ u_int32_t block_cnt; /* Ref count: blocks in cache. */
+ db_pgno_t last_pgno; /* Last page in the file. */
+ db_pgno_t last_flushed_pgno; /* Last page flushed to disk. */
+ db_pgno_t orig_last_pgno; /* Original last page in the file. */
+ db_pgno_t maxpgno; /* Maximum page number. */
+
+ roff_t path_off; /* File name location. */
+
+ /* Protected by hash bucket mutex. */
+ SH_TAILQ_ENTRY q; /* List of MPOOLFILEs */
+
+ /*
+ * The following are used for file compaction processing.
+ * They are only used when a thread is in the process
+ * of trying to move free pages to the end of the file.
+ * Other threads may look here when freeing a page.
+ * Protected by a lock on the metapage.
+ */
+ u_int32_t free_ref; /* Refcount to freelist. */
+ u_int32_t free_cnt; /* Count of free pages. */
+ size_t free_size; /* Allocated size of free list. */
+ roff_t free_list; /* Offset to free list. */
+
+ /*
+ * We normally don't lock the deadfile field when we read it since we
+ * only care if the field is zero or non-zero. We do lock on read when
+ * searching for a matching MPOOLFILE -- see that code for more detail.
+ */
+ int32_t deadfile; /* Dirty pages can be discarded. */
+
+ u_int32_t bucket; /* hash bucket for this file. */
+
+ /*
+ * None of the following fields are thread protected.
+ *
+ * There are potential races with the ftype field because it's read
+ * without holding a lock. However, it has to be set before adding
+ * any buffers to the cache that depend on it being set, so there
+ * would need to be incorrect operation ordering to have a problem.
+ */
+ int32_t ftype; /* File type. */
+
+ /*
+ * There are potential races with the priority field because it's read
+ * without holding a lock. However, a collision is unlikely and if it
+ * happens is of little consequence.
+ */
+ int32_t priority; /* Priority when unpinning buffer. */
+
+ /*
+ * There are potential races with the file_written field (many threads
+ * may be writing blocks at the same time), and with no_backing_file
+ * and unlink_on_close fields, as they may be set while other threads
+ * are reading them. However, we only care if the field value is zero
+ * or non-zero, so don't lock the memory.
+ *
+ * !!!
+ * Theoretically, a 64-bit architecture could put two of these fields
+ * in a single memory operation and we could race. I have never seen
+ * an architecture where that's a problem, and I believe Java requires
+ * that to never be the case.
+ *
+ * File_written is set whenever a buffer is marked dirty in the cache.
+ * It can be cleared in some cases, after all dirty buffers have been
+ * written AND the file has been flushed to disk.
+ */
+ int32_t file_written; /* File was written. */
+ int32_t no_backing_file; /* Never open a backing file. */
+ int32_t unlink_on_close; /* Unlink file on last close. */
+ int32_t multiversion; /* Number of DB_MULTIVERSION handles. */
+
+ /*
+ * We do not protect the statistics in "stat" because of the cost of
+ * the mutex in the get/put routines. There is a chance that a count
+ * will get lost.
+ */
+ DB_MPOOL_FSTAT stat; /* Per-file mpool statistics. */
+
+ /*
+ * The remaining fields are initialized at open and never subsequently
+ * modified.
+ */
+ int32_t lsn_off; /* Page's LSN offset. */
+ u_int32_t clear_len; /* Bytes to clear on page create. */
+
+ roff_t fileid_off; /* File ID string location. */
+
+ roff_t pgcookie_len; /* Pgin/pgout cookie length. */
+ roff_t pgcookie_off; /* Pgin/pgout cookie location. */
+
+ /*
+ * The flags are initialized at open and never subsequently modified.
+ */
+#define MP_CAN_MMAP 0x001 /* If the file can be mmap'd. */
+#define MP_DIRECT 0x002 /* No OS buffering. */
+#define MP_DURABLE_UNKNOWN 0x004 /* We don't care about durability. */
+#define MP_EXTENT 0x008 /* Extent file. */
+#define MP_FAKE_DEADFILE 0x010 /* Deadfile field: fake flag. */
+#define MP_FAKE_FILEWRITTEN 0x020 /* File_written field: fake flag. */
+#define MP_FAKE_NB 0x040 /* No_backing_file field: fake flag. */
+#define MP_FAKE_UOC 0x080 /* Unlink_on_close field: fake flag. */
+#define MP_NOT_DURABLE 0x100 /* File is not durable. */
+#define MP_TEMP 0x200 /* Backing file is a temporary. */
+ u_int32_t flags;
+};
+
+/*
+ * Flags to __memp_bh_free.
+ */
+#define BH_FREE_FREEMEM 0x01
+#define BH_FREE_REUSE 0x02
+#define BH_FREE_UNLOCKED 0x04
+
+/*
+ * BH --
+ * Buffer header.
+ */
+struct __bh {
+ db_mutex_t mtx_buf; /* Shared/Exclusive mutex */
+ db_atomic_t ref; /* Reference count. */
+#define BH_REFCOUNT(bhp) atomic_read(&(bhp)->ref)
+
+#define BH_CALLPGIN 0x001 /* Convert the page before use. */
+#define BH_DIRTY 0x002 /* Page is modified. */
+#define BH_DIRTY_CREATE 0x004 /* Page is modified. */
+#define BH_DISCARD 0x008 /* Page is useless. */
+#define BH_EXCLUSIVE 0x010 /* Exclusive access acquired. */
+#define BH_FREED 0x020 /* Page was freed. */
+#define BH_FROZEN 0x040 /* Frozen buffer: allocate & re-read. */
+#define BH_TRASH 0x080 /* Page is garbage. */
+#define BH_THAWED 0x100 /* Page was thawed. */
+ u_int16_t flags;
+
+ u_int32_t priority; /* Priority. */
+ SH_TAILQ_ENTRY hq; /* MPOOL hash bucket queue. */
+
+ db_pgno_t pgno; /* Underlying MPOOLFILE page number. */
+ roff_t mf_offset; /* Associated MPOOLFILE offset. */
+ u_int32_t bucket; /* Hash bucket containing header. */
+ int region; /* Region containing header. */
+
+ roff_t td_off; /* MVCC: creating TXN_DETAIL offset. */
+ SH_CHAIN_ENTRY vc; /* MVCC: version chain. */
+#ifdef DIAG_MVCC
+ u_int16_t align_off; /* Alignment offset for diagnostics.*/
+#endif
+
+ /*
+ * !!!
+ * This array must be at least size_t aligned -- the DB access methods
+ * put PAGE and other structures into it, and then access them directly.
+ * (We guarantee size_t alignment to applications in the documentation,
+ * too.)
+ */
+ u_int8_t buf[1]; /* Variable length data. */
+};
+
+/*
+ * BH_FROZEN_PAGE --
+ * Data used to find a frozen buffer header.
+ */
+struct __bh_frozen_p {
+ BH header;
+ db_pgno_t spgno; /* Page number in freezer file. */
+};
+
+/*
+ * BH_FROZEN_ALLOC --
+ * Frozen buffer headers are generally allocated a page at a time. This
+ * structure is allocated at the beginning of the page so that the
+ * allocation chunks can be tracked and freed (for private environments).
+ */
+struct __bh_frozen_a {
+ SH_TAILQ_ENTRY links;
+};
+
+#define MULTIVERSION(dbp) ((dbp)->mpf->mfp->multiversion)
+#define IS_DIRTY(p) \
+ (F_ISSET((BH *)((u_int8_t *) \
+ (p) - SSZA(BH, buf)), BH_DIRTY|BH_EXCLUSIVE) == (BH_DIRTY|BH_EXCLUSIVE))
+
+#define IS_VERSION(dbp, p) \
+ (!F_ISSET(dbp->mpf->mfp, MP_CAN_MMAP) && \
+ SH_CHAIN_HASPREV((BH *)((u_int8_t *)(p) - SSZA(BH, buf)), vc))
+
+#define BH_OWNER(env, bhp) \
+ ((TXN_DETAIL *)R_ADDR(&env->tx_handle->reginfo, bhp->td_off))
+
+#define BH_OWNED_BY(env, bhp, txn) ((txn) != NULL && \
+ (bhp)->td_off != INVALID_ROFF && \
+ (txn)->td == BH_OWNER(env, bhp))
+
+#define VISIBLE_LSN(env, bhp) \
+ (&BH_OWNER(env, bhp)->visible_lsn)
+
+/*
+ * Make a copy of the buffer's visible LSN, one field at a time. We rely on the
+ * 32-bit operations being atomic. The visible_lsn starts at MAX_LSN and is
+ * set during commit or abort to the current LSN.
+ *
+ * If we race with a commit / abort, we may see either the file or the offset
+ * still at UINT32_MAX, so vlsn is guaranteed to be in the future. That's OK,
+ * since we had to take the log region lock to allocate the read LSN so we were
+ * never going to see this buffer anyway.
+ */
+#define BH_VISIBLE(env, bhp, read_lsnp, vlsn) \
+ (bhp->td_off == INVALID_ROFF || \
+ ((vlsn).file = VISIBLE_LSN(env, bhp)->file, \
+ (vlsn).offset = VISIBLE_LSN(env, bhp)->offset, \
+ LOG_COMPARE((read_lsnp), &(vlsn)) >= 0))
+
+#define BH_OBSOLETE(bhp, old_lsn, vlsn) (SH_CHAIN_HASNEXT(bhp, vc) ? \
+ BH_VISIBLE(env, SH_CHAIN_NEXTP(bhp, vc, __bh), &(old_lsn), vlsn) :\
+ BH_VISIBLE(env, bhp, &(old_lsn), vlsn))
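+
+/*
+ * Editor's sketch (not part of the original source) of how a snapshot
+ * reader might use these macros: walk the version chain from the newest
+ * buffer back until a version visible to the transaction's read LSN is
+ * found.  SH_CHAIN_PREVP is assumed to mirror SH_CHAIN_NEXTP above.
+ *
+ *	for (bhp = newest_bhp; bhp != NULL;
+ *	    bhp = SH_CHAIN_PREVP(bhp, vc, __bh))
+ *		if (BH_VISIBLE(env, bhp, read_lsnp, vlsn))
+ *			break;
+ */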
+
+#define MVCC_SKIP_CURADJ(dbc, pgno) (dbc->txn != NULL && \
+ F_ISSET(dbc->txn, TXN_SNAPSHOT) && MULTIVERSION(dbc->dbp) && \
+ dbc->txn->td != NULL && __memp_skip_curadj(dbc, pgno))
+
+#if defined(DIAG_MVCC) && defined(HAVE_MPROTECT)
+#define VM_PAGESIZE 4096
+#define MVCC_BHSIZE(mfp, sz) do { \
+ sz += VM_PAGESIZE + sizeof(BH); \
+ if (mfp->stat.st_pagesize < VM_PAGESIZE) \
+ sz += VM_PAGESIZE - mfp->stat.st_pagesize; \
+} while (0)
+
+#define MVCC_BHALIGN(p) do { \
+ BH *__bhp; \
+ void *__orig = (p); \
+ p = ALIGNP_INC(p, VM_PAGESIZE); \
+ if ((u_int8_t *)p < (u_int8_t *)__orig + sizeof(BH)) \
+ p = (u_int8_t *)p + VM_PAGESIZE; \
+ __bhp = (BH *)((u_int8_t *)p - SSZA(BH, buf)); \
+ DB_ASSERT(env, \
+ ((uintptr_t)__bhp->buf & (VM_PAGESIZE - 1)) == 0); \
+ DB_ASSERT(env, \
+ (u_int8_t *)__bhp >= (u_int8_t *)__orig); \
+ DB_ASSERT(env, (u_int8_t *)p + mfp->stat.st_pagesize < \
+ (u_int8_t *)__orig + len); \
+ __bhp->align_off = \
+ (u_int16_t)((u_int8_t *)__bhp - (u_int8_t *)__orig); \
+ p = __bhp; \
+} while (0)
+
+#define MVCC_BHUNALIGN(bhp) do { \
+ (bhp) = (BH *)((u_int8_t *)(bhp) - (bhp)->align_off); \
+} while (0)
+
+#ifdef linux
+#define MVCC_MPROTECT(buf, sz, mode) do { \
+ int __ret = mprotect((buf), (sz), (mode)); \
+ DB_ASSERT(env, __ret == 0); \
+} while (0)
+#else
+#define MVCC_MPROTECT(buf, sz, mode) do { \
+ if (!F_ISSET(env, ENV_PRIVATE | ENV_SYSTEM_MEM)) { \
+ int __ret = mprotect((buf), (sz), (mode)); \
+ DB_ASSERT(env, __ret == 0); \
+ } \
+} while (0)
+#endif /* linux */
+
+#else /* defined(DIAG_MVCC) && defined(HAVE_MPROTECT) */
+#define MVCC_BHSIZE(mfp, sz) do {} while (0)
+#define MVCC_BHALIGN(p) do {} while (0)
+#define MVCC_BHUNALIGN(bhp) do {} while (0)
+#define MVCC_MPROTECT(buf, size, mode) do {} while (0)
+#endif
+
+/*
+ * Flags to __memp_ftruncate.
+ */
+#define MP_TRUNC_RECOVER 0x01
+
+#if defined(__cplusplus)
+}
+#endif
+
+#include "dbinc_auto/mp_ext.h"
+#endif /* !_DB_MP_H_ */
diff --git a/db-4.8.30/dbinc/mutex.h b/db-4.8.30/dbinc/mutex.h
new file mode 100644
index 0000000..028cbb3
--- /dev/null
+++ b/db-4.8.30/dbinc/mutex.h
@@ -0,0 +1,277 @@
+/*
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_MUTEX_H_
+#define _DB_MUTEX_H_
+
+#ifdef HAVE_MUTEX_SUPPORT
+/* The inlined trylock calls need access to the details of mutexes. */
+#define LOAD_ACTUAL_MUTEX_CODE
+#include "dbinc/mutex_int.h"
+
+#ifndef HAVE_SHARED_LATCHES
+ #error "Shared latches are required in DB 4.8 and above"
+#endif
+#endif
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * By default, spin 50 times per processor when we fail to acquire a
+ * test-and-set mutex; we have anecdotal evidence that this is a
+ * reasonable value.
+ */
+#define MUTEX_SPINS_PER_PROCESSOR 50
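+
+/*
+ * Editor's sketch of how the limit scales (assumed helper
+ * __os_cpu_count(); not defined in this header): a test-and-set
+ * acquire loop might spin this many times before yielding.
+ *
+ *	u_int32_t nspins;
+ *	nspins = MUTEX_SPINS_PER_PROCESSOR * __os_cpu_count();
+ *	while (nspins-- > 0)
+ *		if (MUTEX_SET(&mutexp->tas))
+ *			return (0);
+ */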
+
+/*
+ * Mutexes are represented by unsigned, 32-bit integral values. As the
+ * OOB value is 0, mutexes can be initialized by zeroing out the memory
+ * in which they reside.
+ */
+#define MUTEX_INVALID 0
+
+/*
+ * We track mutex allocations by ID.
+ */
+#define MTX_APPLICATION 1
+#define MTX_ATOMIC_EMULATION 2
+#define MTX_DB_HANDLE 3
+#define MTX_ENV_DBLIST 4
+#define MTX_ENV_HANDLE 5
+#define MTX_ENV_REGION 6
+#define MTX_LOCK_REGION 7
+#define MTX_LOGICAL_LOCK 8
+#define MTX_LOG_FILENAME 9
+#define MTX_LOG_FLUSH 10
+#define MTX_LOG_HANDLE 11
+#define MTX_LOG_REGION 12
+#define MTX_MPOOLFILE_HANDLE 13
+#define MTX_MPOOL_BH 14
+#define MTX_MPOOL_FH 15
+#define MTX_MPOOL_FILE_BUCKET 16
+#define MTX_MPOOL_HANDLE 17
+#define MTX_MPOOL_HASH_BUCKET 18
+#define MTX_MPOOL_REGION 19
+#define MTX_MUTEX_REGION 20
+#define MTX_MUTEX_TEST 21
+#define MTX_REP_CHKPT 22
+#define MTX_REP_DATABASE 23
+#define MTX_REP_EVENT 24
+#define MTX_REP_REGION 25
+#define MTX_REPMGR 26
+#define MTX_SEQUENCE 27
+#define MTX_TWISTER 28
+#define MTX_TXN_ACTIVE 29
+#define MTX_TXN_CHKPT 30
+#define MTX_TXN_COMMIT 31
+#define MTX_TXN_MVCC 32
+#define MTX_TXN_REGION 33
+
+#define MTX_MAX_ENTRY 33
+
+/* Redirect mutex calls to the correct functions. */
+#if !defined(HAVE_MUTEX_HYBRID) && ( \
+ defined(HAVE_MUTEX_PTHREADS) || \
+ defined(HAVE_MUTEX_SOLARIS_LWP) || \
+ defined(HAVE_MUTEX_UI_THREADS))
+#define __mutex_init(a, b, c) __db_pthread_mutex_init(a, b, c)
+#define __mutex_lock(a, b) __db_pthread_mutex_lock(a, b)
+#define __mutex_unlock(a, b) __db_pthread_mutex_unlock(a, b)
+#define __mutex_destroy(a, b) __db_pthread_mutex_destroy(a, b)
+#define __mutex_trylock(a, b) __db_pthread_mutex_trylock(a, b)
+/*
+ * These trylock versions do not support DB_ENV_FAILCHK. Callers that loop
+ * checking mutexes held by dead processes or threads might spin.
+ * These have ANSI-style definitions because this file can be included by
+ * C++ files, and extern "C" affects linkage only, not argument typing.
+ */
+static inline int __db_pthread_mutex_trylock(ENV *env, db_mutex_t mutex)
+{
+ int ret;
+ DB_MUTEX *mutexp;
+ if (!MUTEX_ON(env) || F_ISSET(env->dbenv, DB_ENV_NOLOCKING))
+ return (0);
+ mutexp = MUTEXP_SET(env->mutex_handle, mutex);
+#ifdef HAVE_SHARED_LATCHES
+ if (F_ISSET(mutexp, DB_MUTEX_SHARED))
+ ret = pthread_rwlock_trywrlock(&mutexp->u.rwlock);
+ else
+#endif
+ if ((ret = pthread_mutex_trylock(&mutexp->u.m.mutex)) == 0)
+ F_SET(mutexp, DB_MUTEX_LOCKED);
+ if (ret == EBUSY)
+ ret = DB_LOCK_NOTGRANTED;
+#ifdef HAVE_STATISTICS
+ if (ret == 0)
+ ++mutexp->mutex_set_nowait;
+#endif
+ return (ret);
+}
+#ifdef HAVE_SHARED_LATCHES
+#define __mutex_rdlock(a, b) __db_pthread_mutex_readlock(a, b)
+#define __mutex_tryrdlock(a, b) __db_pthread_mutex_tryreadlock(a, b)
+static inline int __db_pthread_mutex_tryreadlock(ENV *env, db_mutex_t mutex)
+{
+ int ret;
+ DB_MUTEX *mutexp;
+ if (!MUTEX_ON(env) || F_ISSET(env->dbenv, DB_ENV_NOLOCKING))
+ return (0);
+ mutexp = MUTEXP_SET(env->mutex_handle, mutex);
+ if (F_ISSET(mutexp, DB_MUTEX_SHARED))
+ ret = pthread_rwlock_tryrdlock(&mutexp->u.rwlock);
+ else
+ return (EINVAL);
+ if (ret == EBUSY)
+ ret = DB_LOCK_NOTGRANTED;
+#ifdef HAVE_STATISTICS
+ if (ret == 0)
+ ++mutexp->mutex_set_rd_nowait;
+#endif
+ return (ret);
+}
+#endif
+#elif defined(HAVE_MUTEX_WIN32) || defined(HAVE_MUTEX_WIN32_GCC)
+#define __mutex_init(a, b, c) __db_win32_mutex_init(a, b, c)
+#define __mutex_lock(a, b) __db_win32_mutex_lock(a, b)
+#define __mutex_trylock(a, b) __db_win32_mutex_trylock(a, b)
+#define __mutex_unlock(a, b) __db_win32_mutex_unlock(a, b)
+#define __mutex_destroy(a, b) __db_win32_mutex_destroy(a, b)
+#ifdef HAVE_SHARED_LATCHES
+#define __mutex_rdlock(a, b) __db_win32_mutex_readlock(a, b)
+#define __mutex_tryrdlock(a, b) __db_win32_mutex_tryreadlock(a, b)
+#endif
+#elif defined(HAVE_MUTEX_FCNTL)
+#define __mutex_init(a, b, c) __db_fcntl_mutex_init(a, b, c)
+#define __mutex_lock(a, b) __db_fcntl_mutex_lock(a, b)
+#define __mutex_trylock(a, b) __db_fcntl_mutex_trylock(a, b)
+#define __mutex_unlock(a, b) __db_fcntl_mutex_unlock(a, b)
+#define __mutex_destroy(a, b) __db_fcntl_mutex_destroy(a, b)
+#else
+#define __mutex_init(a, b, c) __db_tas_mutex_init(a, b, c)
+#define __mutex_lock(a, b) __db_tas_mutex_lock(a, b)
+#define __mutex_trylock(a, b) __db_tas_mutex_trylock(a, b)
+#define __mutex_unlock(a, b) __db_tas_mutex_unlock(a, b)
+#define __mutex_destroy(a, b) __db_tas_mutex_destroy(a, b)
+#if defined(HAVE_SHARED_LATCHES)
+#define __mutex_rdlock(a, b) __db_tas_mutex_readlock(a, b)
+#define __mutex_tryrdlock(a,b) __db_tas_mutex_tryreadlock(a, b)
+#endif
+#endif
+
+/*
+ * When there is no method to get a shared latch, fall back to
+ * implementing __mutex_rdlock() as getting an exclusive one.
+ * This occurs either when !HAVE_SHARED_LATCHES or HAVE_MUTEX_FCNTL.
+ */
+#ifndef __mutex_rdlock
+#define __mutex_rdlock(a, b) __mutex_lock(a, b)
+#endif
+#ifndef __mutex_tryrdlock
+#define __mutex_tryrdlock(a, b) __mutex_trylock(a, b)
+#endif
+
+/*
+ * Lock/unlock a mutex. If the mutex was never allocated (MUTEX_INVALID),
+ * the thread of control can proceed without it.
+ *
+ * We never fail to acquire or release a mutex without panicking. Simplify
+ * the macros to always return a panic value rather than saving the actual
+ * return value of the mutex routine.
+ */
+#ifdef HAVE_MUTEX_SUPPORT
+#define MUTEX_LOCK(env, mutex) do { \
+ if ((mutex) != MUTEX_INVALID && \
+ __mutex_lock(env, mutex) != 0) \
+ return (DB_RUNRECOVERY); \
+} while (0)
+
+/*
+ * Always check the return value of MUTEX_TRYLOCK()! Expect 0 on success,
+ * or DB_LOCK_NOTGRANTED, or possibly DB_RUNRECOVERY for failchk.
+ */
+#define MUTEX_TRYLOCK(env, mutex) \
+ (((mutex) == MUTEX_INVALID) ? 0 : __mutex_trylock(env, mutex))
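+
+/*
+ * Editor's example of the required calling pattern (hypothetical caller):
+ *
+ *	if ((ret = MUTEX_TRYLOCK(env, mutex)) == DB_LOCK_NOTGRANTED)
+ *		... the mutex is busy: fall back, retry or skip the work ...
+ *	else if (ret != 0)
+ *		return (ret);
+ */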
+
+/*
+ * Acquire a DB_MUTEX_SHARED "mutex" in shared mode.
+ */
+#define MUTEX_READLOCK(env, mutex) do { \
+ if ((mutex) != MUTEX_INVALID && \
+ __mutex_rdlock(env, mutex) != 0) \
+ return (DB_RUNRECOVERY); \
+} while (0)
+#define MUTEX_TRY_READLOCK(env, mutex) \
+ ((mutex) != MUTEX_INVALID ? __mutex_tryrdlock(env, mutex) : 0)
+
+#define MUTEX_UNLOCK(env, mutex) do { \
+ if ((mutex) != MUTEX_INVALID && \
+ __mutex_unlock(env, mutex) != 0) \
+ return (DB_RUNRECOVERY); \
+} while (0)
+#else
+/*
+ * There are calls to lock/unlock mutexes outside of #ifdef's -- replace
+ * the call with something the compiler can discard, but which will make
+ * if-then-else blocks work correctly.
+ */
+#define MUTEX_LOCK(env, mutex) (mutex) = (mutex)
+#define MUTEX_TRYLOCK(env, mutex) (mutex) = (mutex)
+#define MUTEX_READLOCK(env, mutex) (mutex) = (mutex)
+#define MUTEX_TRY_READLOCK(env, mutex) (mutex) = (mutex)
+#define MUTEX_UNLOCK(env, mutex) (mutex) = (mutex)
+#define MUTEX_REQUIRED(env, mutex) (mutex) = (mutex)
+#define MUTEX_REQUIRED_READ(env, mutex) (mutex) = (mutex)
+#endif
+
+/*
+ * Berkeley DB ports may require single-threading at places in the code.
+ */
+#ifdef HAVE_MUTEX_VXWORKS
+#include "taskLib.h"
+/*
+ * Use the taskLock() mutex to eliminate a race where two tasks are
+ * trying to initialize the global lock at the same time.
+ */
+#define DB_BEGIN_SINGLE_THREAD do { \
+ if (DB_GLOBAL(db_global_init)) \
+ (void)semTake(DB_GLOBAL(db_global_lock), WAIT_FOREVER); \
+ else { \
+ taskLock(); \
+ if (DB_GLOBAL(db_global_init)) { \
+ taskUnlock(); \
+ (void)semTake(DB_GLOBAL(db_global_lock), \
+ WAIT_FOREVER); \
+ continue; \
+ } \
+ DB_GLOBAL(db_global_lock) = \
+ semBCreate(SEM_Q_FIFO, SEM_EMPTY); \
+ if (DB_GLOBAL(db_global_lock) != NULL) \
+ DB_GLOBAL(db_global_init) = 1; \
+ taskUnlock(); \
+ } \
+} while (DB_GLOBAL(db_global_init) == 0)
+#define DB_END_SINGLE_THREAD (void)semGive(DB_GLOBAL(db_global_lock))
+#endif
+
+/*
+ * Single-threading defaults to a no-op.
+ */
+#ifndef DB_BEGIN_SINGLE_THREAD
+#define DB_BEGIN_SINGLE_THREAD
+#endif
+#ifndef DB_END_SINGLE_THREAD
+#define DB_END_SINGLE_THREAD
+#endif
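+
+/*
+ * Editor's sketch of the intended usage (hypothetical caller): bracket
+ * the non-atomic first-open work so only one task runs it at a time.
+ *
+ *	DB_BEGIN_SINGLE_THREAD;
+ *	... probe for / create the environment file ...
+ *	DB_END_SINGLE_THREAD;
+ */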
+
+#if defined(__cplusplus)
+}
+#endif
+
+#include "dbinc_auto/mutex_ext.h"
+#endif /* !_DB_MUTEX_H_ */
diff --git a/db-4.8.30/dbinc/mutex_int.h b/db-4.8.30/dbinc/mutex_int.h
new file mode 100644
index 0000000..61edaf6
--- /dev/null
+++ b/db-4.8.30/dbinc/mutex_int.h
@@ -0,0 +1,1073 @@
+/*
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_MUTEX_INT_H_
+#define _DB_MUTEX_INT_H_
+
+#include "dbinc/atomic.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * Mutexes and Shared Latches
+ *
+ * Mutexes may be test-and-set (spinning & yielding when busy),
+ * native versions (pthreads, WaitForSingleObject)
+ * or a hybrid which has the lower no-contention overhead of test-and-set
+ * mutexes, using operating system calls only to block and wakeup.
+ *
+ * Hybrid exclusive-only mutexes include a 'tas' field.
+ * Hybrid DB_MUTEX_SHARED latches also include a 'shared' field.
+ */
+
+/*********************************************************************
+ * POSIX.1 pthreads interface.
+ *********************************************************************/
+#if defined(HAVE_MUTEX_PTHREADS)
+/*
+ * Pthreads-based mutexes (exclusive-only) and latches (possibly shared)
+ * have the same MUTEX_FIELDS union. Different parts of the union are used
+ * depending on:
+ * - whether HAVE_SHARED_LATCHES is defined, and
+ * - if HAVE_SHARED_LATCHES, whether this particular instance of a mutex
+ * is a shared latch (DB_MUTEX_SHARED).
+ *
+ * The rwlock part of the union is used *only* for non-hybrid shared latches;
+ * in all other cases the mutex and cond fields are the only ones used.
+ *
+ *	configuration			Who uses the field
+ *					mutex	cond	rwlock	tas
+ *	Native mutexes			  y	  y
+ *	Hybrid mutexes			  y	  y		  y
+ *	Native shared latches				  y
+ *	Hybrid shared latches		  y	  y		  y
+ *
+ * They all have a condition variable which is used only for
+ * DB_MUTEX_SELF_BLOCK waits.
+ *
+ * There can be no self-blocking shared latches: the pthread_cond_wait()
+ * would require acquiring a pthread_mutex_t, and it would make little
+ * sense anyway.
+ */
+#define MUTEX_FIELDS \
+ union { \
+ struct { \
+ pthread_mutex_t mutex; /* Mutex */ \
+ pthread_cond_t cond; /* Condition variable */ \
+ } m; \
+ pthread_rwlock_t rwlock; /* Read/write lock */ \
+ } u;
+
+#if defined(HAVE_SHARED_LATCHES) && !defined(HAVE_MUTEX_HYBRID)
+#define RET_SET_PTHREAD_LOCK(mutexp, ret) do { \
+ if (F_ISSET(mutexp, DB_MUTEX_SHARED)) \
+ RET_SET((pthread_rwlock_wrlock(&(mutexp)->u.rwlock)), \
+ ret); \
+ else \
+ RET_SET((pthread_mutex_lock(&(mutexp)->u.m.mutex)), ret); \
+} while (0)
+#define RET_SET_PTHREAD_TRYLOCK(mutexp, ret) do { \
+ if (F_ISSET(mutexp, DB_MUTEX_SHARED)) \
+ RET_SET((pthread_rwlock_trywrlock(&(mutexp)->u.rwlock)), \
+ ret); \
+ else \
+ RET_SET((pthread_mutex_trylock(&(mutexp)->u.m.mutex)), \
+ ret); \
+} while (0)
+#else
+#define RET_SET_PTHREAD_LOCK(mutexp, ret) \
+ RET_SET(pthread_mutex_lock(&(mutexp)->u.m.mutex), ret);
+#define RET_SET_PTHREAD_TRYLOCK(mutexp, ret) \
+ RET_SET(pthread_mutex_trylock(&(mutexp)->u.m.mutex), ret);
+#endif
+#endif
+
+#ifdef HAVE_MUTEX_UI_THREADS
+#include <thread.h>
+#endif
+
+/*********************************************************************
+ * Solaris lwp threads interface.
+ *
+ * !!!
+ * We use LWP mutexes on Solaris instead of UI or POSIX mutexes (both of
+ * which are available), for two reasons. First, the Solaris C library
+ * includes versions of both the UI and POSIX thread mutex interfaces, but
+ * they are broken in that they don't support inter-process locking, and
+ * there's no way to detect it, e.g., calls to configure the mutexes for
+ * inter-process locking succeed without error. So, we use LWP mutexes so
+ * that we don't fail in fairly undetectable ways because the application
+ * wasn't linked with the appropriate threads library. Second, there were
+ * bugs in SunOS 5.7 (Solaris 7) where if an application loaded the C library
+ * before loading the libthread/libpthread threads libraries (e.g., by using
+ * dlopen to load the DB library), the pwrite64 interface would be translated
+ * into a call to pwrite and DB would drop core.
+ *********************************************************************/
+#ifdef HAVE_MUTEX_SOLARIS_LWP
+/*
+ * XXX
+ * Don't change <synch.h> to <sys/lwp.h> -- although lwp.h is listed in the
+ * Solaris manual page as the correct include to use, it causes the Solaris
+ * compiler on SunOS 2.6 to fail.
+ */
+#include <synch.h>
+
+#define MUTEX_FIELDS \
+ lwp_mutex_t mutex; /* Mutex. */ \
+ lwp_cond_t cond; /* Condition variable. */
+#endif
+
+/*********************************************************************
+ * Solaris/Unixware threads interface.
+ *********************************************************************/
+#ifdef HAVE_MUTEX_UI_THREADS
+#include <thread.h>
+#include <synch.h>
+
+#define MUTEX_FIELDS \
+ mutex_t mutex; /* Mutex. */ \
+ cond_t cond; /* Condition variable. */
+#endif
+
+/*********************************************************************
+ * AIX C library functions.
+ *********************************************************************/
+#ifdef HAVE_MUTEX_AIX_CHECK_LOCK
+#include <sys/atomic_op.h>
+typedef int tsl_t;
+
+#ifdef LOAD_ACTUAL_MUTEX_CODE
+#define MUTEX_INIT(x) 0
+#define MUTEX_SET(x) (!_check_lock(x, 0, 1))
+#define MUTEX_UNSET(x) _clear_lock(x, 0)
+#endif
+#endif
+
+/*********************************************************************
+ * Apple/Darwin library functions.
+ *********************************************************************/
+#ifdef HAVE_MUTEX_DARWIN_SPIN_LOCK_TRY
+typedef u_int32_t tsl_t;
+
+#ifdef LOAD_ACTUAL_MUTEX_CODE
+extern int _spin_lock_try(tsl_t *);
+extern void _spin_unlock(tsl_t *);
+#define MUTEX_SET(tsl) _spin_lock_try(tsl)
+#define MUTEX_UNSET(tsl) _spin_unlock(tsl)
+#define MUTEX_INIT(tsl) (MUTEX_UNSET(tsl), 0)
+#endif
+#endif
+
+/*********************************************************************
+ * General C library functions (msemaphore).
+ *
+ * !!!
+ * Check for HPPA as a special case, because it requires unusual alignment,
+ * and doesn't support semaphores in malloc(3) or shmget(2) memory.
+ *
+ * !!!
+ * Do not remove the MSEM_IF_NOWAIT flag. The problem is that if a single
+ * process makes two msem_lock() calls in a row, the second one returns an
+ * error. We depend on the fact that we can lock against ourselves in the
+ * locking subsystem, where we set up a mutex so that we can block ourselves.
+ * Tested on OSF1 v4.0.
+ *********************************************************************/
+#ifdef HAVE_MUTEX_HPPA_MSEM_INIT
+#define MUTEX_ALIGN 16
+#endif
+
+#if defined(HAVE_MUTEX_MSEM_INIT) || defined(HAVE_MUTEX_HPPA_MSEM_INIT)
+#include <sys/mman.h>
+typedef msemaphore tsl_t;
+
+#ifdef LOAD_ACTUAL_MUTEX_CODE
+#define MUTEX_INIT(x) (msem_init(x, MSEM_UNLOCKED) <= (msemaphore *)0)
+#define MUTEX_SET(x) (!msem_lock(x, MSEM_IF_NOWAIT))
+#define MUTEX_UNSET(x) msem_unlock(x, 0)
+#endif
+#endif
+
+/*********************************************************************
+ * Plan 9 library functions.
+ *********************************************************************/
+#ifdef HAVE_MUTEX_PLAN9
+typedef Lock tsl_t;
+
+#define MUTEX_INIT(x) (memset(x, 0, sizeof(Lock)), 0)
+#define MUTEX_SET(x) canlock(x)
+#define MUTEX_UNSET(x) unlock(x)
+#endif
+
+/*********************************************************************
+ * Reliant UNIX C library functions.
+ *********************************************************************/
+#ifdef HAVE_MUTEX_RELIANTUNIX_INITSPIN
+#include <ulocks.h>
+typedef spinlock_t tsl_t;
+
+#ifdef LOAD_ACTUAL_MUTEX_CODE
+#define MUTEX_INIT(x) (initspin(x, 1), 0)
+#define MUTEX_SET(x) (cspinlock(x) == 0)
+#define MUTEX_UNSET(x) spinunlock(x)
+#endif
+#endif
+
+/*********************************************************************
+ * General C library functions (POSIX 1003.1 sema_XXX).
+ *
+ * !!!
+ * Never selected by autoconfig in this release (semaphore calls are known
+ * to not work in Solaris 5.5).
+ *********************************************************************/
+#ifdef HAVE_MUTEX_SEMA_INIT
+#include <synch.h>
+typedef sema_t tsl_t;
+
+#ifdef LOAD_ACTUAL_MUTEX_CODE
+#define MUTEX_DESTROY(x) sema_destroy(x)
+#define MUTEX_INIT(x) (sema_init(x, 1, USYNC_PROCESS, NULL) != 0)
+#define MUTEX_SET(x) (sema_wait(x) == 0)
+#define MUTEX_UNSET(x) sema_post(x)
+#endif
+#endif
+
+/*********************************************************************
+ * SGI C library functions.
+ *********************************************************************/
+#ifdef HAVE_MUTEX_SGI_INIT_LOCK
+#include <abi_mutex.h>
+typedef abilock_t tsl_t;
+
+#ifdef LOAD_ACTUAL_MUTEX_CODE
+#define MUTEX_INIT(x) (init_lock(x) != 0)
+#define MUTEX_SET(x) (!acquire_lock(x))
+#define MUTEX_UNSET(x) release_lock(x)
+#endif
+#endif
+
+/*********************************************************************
+ * Solaris C library functions.
+ *
+ * !!!
+ * These are undocumented functions, but they're the only ones that work
+ * correctly as far as we know.
+ *********************************************************************/
+#ifdef HAVE_MUTEX_SOLARIS_LOCK_TRY
+#include <sys/atomic.h>
+#define MUTEX_MEMBAR(x) membar_enter()
+#define MEMBAR_ENTER() membar_enter()
+#define MEMBAR_EXIT() membar_exit()
+#include <sys/machlock.h>
+typedef lock_t tsl_t;
+
+/*
+ * The functions are declared in <sys/machlock.h>, but under #ifdef KERNEL.
+ * Re-declare them here to avoid warnings.
+ */
+extern int _lock_try(lock_t *);
+extern void _lock_clear(lock_t *);
+
+#ifdef LOAD_ACTUAL_MUTEX_CODE
+#define MUTEX_INIT(x) 0
+#define MUTEX_SET(x) _lock_try(x)
+#define MUTEX_UNSET(x) _lock_clear(x)
+#endif
+#endif
+
+/*********************************************************************
+ * VMS.
+ *********************************************************************/
+#ifdef HAVE_MUTEX_VMS
+#include <sys/mman.h>
+#include <builtins.h>
+typedef volatile unsigned char tsl_t;
+
+#ifdef LOAD_ACTUAL_MUTEX_CODE
+#ifdef __ALPHA
+#define MUTEX_SET(tsl) (!__TESTBITSSI(tsl, 0))
+#else /* __VAX */
+#define MUTEX_SET(tsl) (!(int)_BBSSI(0, tsl))
+#endif
+#define MUTEX_UNSET(tsl) (*(tsl) = 0)
+#define MUTEX_INIT(tsl) (MUTEX_UNSET(tsl), 0)
+#endif
+#endif
+
+/*********************************************************************
+ * VxWorks
+ * Use basic binary semaphores in VxWorks, as we currently do not need
+ * any special features. We do need the ability to single-thread the
+ * entire system, however, because VxWorks doesn't support the open(2)
+ * flag O_EXCL, the mechanism we normally use to single thread access
+ * when we're first looking for a DB environment.
+ *********************************************************************/
+#ifdef HAVE_MUTEX_VXWORKS
+#include "taskLib.h"
+typedef SEM_ID tsl_t;
+
+#ifdef LOAD_ACTUAL_MUTEX_CODE
+/*
+ * Uses of this MUTEX_SET() need to have a local 'nowait' variable,
+ * which determines whether to return right away when the semaphore
+ * is busy or to wait until it is available.
+ */
+#define MUTEX_SET(tsl) \
+ (semTake((*(tsl)), nowait ? NO_WAIT : WAIT_FOREVER) == OK)
+#define MUTEX_UNSET(tsl) (semGive((*tsl)))
+#define MUTEX_INIT(tsl) \
+ ((*(tsl) = semBCreate(SEM_Q_FIFO, SEM_FULL)) == NULL)
+#define MUTEX_DESTROY(tsl) semDelete(*tsl)
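+
+/*
+ * Editor's example of a conforming call site (hypothetical): the
+ * MUTEX_SET() above expects 'nowait' to be in scope.
+ *
+ *	int nowait = 1;
+ *	if (!MUTEX_SET(tslp))
+ *		return (DB_LOCK_NOTGRANTED);
+ */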
+#endif
+#endif
+
+/*********************************************************************
+ * Win16
+ *
+ * Win16 spinlocks are simple because we cannot possibly be preempted.
+ *
+ * !!!
+ * We should simplify this by always returning a no-need-to-lock lock
+ * when we initialize the mutex.
+ *********************************************************************/
+#ifdef HAVE_MUTEX_WIN16
+typedef unsigned int tsl_t;
+
+#ifdef LOAD_ACTUAL_MUTEX_CODE
+#define MUTEX_INIT(x) 0
+#define MUTEX_SET(tsl) (*(tsl) = 1)
+#define MUTEX_UNSET(tsl) (*(tsl) = 0)
+#endif
+#endif
+
+/*********************************************************************
+ * Win32 - always a hybrid mutex
+ *********************************************************************/
+#if defined(HAVE_MUTEX_WIN32) || defined(HAVE_MUTEX_WIN32_GCC)
+typedef LONG volatile tsl_t;
+#define MUTEX_FIELDS \
+ LONG nwaiters; \
+ u_int32_t id; /* ID used for creating events */ \
+
+#if defined(LOAD_ACTUAL_MUTEX_CODE)
+#define MUTEX_SET(tsl) (!InterlockedExchange((PLONG)tsl, 1))
+#define MUTEX_UNSET(tsl) InterlockedExchange((PLONG)tsl, 0)
+#define MUTEX_INIT(tsl) MUTEX_UNSET(tsl)
+
+/*
+ * From Intel's performance tuning documentation (and see SR #6975):
+ * ftp://download.intel.com/design/perftool/cbts/appnotes/sse2/w_spinlock.pdf
+ *
+ * "For this reason, it is highly recommended that you insert the PAUSE
+ * instruction into all spin-wait code immediately. Using the PAUSE
+ * instruction does not affect the correctness of programs on existing
+ * platforms, and it improves performance on Pentium 4 processor platforms."
+ */
+#ifdef HAVE_MUTEX_WIN32
+#if !defined(_WIN64) && !defined(DB_WINCE)
+#define MUTEX_PAUSE {__asm{_emit 0xf3}; __asm{_emit 0x90}}
+#endif
+#endif
+#ifdef HAVE_MUTEX_WIN32_GCC
+#define MUTEX_PAUSE __asm__ volatile ("rep; nop" : : );
+#endif
+#endif
+#endif
+
+/*********************************************************************
+ * 68K/gcc assembly.
+ *********************************************************************/
+#ifdef HAVE_MUTEX_68K_GCC_ASSEMBLY
+typedef unsigned char tsl_t;
+
+#ifdef LOAD_ACTUAL_MUTEX_CODE
+/* gcc/68K: 0 is clear, 1 is set. */
+#define MUTEX_SET(tsl) ({ \
+ register tsl_t *__l = (tsl); \
+ int __r; \
+ __asm__ volatile("tas %1; \n \
+ seq %0" \
+ : "=dm" (__r), "=m" (*__l) \
+ : "1" (*__l) \
+ ); \
+ __r & 1; \
+})
+
+#define MUTEX_UNSET(tsl) (*(tsl) = 0)
+#define MUTEX_INIT(tsl) (MUTEX_UNSET(tsl), 0)
+#endif
+#endif
+
+/*********************************************************************
+ * ALPHA/gcc assembly.
+ *********************************************************************/
+#ifdef HAVE_MUTEX_ALPHA_GCC_ASSEMBLY
+typedef u_int32_t tsl_t;
+
+#define MUTEX_ALIGN 4
+
+#ifdef LOAD_ACTUAL_MUTEX_CODE
+/*
+ * For gcc/alpha. Should return 0 if it could not acquire the lock, 1 if
+ * the lock was acquired properly.
+ */
+static inline int
+MUTEX_SET(tsl_t *tsl) {
+ register tsl_t *__l = tsl;
+ register tsl_t __r;
+ __asm__ volatile(
+ "1: ldl_l %0,%2\n"
+ " blbs %0,2f\n"
+ " or $31,1,%0\n"
+ " stl_c %0,%1\n"
+ " beq %0,3f\n"
+ " mb\n"
+ " br 3f\n"
+ "2: xor %0,%0\n"
+ "3:"
+ : "=&r"(__r), "=m"(*__l) : "1"(*__l) : "memory");
+ return __r;
+}
+
+/*
+ * Unset mutex. Judging by the Alpha Architecture Handbook, the mb
+ * instruction might be necessary before unlocking.
+ */
+static inline int
+MUTEX_UNSET(tsl_t *tsl) {
+ __asm__ volatile(" mb\n");
+ return *tsl = 0;
+}
+
+#define MUTEX_INIT(tsl) MUTEX_UNSET(tsl)
+#endif
+#endif
+
+/*********************************************************************
+ * Tru64/cc assembly.
+ *********************************************************************/
+#ifdef HAVE_MUTEX_TRU64_CC_ASSEMBLY
+typedef volatile u_int32_t tsl_t;
+
+#define MUTEX_ALIGN 4
+
+#ifdef LOAD_ACTUAL_MUTEX_CODE
+#include <alpha/builtins.h>
+#define MUTEX_SET(tsl) (__LOCK_LONG_RETRY((tsl), 1) != 0)
+#define MUTEX_UNSET(tsl) (__UNLOCK_LONG(tsl))
+
+#define MUTEX_INIT(tsl) (MUTEX_UNSET(tsl), 0)
+#endif
+#endif
+
+/*********************************************************************
+ * ARM/gcc assembly.
+ *********************************************************************/
+#ifdef HAVE_MUTEX_ARM_GCC_ASSEMBLY
+typedef unsigned char tsl_t;
+
+#ifdef LOAD_ACTUAL_MUTEX_CODE
+/* gcc/arm: 0 is clear, 1 is set. */
+#define MUTEX_SET(tsl) ({ \
+ int __r; \
+ __asm__ volatile( \
+ "swpb %0, %1, [%2]\n\t" \
+ "eor %0, %0, #1\n\t" \
+ : "=&r" (__r) \
+ : "r" (1), "r" (tsl) \
+ ); \
+ __r & 1; \
+})
+
+#define MUTEX_UNSET(tsl) (*(volatile tsl_t *)(tsl) = 0)
+#define MUTEX_INIT(tsl) (MUTEX_UNSET(tsl), 0)
+#endif
+#endif
+
+/*********************************************************************
+ * HPPA/gcc assembly.
+ *********************************************************************/
+#ifdef HAVE_MUTEX_HPPA_GCC_ASSEMBLY
+typedef u_int32_t tsl_t;
+
+#define MUTEX_ALIGN 16
+
+#ifdef LOAD_ACTUAL_MUTEX_CODE
+/*
+ * The PA-RISC has a "load and clear" instead of a "test and set" instruction.
+ * The 32-bit word used by that instruction must be 16-byte aligned. We could
+ * use the "aligned" attribute in GCC but that doesn't work for stack variables.
+ */
+#define MUTEX_SET(tsl) ({ \
+ register tsl_t *__l = (tsl); \
+ int __r; \
+ __asm__ volatile("ldcws 0(%1),%0" : "=r" (__r) : "r" (__l)); \
+ __r & 1; \
+})
+
+#define MUTEX_UNSET(tsl) (*(volatile tsl_t *)(tsl) = -1)
+#define MUTEX_INIT(tsl) (MUTEX_UNSET(tsl), 0)
+#endif
+#endif
+
+/*********************************************************************
+ * IA64/gcc assembly.
+ *********************************************************************/
+#ifdef HAVE_MUTEX_IA64_GCC_ASSEMBLY
+typedef volatile unsigned char tsl_t;
+
+#ifdef LOAD_ACTUAL_MUTEX_CODE
+/* gcc/ia64: 0 is clear, 1 is set. */
+#define MUTEX_SET(tsl) ({ \
+ register tsl_t *__l = (tsl); \
+ long __r; \
+ __asm__ volatile("xchg1 %0=%1,%2" : \
+ "=r"(__r), "+m"(*__l) : "r"(1)); \
+ __r ^ 1; \
+})
+
+/*
+ * Store through a "volatile" pointer so we get a store with "release"
+ * semantics.
+ */
+#define MUTEX_UNSET(tsl) (*(tsl_t *)(tsl) = 0)
+#define MUTEX_INIT(tsl) (MUTEX_UNSET(tsl), 0)
+#endif
+#endif
+
+/*********************************************************************
+ * PowerPC/gcc assembly.
+ *********************************************************************/
+#if defined(HAVE_MUTEX_PPC_GCC_ASSEMBLY)
+typedef u_int32_t tsl_t;
+
+#ifdef LOAD_ACTUAL_MUTEX_CODE
+/*
+ * The PowerPC does a sort of pseudo-atomic locking. You set up a
+ * 'reservation' on a chunk of memory containing a mutex by loading the
+ * mutex value with LWARX. If the mutex has an 'unlocked' (arbitrary)
+ * value, you then try storing into it with STWCX. If no other process or
+ * thread broke your 'reservation' by modifying the memory containing the
+ * mutex, then the STWCX. succeeds; otherwise it fails and you try to get
+ * a reservation again.
+ *
+ * While mutexes are explicitly 4 bytes, a 'reservation' applies to an
+ * entire cache line, normally 32 bytes, aligned naturally. If the mutex
+ * lives near data that gets changed a lot, there's a chance that you'll
+ * see more broken reservations than you might otherwise. The only
+ * situation in which this might be a problem is if one processor is
+ * beating on a variable in the same cache block as the mutex while another
+ * processor tries to acquire the mutex. That's bad news regardless
+ * because of the way it bashes caches, but if you can't guarantee that a
+ * mutex will reside in a relatively quiescent cache line, you might
+ * consider padding the mutex to force it to live in a cache line by
+ * itself. No, you aren't guaranteed that cache lines are 32 bytes. Some
+ * embedded processors use 16-byte cache lines, while some 64-bit
+ * processors use 128-bit cache lines. But assuming a 32-byte cache line
+ * won't get you into trouble for now.
+ *
+ * If mutex locking is a bottleneck, then you can speed it up by adding a
+ * regular LWZ load before the LWARX load, so that you can test for the
+ * common case of a locked mutex without wasting cycles making a reservation.
+ *
+ * gcc/ppc: 0 is clear, 1 is set.
+ */
+static inline int
+MUTEX_SET(int *tsl) {
+ int __r;
+ __asm__ volatile (
+"0: \n\t"
+" lwarx %0,0,%1 \n\t"
+" cmpwi %0,0 \n\t"
+" bne- 1f \n\t"
+" stwcx. %1,0,%1 \n\t"
+" isync \n\t"
+" beq+ 2f \n\t"
+" b 0b \n\t"
+"1: \n\t"
+" li %1,0 \n\t"
+"2: \n\t"
+ : "=&r" (__r), "+r" (tsl)
+ :
+ : "cr0", "memory");
+ return (int)tsl;
+}
+
+static inline int
+MUTEX_UNSET(tsl_t *tsl) {
+ __asm__ volatile("sync" : : : "memory");
+ return *tsl = 0;
+}
+#define MUTEX_INIT(tsl) MUTEX_UNSET(tsl)
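+
+/*
+ * Editor's sketch (not part of the original source) of the LWZ pre-test
+ * suggested above: poll with an ordinary load and attempt the
+ * LWARX/STWCX. reservation only once the latch appears free.
+ */
+static inline int
+MUTEX_SET_TEST_FIRST(int *tsl) {
+	/* An ordinary load neither creates nor breaks a reservation. */
+	if (*(volatile int *)tsl != 0)
+		return (0);	/* Busy: fail fast, let the caller retry. */
+	return (MUTEX_SET(tsl));
+}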
+#endif
+#endif
+
+/*********************************************************************
+ * OS/390 C.
+ *********************************************************************/
+#ifdef HAVE_MUTEX_S390_CC_ASSEMBLY
+typedef int tsl_t;
+
+#ifdef LOAD_ACTUAL_MUTEX_CODE
+/*
+ * cs() is declared in <stdlib.h> but is built in to the compiler.
+ * Must use LANGLVL(EXTENDED) to get its declaration.
+ */
+#define MUTEX_SET(tsl) (!cs(&zero, (tsl), 1))
+#define MUTEX_UNSET(tsl) (*(tsl) = 0)
+#define MUTEX_INIT(tsl) (MUTEX_UNSET(tsl), 0)
+#endif
+#endif
+
+/*********************************************************************
+ * S/390 32-bit assembly.
+ *********************************************************************/
+#ifdef HAVE_MUTEX_S390_GCC_ASSEMBLY
+typedef int tsl_t;
+
+#ifdef LOAD_ACTUAL_MUTEX_CODE
+/* gcc/S390: 0 is clear, 1 is set. */
+static inline int
+MUTEX_SET(tsl_t *tsl) { \
+ register tsl_t *__l = (tsl); \
+ int __r; \
+ __asm__ volatile( \
+ " la 1,%1\n" \
+ " lhi 0,1\n" \
+ " l %0,%1\n" \
+ "0: cs %0,0,0(1)\n" \
+ " jl 0b" \
+ : "=&d" (__r), "+m" (*__l) \
+ : : "0", "1", "cc"); \
+ return !__r; \
+}
+
+#define MUTEX_UNSET(tsl) (*(tsl) = 0)
+#define MUTEX_INIT(tsl) (MUTEX_UNSET(tsl), 0)
+#endif
+#endif
+
+/*********************************************************************
+ * SCO/cc assembly.
+ *********************************************************************/
+#ifdef HAVE_MUTEX_SCO_X86_CC_ASSEMBLY
+typedef unsigned char tsl_t;
+
+#ifdef LOAD_ACTUAL_MUTEX_CODE
+/*
+ * UnixWare has threads in libthread, but OpenServer doesn't (yet).
+ *
+ * cc/x86: 0 is clear, 1 is set.
+ */
+#if defined(__USLC__)
+asm int
+_tsl_set(void *tsl)
+{
+%mem tsl
+ movl tsl, %ecx
+ movl $1, %eax
+ lock
+ xchgb (%ecx),%al
+ xorl $1,%eax
+}
+#endif
+
+#define MUTEX_SET(tsl) _tsl_set(tsl)
+#define MUTEX_UNSET(tsl) (*(tsl) = 0)
+#define MUTEX_INIT(tsl) (MUTEX_UNSET(tsl), 0)
+#endif
+#endif
+
+/*********************************************************************
+ * Sparc/gcc assembly.
+ *********************************************************************/
+#ifdef HAVE_MUTEX_SPARC_GCC_ASSEMBLY
+typedef unsigned char tsl_t;
+
+#define MUTEX_ALIGN 8
+
+#ifdef LOAD_ACTUAL_MUTEX_CODE
+/*
+ * The ldstub instruction takes the location specified by its first argument
+ * (a register containing a memory address) and loads its contents into its
+ * second argument (a register) and atomically sets the contents of the
+ * location
+ * specified by its first argument to a byte of 1s. (The value in the second
+ * argument is never read, but only overwritten.)
+ *
+ * Hybrid mutexes require membar #StoreLoad and #LoadStore ordering on multi-
+ * processor v9 systems.
+ *
+ * gcc/sparc: 0 is clear, 1 is set.
+ */
+#define MUTEX_SET(tsl) ({ \
+ register tsl_t *__l = (tsl); \
+ register tsl_t __r; \
+ __asm__ volatile \
+ ("ldstub [%1],%0; stbar" \
+ : "=r"( __r) : "r" (__l)); \
+ !__r; \
+})
+
+#define MUTEX_UNSET(tsl) (*(tsl) = 0, MUTEX_MEMBAR(tsl))
+#define MUTEX_INIT(tsl) (MUTEX_UNSET(tsl), 0)
+#define MUTEX_MEMBAR(x) \
+ ({ __asm__ volatile ("membar #StoreStore|#StoreLoad|#LoadStore"); })
+#define MEMBAR_ENTER() \
+ ({ __asm__ volatile ("membar #StoreStore|#StoreLoad"); })
+#define MEMBAR_EXIT() \
+ ({ __asm__ volatile ("membar #StoreStore|#LoadStore"); })
+#endif
+#endif
+
+/*********************************************************************
+ * UTS/cc assembly.
+ *********************************************************************/
+#ifdef HAVE_MUTEX_UTS_CC_ASSEMBLY
+typedef int tsl_t;
+
+#ifdef LOAD_ACTUAL_MUTEX_CODE
+#define MUTEX_INIT(x) 0
+#define MUTEX_SET(x) (!uts_lock(x, 1))
+#define MUTEX_UNSET(x) (*(x) = 0)
+#endif
+#endif
+
+/*********************************************************************
+ * MIPS/gcc assembly.
+ *********************************************************************/
+#ifdef HAVE_MUTEX_MIPS_GCC_ASSEMBLY
+typedef u_int32_t tsl_t;
+
+#define MUTEX_ALIGN 4
+
+#ifdef LOAD_ACTUAL_MUTEX_CODE
+/*
+ * For gcc/MIPS. Should return 0 if it could not acquire the lock, 1 if
+ * the lock was acquired properly.
+ */
+static inline int
+MUTEX_SET(tsl_t *tsl) {
+ register tsl_t *__l = tsl;
+ register tsl_t __r, __t;
+ __asm__ volatile(
+ " .set push \n"
+ " .set mips2 \n"
+ " .set noreorder \n"
+ " .set nomacro \n"
+ "1: ll %0, %3 \n"
+ " ori %2, %0, 1 \n"
+ " sc %2, %1 \n"
+ " beqzl %2, 1b \n"
+ " nop \n"
+ " andi %2, %0, 1 \n"
+ " sync \n"
+ " .set reorder \n"
+ " .set pop \n"
+ : "=&r" (__t), "=m" (*tsl), "=&r" (__r)
+ : "m" (*tsl)
+ : "memory");
+ return (!__r);
+}
+
+static inline void
+MUTEX_UNSET(tsl_t *tsl) {
+ __asm__ volatile(
+ " .set noreorder \n"
+ " sync \n"
+ " sw $0, %0 \n"
+ " .set reorder \n"
+ : "=m" (*tsl)
+ : "m" (*tsl)
+ : "memory");
+}
+
+#define MUTEX_INIT(tsl) (*(tsl) = 0)
+#endif
+#endif
+
+/*********************************************************************
+ * x86/gcc (32- and 64-bit) assembly.
+ *********************************************************************/
+#if defined(HAVE_MUTEX_X86_GCC_ASSEMBLY) || \
+ defined(HAVE_MUTEX_X86_64_GCC_ASSEMBLY)
+typedef volatile unsigned char tsl_t;
+
+#ifdef LOAD_ACTUAL_MUTEX_CODE
+/* gcc/x86: 0 is clear, 1 is set. */
+#define MUTEX_SET(tsl) ({ \
+ tsl_t __r; \
+ __asm__ volatile("movb $1, %b0\n\t" \
+ "xchgb %b0,%1" \
+ : "=&q" (__r) \
+ : "m" (*(tsl_t *)(tsl)) \
+ : "memory", "cc"); \
+ !__r; /* return 1 on success, 0 on failure */ \
+})
+
+#define MUTEX_UNSET(tsl) (*(tsl_t *)(tsl) = 0)
+#define MUTEX_INIT(tsl) (MUTEX_UNSET(tsl), 0)
+/*
+ * We need to pass a valid address to generate the memory barrier
+ * otherwise PURIFY will complain. Use something referenced recently
+ * and initialized.
+ */
+#if defined(HAVE_MUTEX_X86_GCC_ASSEMBLY)
+#define MUTEX_MEMBAR(addr) \
+ ({ __asm__ volatile ("lock; addl $0, %0" ::"m" (addr): "memory"); 1; })
+#else
+#define MUTEX_MEMBAR(addr) \
+ ({ __asm__ volatile ("mfence" ::: "memory"); 1; })
+#endif
+
+/*
+ * From Intel's performance tuning documentation (and see SR #6975):
+ * ftp://download.intel.com/design/perftool/cbts/appnotes/sse2/w_spinlock.pdf
+ *
+ * "For this reason, it is highly recommended that you insert the PAUSE
+ * instruction into all spin-wait code immediately. Using the PAUSE
+ * instruction does not affect the correctness of programs on existing
+ * platforms, and it improves performance on Pentium 4 processor platforms."
+ */
+#define MUTEX_PAUSE __asm__ volatile ("rep; nop" : : );
+#endif
+#endif
+
+/* End of operating system & hardware architecture-specific definitions */
+
+/*
+ * Mutex alignment defaults to sizeof(unsigned int).
+ *
+ * !!!
+ * Various systems require different alignments for mutexes (the worst we've
+ * seen so far is 16-bytes on some HP architectures). Malloc(3) is assumed
+ * to return reasonable alignment, all other mutex users must ensure proper
+ * alignment locally.
+ */
+#ifndef MUTEX_ALIGN
+#define MUTEX_ALIGN sizeof(unsigned int)
+#endif
+
+/*
+ * Mutex destruction defaults to a no-op.
+ */
+#ifndef MUTEX_DESTROY
+#define MUTEX_DESTROY(x)
+#endif
+
+/*
+ * Mutex pause defaults to a no-op.
+ */
+#ifndef MUTEX_PAUSE
+#define MUTEX_PAUSE
+#endif
+
+/*
+ * If no native atomic support is available then use mutexes to
+ * emulate atomic increment, decrement, and compare-and-exchange.
+ * The address of the atomic value selects which of a small number
+ * of mutexes to use to protect the updates.
+ * The number of mutexes should be somewhat larger than the number of
+ * processors in the system in order to minimize unnecessary contention.
+ * Here it defaults to 1 if it hasn't already been defined (e.g., in
+ * db_config.h); systems with more CPUs may want a larger value.
+ */
+#if !defined(HAVE_ATOMIC_SUPPORT) && defined(HAVE_MUTEX_SUPPORT) && \
+ !defined(MAX_ATOMIC_MUTEXES)
+#define MAX_ATOMIC_MUTEXES 1
+#endif
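+
+/*
+ * Editor's sketch (hypothetical macro, not part of the source) of how
+ * the address of an atomic value might select one of the emulation
+ * mutexes in DB_MUTEXREGION below:
+ *
+ *	#define ATOMIC_MUTEX(mtxregion, v)				\
+ *		((mtxregion)->mtx_atomic[				\
+ *		    ((uintptr_t)(v) >> 2) % MAX_ATOMIC_MUTEXES])
+ */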
+
+/*
+ * DB_MUTEXMGR --
+ * The mutex manager encapsulates the mutex system.
+ */
+struct __db_mutexmgr {
+ /* These fields are never updated after creation, so not protected. */
+ DB_ENV *dbenv; /* Environment */
+ REGINFO reginfo; /* Region information */
+
+ void *mutex_array; /* Base of the mutex array */
+};
+
+/* Macros to lock/unlock the mutex region as a whole. */
+#define MUTEX_SYSTEM_LOCK(dbenv) \
+ MUTEX_LOCK(dbenv, ((DB_MUTEXREGION *) \
+ (dbenv)->mutex_handle->reginfo.primary)->mtx_region)
+#define MUTEX_SYSTEM_UNLOCK(dbenv) \
+ MUTEX_UNLOCK(dbenv, ((DB_MUTEXREGION *) \
+ (dbenv)->mutex_handle->reginfo.primary)->mtx_region)
+
+/*
+ * DB_MUTEXREGION --
+ * The primary mutex data structure in the shared memory region.
+ */
+typedef struct __db_mutexregion {
+ /* These fields are initialized at create time and never modified. */
+ roff_t mutex_off_alloc;/* Offset of mutex array */
+ roff_t mutex_off; /* Adjusted offset of mutex array */
+ size_t mutex_size; /* Size of the aligned mutex */
+ roff_t thread_off; /* Offset of the thread area. */
+
+ db_mutex_t mtx_region; /* Region mutex. */
+
+ /* Protected using the region mutex. */
+ u_int32_t mutex_next; /* Next free mutex */
+
+#if !defined(HAVE_ATOMIC_SUPPORT) && defined(HAVE_MUTEX_SUPPORT)
+ /* Mutexes for emulating atomic operations. */
+ db_mutex_t mtx_atomic[MAX_ATOMIC_MUTEXES];
+#endif
+
+ DB_MUTEX_STAT stat; /* Mutex statistics */
+} DB_MUTEXREGION;
+
+#ifdef HAVE_MUTEX_SUPPORT
+struct __db_mutex_t { /* Mutex. */
+#ifdef MUTEX_FIELDS
+ MUTEX_FIELDS /* Opaque thread mutex structures. */
+#endif
+#ifndef HAVE_MUTEX_FCNTL
+#if defined(HAVE_MUTEX_HYBRID) || \
+ (defined(HAVE_SHARED_LATCHES) && !defined(HAVE_MUTEX_PTHREADS))
+ /*
+ * For hybrid and test-and-set shared latches it is a counter:
+ * 0 means it is free,
+ * a negative value (MUTEX_SHARE_ISEXCLUSIVE) means exclusively locked,
+ * > 0 is the number of shared readers.
+ * Pthreads shared latches use pthread_rwlock instead.
+ */
+ db_atomic_t sharecount;
+ tsl_t tas;
+#elif !defined(MUTEX_FIELDS)
+ /*
+ * This is the Test and Set flag for exclusive latches (mutexes):
+ * there is a free value (often 0, 1, or -1) and a set value.
+ */
+ tsl_t tas;
+#endif
+#endif
+#ifdef HAVE_MUTEX_HYBRID
+ volatile u_int32_t wait; /* Count of waiters. */
+#endif
+ pid_t pid; /* Process owning mutex */
+ db_threadid_t tid; /* Thread owning mutex */
+
+ db_mutex_t mutex_next_link; /* Linked list of free mutexes. */
+
+#ifdef HAVE_STATISTICS
+ int alloc_id; /* Allocation ID. */
+
+ u_int32_t mutex_set_wait; /* Granted after wait. */
+ u_int32_t mutex_set_nowait; /* Granted without waiting. */
+#ifdef HAVE_SHARED_LATCHES
+ u_int32_t mutex_set_rd_wait; /* Granted shared lock after wait. */
+ u_int32_t mutex_set_rd_nowait; /* Granted shared lock w/out waiting. */
+#endif
+#ifdef HAVE_MUTEX_HYBRID
+ u_int32_t hybrid_wait;
+ u_int32_t hybrid_wakeup; /* for counting spurious wakeups */
+#endif
+#endif
+
+ /*
+ * A subset of the flag arguments for __mutex_alloc().
+ *
+ * Flags should be an unsigned integer even if it's not required by
+ * the possible flags values, getting a single byte on some machines
+ * is expensive, and the mutex structure is a MP hot spot.
+ */
+ volatile u_int32_t flags; /* MUTEX_XXX */
+};
+#endif
+
+/* Macro to get a reference to a specific mutex. */
+#define MUTEXP_SET(mtxmgr, indx) \
+ ((DB_MUTEX *)((u_int8_t *)mtxmgr->mutex_array + \
+ (indx) * ((DB_MUTEXREGION *)mtxmgr->reginfo.primary)->mutex_size))
+
+/* Inverse of the above: get the mutex index from a mutex pointer */
+#define MUTEXP_GET(mtxmgr, mutexp) \
+ (((u_int8_t *) (mutexp) - (u_int8_t *)mtxmgr->mutex_array) / \
+ ((DB_MUTEXREGION *)mtxmgr->reginfo.primary)->mutex_size)
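+
+/*
+ * Editor's note: the two macros are inverses, so for any valid index
+ * MUTEXP_GET(mtxmgr, MUTEXP_SET(mtxmgr, indx)) == (indx).
+ */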
+
+/*
+ * Check that a particular mutex is exclusively held at least by someone, not
+ * necessarily the current thread.
+ */
+#ifdef HAVE_MUTEX_SUPPORT
+#define MUTEX_IS_OWNED(env, mutex) \
+ (mutex == MUTEX_INVALID || !MUTEX_ON(env) || \
+ F_ISSET(env->dbenv, DB_ENV_NOLOCKING) || \
+ F_ISSET(MUTEXP_SET(env->mutex_handle, mutex), DB_MUTEX_LOCKED))
+#else
+#define MUTEX_IS_OWNED(env, mutex) 0
+#endif
+
+#if defined(HAVE_MUTEX_HYBRID) || defined(DB_WIN32) || \
+ (defined(HAVE_SHARED_LATCHES) && !defined(HAVE_MUTEX_PTHREADS))
+#define MUTEXP_IS_BUSY(mutexp) \
+ (F_ISSET(mutexp, DB_MUTEX_SHARED) ? \
+ (atomic_read(&(mutexp)->sharecount) != 0) : \
+ F_ISSET(mutexp, DB_MUTEX_LOCKED))
+#define MUTEXP_BUSY_FIELD(mutexp) \
+ (F_ISSET(mutexp, DB_MUTEX_SHARED) ? \
+ (atomic_read(&(mutexp)->sharecount)) : (mutexp)->flags)
+#else
+/* Pthread_rwlocks don't have a low-cost 'is it being shared?' predicate. */
+#define MUTEXP_IS_BUSY(mutexp) (F_ISSET((mutexp), DB_MUTEX_LOCKED))
+#define MUTEXP_BUSY_FIELD(mutexp) ((mutexp)->flags)
+#endif
+
+#define MUTEX_IS_BUSY(env, mutex) \
+ (mutex == MUTEX_INVALID || !MUTEX_ON(env) || \
+ F_ISSET(env->dbenv, DB_ENV_NOLOCKING) || \
+ MUTEXP_IS_BUSY(MUTEXP_SET(env->mutex_handle, mutex)))
+
+#define MUTEX_REQUIRED(env, mutex) \
+ DB_ASSERT(env, MUTEX_IS_OWNED(env, mutex))
+
+#define MUTEX_REQUIRED_READ(env, mutex) \
+ DB_ASSERT(env, MUTEX_IS_OWNED(env, mutex) || MUTEX_IS_BUSY(env, mutex))
+
+/*
+ * Test and set (and thus hybrid) shared latches use compare & exchange
+ * to acquire; the others the mutex-setting primitive defined above.
+ */
+#ifdef LOAD_ACTUAL_MUTEX_CODE
+
+#if defined(HAVE_SHARED_LATCHES)
+/*
+ * This is the value of the 'sharecount' of an exclusively held tas latch.
+ * The particular value is not special; it is just unlikely to be caused
+ * by releasing or acquiring a shared latch too many times.
+ */
+#define MUTEX_SHARE_ISEXCLUSIVE (-1024)
+
+/*
+ * Get an exclusive lock on a possibly sharable latch. We use the native
+ * MUTEX_SET() operation for non-sharable latches; it usually is faster.
+ */
+#define MUTEXP_ACQUIRE(mutexp) \
+ (F_ISSET(mutexp, DB_MUTEX_SHARED) ? \
+ atomic_compare_exchange(env, \
+ &(mutexp)->sharecount, 0, MUTEX_SHARE_ISEXCLUSIVE) : \
+ MUTEX_SET(&(mutexp)->tas))
+#else
+#define MUTEXP_ACQUIRE(mutexp) MUTEX_SET(&(mutexp)->tas)
+#endif
+
+#ifndef MEMBAR_ENTER
+#define MEMBAR_ENTER()
+#define MEMBAR_EXIT()
+#endif
+
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+#endif /* !_DB_MUTEX_INT_H_ */
diff --git a/db-4.8.30/dbinc/os.h b/db-4.8.30/dbinc/os.h
new file mode 100644
index 0000000..7a60ef0
--- /dev/null
+++ b/db-4.8.30/dbinc/os.h
@@ -0,0 +1,176 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_OS_H_
+#define _DB_OS_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* Number of times to retry system calls that return EINTR or EBUSY. */
+#define DB_RETRY 100
+
+#ifdef __TANDEM
+/*
+ * OSS Tandem problem: fsync can return a Guardian file system error of 70,
+ * which has no symbolic name in OSS. HP says to retry the fsync. [#12957]
+ */
+#define RETRY_CHK(op, ret) do { \
+ int __retries, __t_ret; \
+ for ((ret) = 0, __retries = DB_RETRY;;) { \
+ if ((op) == 0) \
+ break; \
+ (ret) = __os_get_syserr(); \
+ if (((__t_ret = __os_posix_err(ret)) == EAGAIN || \
+ __t_ret == EBUSY || __t_ret == EINTR || \
+ __t_ret == EIO || __t_ret == 70) && --__retries > 0)\
+ continue; \
+ break; \
+ } \
+} while (0)
+#else
+#define RETRY_CHK(op, ret) do { \
+ int __retries, __t_ret; \
+ for ((ret) = 0, __retries = DB_RETRY;;) { \
+ if ((op) == 0) \
+ break; \
+ (ret) = __os_get_syserr(); \
+ if (((__t_ret = __os_posix_err(ret)) == EAGAIN || \
+ __t_ret == EBUSY || __t_ret == EINTR || \
+ __t_ret == EIO) && --__retries > 0) \
+ continue; \
+ break; \
+ } \
+} while (0)
+#endif
+
+#define RETRY_CHK_EINTR_ONLY(op, ret) do { \
+ int __retries; \
+ for ((ret) = 0, __retries = DB_RETRY;;) { \
+ if ((op) == 0) \
+ break; \
+ (ret) = __os_get_syserr(); \
+ if (__os_posix_err(ret) == EINTR && --__retries > 0) \
+ continue; \
+ break; \
+ } \
+} while (0)
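+
+/*
+ * Editor's sketch of a typical call site (hypothetical): wrap a system
+ * call that returns 0 on success, then map the saved system error to a
+ * POSIX value.
+ *
+ *	RETRY_CHK(close(fhp->fd), ret);
+ *	if (ret != 0)
+ *		ret = __os_posix_err(ret);
+ */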
+
+/*
+ * Flags understood by __os_open.
+ */
+#define DB_OSO_ABSMODE 0x0001 /* Absolute mode specified. */
+#define DB_OSO_CREATE 0x0002 /* POSIX: O_CREAT */
+#define DB_OSO_DIRECT 0x0004 /* Don't buffer the file in the OS. */
+#define DB_OSO_DSYNC 0x0008 /* POSIX: O_DSYNC. */
+#define DB_OSO_EXCL 0x0010 /* POSIX: O_EXCL */
+#define DB_OSO_RDONLY 0x0020 /* POSIX: O_RDONLY */
+#define DB_OSO_REGION 0x0040 /* Opening a region file. */
+#define DB_OSO_SEQ 0x0080 /* Expected sequential access. */
+#define DB_OSO_TEMP 0x0100 /* Remove after last close. */
+#define DB_OSO_TRUNC 0x0200 /* POSIX: O_TRUNC */
+
+/*
+ * File modes.
+ */
+#define DB_MODE_400 (S_IRUSR)
+#define DB_MODE_600 (S_IRUSR|S_IWUSR)
+#define DB_MODE_660 (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP)
+#define DB_MODE_666 (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)
+#define DB_MODE_700 (S_IRUSR|S_IWUSR|S_IXUSR)
+
+/*
+ * We group certain seek/write calls into a single function so that we
+ * can use pread(2)/pwrite(2) where they're available.
+ */
+#define DB_IO_READ 1
+#define DB_IO_WRITE 2
+
+/*
+ * Make a last "panic" check. Imagine a thread of control running in Berkeley
+ * DB, going to sleep. Another thread of control decides to run recovery
+ * because the environment is broken. The first thing recovery does is panic
+ * the existing environment, but we only check the panic flag when crossing the
+ * public API. If the sleeping thread wakes up and writes something, we could
+ * have two threads of control writing the log files at the same time. So,
+ * before reading or writing, make a last panic check. Obviously, there's still
+ * a window, but it's very, very small.
+ */
+#define LAST_PANIC_CHECK_BEFORE_IO(env) \
+ PANIC_CHECK(env);
+
+/* DB filehandle. */
+struct __fh_t {
+ /*
+ * Linked list of DB_FH's, linked from the DB_ENV, used to keep track
+ * of all open file handles for resource cleanup.
+ */
+ TAILQ_ENTRY(__fh_t) q;
+
+ /*
+ * The file-handle mutex is only used to protect the handle/fd
+ * across seek and read/write pairs; it does not protect the
+ * reference count or any other fields in the structure.
+ */
+ db_mutex_t mtx_fh; /* Mutex to lock. */
+
+ int ref; /* Reference count. */
+
+#ifdef HAVE_BREW
+ IFile *ifp; /* IFile pointer */
+#endif
+#if defined(DB_WIN32)
+ HANDLE handle; /* Windows/32 file handle. */
+ HANDLE trunc_handle; /* Handle for truncate calls. */
+#endif
+ int fd; /* POSIX file descriptor. */
+
+ char *name; /* File name at open. */
+
+ /*
+ * Last seek statistics, used for zero-filling on filesystems
+ * that don't support it directly.
+ */
+ db_pgno_t pgno;
+ u_int32_t pgsize;
+ u_int32_t offset;
+
+#ifdef HAVE_STATISTICS
+ u_int32_t seek_count; /* I/O statistics */
+ u_int32_t read_count;
+ u_int32_t write_count;
+#endif
+
+#define DB_FH_ENVLINK 0x01 /* We're linked on the DB_ENV. */
+#define DB_FH_NOSYNC 0x02 /* Handle doesn't need to be sync'd. */
+#define DB_FH_OPENED 0x04 /* Handle is valid. */
+#define DB_FH_UNLINK 0x08 /* Unlink on close */
+#define DB_FH_REGION 0x10 /* Opened to contain a region */
+ u_int8_t flags;
+};
+
+/* Standard buffer size for ctime/ctime_r function calls. */
+#define CTIME_BUFLEN 26
+
+/*
+ * VxWorks requires we cast (const char *) variables to (char *) in order to
+ * pass them to system calls like stat, read and write.
+ */
+#ifdef HAVE_VXWORKS
+#define CHAR_STAR_CAST (char *)
+#else
+#define CHAR_STAR_CAST
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+
+#include "dbinc_auto/os_ext.h"
+#endif /* !_DB_OS_H_ */
diff --git a/db-4.8.30/dbinc/partition.h b/db-4.8.30/dbinc/partition.h
new file mode 100644
index 0000000..ed2888a
--- /dev/null
+++ b/db-4.8.30/dbinc/partition.h
@@ -0,0 +1,54 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996-2009 Oracle. All rights reserved.
+ */
+/*
+ * $Id$
+ */
+#ifndef _DB_PART_H_
+#define _DB_PART_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+typedef struct __db_partition {
+ u_int32_t nparts; /* number of partitions. */
+ DBT *keys; /* array of range keys. */
+ void *data; /* the partition info. */
+ const char **dirs; /* locations for partitions. */
+ DB **handles; /* array of partition handles. */
+ u_int32_t (*callback) (DB *, DBT *);
+#define PART_CALLBACK 0x01
+#define PART_RANGE 0x02
+ u_int32_t flags;
+} DB_PARTITION;
+
+/*
+ * Internal part of a partitioned cursor.
+ */
+typedef struct __part_internal {
+ __DBC_INTERNAL
+ u_int32_t part_id;
+ DBC *sub_cursor;
+} PART_CURSOR;
+
+#ifdef HAVE_PARTITION
+#define PART_NAME "__dbp.%s.%03d"
+#define PART_LEN (strlen("__dbp..")+3)
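+
+/*
+ * Editor's example: with PART_NAME, partition 3 of database "mydb" is
+ * the file "__dbp.mydb.003".
+ */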
+
+#define DB_IS_PARTITIONED(dbp) \
+ (dbp->p_internal != NULL && \
+ ((DB_PARTITION *)dbp->p_internal)->handles != NULL)
+
+#define DBC_PART_REFRESH(dbc) (F_SET(dbc, DBC_PARTITIONED))
+#else
+#define DBC_PART_REFRESH(dbc)
+#define DB_IS_PARTITIONED(dbp) (0)
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+#endif /* !_DB_PART_H_ */
diff --git a/db-4.8.30/dbinc/qam.h b/db-4.8.30/dbinc/qam.h
new file mode 100644
index 0000000..9c68971
--- /dev/null
+++ b/db-4.8.30/dbinc/qam.h
@@ -0,0 +1,180 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1999-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_QAM_H_
+#define _DB_QAM_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * QAM data elements: a status field and the data.
+ */
+typedef struct _qamdata {
+ u_int8_t flags; /* 00: delete bit. */
+#define QAM_VALID 0x01
+#define QAM_SET 0x02
+ u_int8_t data[1]; /* Record. */
+} QAMDATA;
+
+struct __queue; typedef struct __queue QUEUE;
+struct __qcursor; typedef struct __qcursor QUEUE_CURSOR;
+
+struct __qcursor {
+ /* struct __dbc_internal */
+ __DBC_INTERNAL
+
+ /* Queue private part */
+
+ /* Per-thread information: queue private. */
+ db_recno_t recno; /* Current record number. */
+
+ u_int32_t flags;
+};
+
+typedef struct __mpfarray {
+ u_int32_t n_extent; /* Number of extents in table. */
+ u_int32_t low_extent; /* First extent open. */
+ u_int32_t hi_extent; /* Last extent open. */
+ struct __qmpf {
+ int pinref;
+ DB_MPOOLFILE *mpf;
+ } *mpfarray; /* Array of open extents. */
+} MPFARRAY;
+
+/*
+ * The in-memory, per-tree queue data structure.
+ */
+struct __queue {
+ db_pgno_t q_meta; /* Database meta-data page. */
+ db_pgno_t q_root; /* Database root page. */
+
+ int re_pad; /* Fixed-length padding byte. */
+ u_int32_t re_len; /* Length for fixed-length records. */
+ u_int32_t rec_page; /* records per page */
+ u_int32_t page_ext; /* Pages per extent */
+ MPFARRAY array1, array2; /* File arrays. */
+
+ /* Extent file configuration: */
+ DBT pgcookie; /* Initialized pgcookie. */
+ DB_PGINFO pginfo; /* Initialized pginfo struct. */
+
+ char *path; /* Space allocated to file pathname. */
+ char *name; /* The name of the file. */
+ char *dir; /* The dir of the file. */
+ int mode; /* Mode to open extents. */
+};
+
+/* Format for queue extent names. */
+#define QUEUE_EXTENT "%s%c__dbq.%s.%d"
+#define QUEUE_EXTENT_HEAD "__dbq.%s."
+#define QUEUE_EXTENT_PREFIX "__dbq."
+
+typedef struct __qam_filelist {
+ DB_MPOOLFILE *mpf;
+ u_int32_t id;
+} QUEUE_FILELIST;
+
+/*
+ * Calculate the page number of a recno.
+ *
+ * Number of records per page =
+ * Divide the available space on the page by the record len + header.
+ *
+ * Page number for record =
+ * divide the physical record number by the records per page
+ * add the root page number
+ * For now the root page will always be 1, but we might want to change
+ * that in the future (e.g. multiple fixed-length queues per file).
+ *
+ * Index of record on page =
+ * physical record number, less the logical pno times records/page
+ */
+#define CALC_QAM_RECNO_PER_PAGE(dbp) \
+ (((dbp)->pgsize - QPAGE_SZ(dbp)) / \
+ (u_int32_t)DB_ALIGN((uintmax_t)SSZA(QAMDATA, data) + \
+ ((QUEUE *)(dbp)->q_internal)->re_len, sizeof(u_int32_t)))
+
+#define QAM_RECNO_PER_PAGE(dbp) (((QUEUE*)(dbp)->q_internal)->rec_page)
+
+#define QAM_RECNO_PAGE(dbp, recno) \
+ (((QUEUE *)(dbp)->q_internal)->q_root \
+ + (((recno) - 1) / QAM_RECNO_PER_PAGE(dbp)))
+
+#define QAM_PAGE_EXTENT(dbp, pgno) \
+ (((pgno) - 1) / ((QUEUE *)(dbp)->q_internal)->page_ext)
+
+#define QAM_RECNO_EXTENT(dbp, recno) \
+ QAM_PAGE_EXTENT(dbp, QAM_RECNO_PAGE(dbp, recno))
+
+#define QAM_RECNO_INDEX(dbp, pgno, recno) \
+ (((recno) - 1) - (QAM_RECNO_PER_PAGE(dbp) \
+ * (pgno - ((QUEUE *)(dbp)->q_internal)->q_root)))
+
+#define QAM_GET_RECORD(dbp, page, index) \
+ ((QAMDATA *)((u_int8_t *)(page) + (QPAGE_SZ(dbp) + \
+ (DB_ALIGN((uintmax_t)SSZA(QAMDATA, data) + \
+ ((QUEUE *)(dbp)->q_internal)->re_len, sizeof(u_int32_t)) * index))))
+
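+/*
+ * A worked example of the arithmetic above, with hypothetical values: on a
+ * 4096-byte page with a (say) 32-byte page header and re_len = 27, each
+ * record slot is DB_ALIGN(1 + 27, 4) = 28 bytes (SSZA(QAMDATA, data) is 1,
+ * the status byte), so there are (4096 - 32) / 28 = 145 records per page.
+ * Recno 300 then lives on page q_root + (300 - 1) / 145 = 1 + 2 = 3, at
+ * index 299 - 145 * (3 - 1) = 9 on that page.
+ */
+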
+#define QAM_AFTER_CURRENT(meta, recno) \
+ ((recno) >= (meta)->cur_recno && \
+ ((meta)->first_recno <= (meta)->cur_recno || \
+ ((recno) < (meta)->first_recno && \
+ (recno) - (meta)->cur_recno < (meta)->first_recno - (recno))))
+
+#define QAM_BEFORE_FIRST(meta, recno) \
+ ((recno) < (meta)->first_recno && \
+ ((meta)->first_recno <= (meta)->cur_recno || \
+ ((recno) > (meta)->cur_recno && \
+ (recno) - (meta)->cur_recno > (meta)->first_recno - (recno))))
+
+#define QAM_NOT_VALID(meta, recno) \
+ (recno == RECNO_OOB || \
+ QAM_BEFORE_FIRST(meta, recno) || QAM_AFTER_CURRENT(meta, recno))
+
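+/*
+ * The QAM_AFTER_CURRENT/QAM_BEFORE_FIRST comparisons exist to survive
+ * 32-bit record-number wraparound.  For example, if the queue has wrapped
+ * so that first_recno = 0xFFFFFFF0 and cur_recno = 0x10, recno 0x20 (just
+ * past cur_recno) satisfies QAM_AFTER_CURRENT while recno 0xFFFFFFE0 (just
+ * before first_recno) satisfies QAM_BEFORE_FIRST; the unsigned subtractions
+ * measure which end of the valid range an out-of-range recno is closer to.
+ */
+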
+/*
+ * Log opcodes for the mvptr routine.
+ */
+#define QAM_SETFIRST 0x01
+#define QAM_SETCUR 0x02
+#define QAM_TRUNCATE 0x04
+
+typedef enum {
+ QAM_PROBE_GET,
+ QAM_PROBE_PUT,
+ QAM_PROBE_DIRTY,
+ QAM_PROBE_MPF
+} qam_probe_mode;
+
+/*
+ * Ops for __qam_nameop.
+ */
+typedef enum {
+ QAM_NAME_DISCARD,
+ QAM_NAME_RENAME,
+ QAM_NAME_REMOVE
+} qam_name_op;
+
+#define __qam_fget(dbc, pgnoaddr, flags, addrp) \
+ __qam_fprobe(dbc, *pgnoaddr, \
+ addrp, QAM_PROBE_GET, DB_PRIORITY_UNCHANGED, flags)
+
+#define __qam_fput(dbc, pgno, addrp, priority) \
+ __qam_fprobe(dbc, pgno, addrp, QAM_PROBE_PUT, priority, 0)
+
+#define __qam_dirty(dbc, pgno, pagep, priority) \
+ __qam_fprobe(dbc, pgno, pagep, QAM_PROBE_DIRTY, priority, 0)
+
+#if defined(__cplusplus)
+}
+#endif
+
+#include "dbinc_auto/qam_auto.h"
+#include "dbinc_auto/qam_ext.h"
+#endif /* !_DB_QAM_H_ */
diff --git a/db-4.8.30/dbinc/queue.h b/db-4.8.30/dbinc/queue.h
new file mode 100644
index 0000000..d76f201
--- /dev/null
+++ b/db-4.8.30/dbinc/queue.h
@@ -0,0 +1,563 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)queue.h 8.5 (Berkeley) 8/20/94
+ * $FreeBSD: src/sys/sys/queue.h,v 1.54 2002/08/05 05:18:43 alfred Exp $
+ */
+
+#ifndef _DB_QUEUE_H_
+#define _DB_QUEUE_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * This file defines four types of data structures: singly-linked lists,
+ * singly-linked tail queues, lists and tail queues.
+ *
+ * A singly-linked list is headed by a single forward pointer. The elements
+ * are singly linked for minimum space and pointer manipulation overhead at
+ * the expense of O(n) removal for arbitrary elements. New elements can be
+ * added to the list after an existing element or at the head of the list.
+ * Elements being removed from the head of the list should use the explicit
+ * macro for this purpose for optimum efficiency. A singly-linked list may
+ * only be traversed in the forward direction. Singly-linked lists are ideal
+ * for applications with large datasets and few or no removals or for
+ * implementing a LIFO queue.
+ *
+ * A singly-linked tail queue is headed by a pair of pointers, one to the
+ * head of the list and the other to the tail of the list. The elements are
+ * singly linked for minimum space and pointer manipulation overhead at the
+ * expense of O(n) removal for arbitrary elements. New elements can be added
+ * to the list after an existing element, at the head of the list, or at the
+ * end of the list. Elements being removed from the head of the tail queue
+ * should use the explicit macro for this purpose for optimum efficiency.
+ * A singly-linked tail queue may only be traversed in the forward direction.
+ * Singly-linked tail queues are ideal for applications with large datasets
+ * and few or no removals or for implementing a FIFO queue.
+ *
+ * A list is headed by a single forward pointer (or an array of forward
+ * pointers for a hash table header). The elements are doubly linked
+ * so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before
+ * or after an existing element or at the head of the list. A list
+ * may only be traversed in the forward direction.
+ *
+ * A tail queue is headed by a pair of pointers, one to the head of the
+ * list and the other to the tail of the list. The elements are doubly
+ * linked so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before or
+ * after an existing element, at the head of the list, or at the end of
+ * the list. A tail queue may be traversed in either direction.
+ *
+ * For details on the use of these macros, see the queue(3) manual page.
+ *
+ *
+ * SLIST LIST STAILQ TAILQ
+ * _HEAD + + + +
+ * _HEAD_INITIALIZER + + + +
+ * _ENTRY + + + +
+ * _INIT + + + +
+ * _EMPTY + + + +
+ * _FIRST + + + +
+ * _NEXT + + + +
+ * _PREV - - - +
+ * _LAST - - + +
+ * _FOREACH + + + +
+ * _FOREACH_REVERSE - - - +
+ * _INSERT_HEAD + + + +
+ * _INSERT_BEFORE - + - +
+ * _INSERT_AFTER + + + +
+ * _INSERT_TAIL - - + +
+ * _CONCAT - - + +
+ * _REMOVE_HEAD + - + -
+ * _REMOVE + + + +
+ *
+ */
+
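+/*
+ * A minimal usage sketch for the tail queue macros, assuming a hypothetical
+ * element type "struct foo" with entry field "links":
+ *
+ *    struct foo { int value; TAILQ_ENTRY(foo) links; };
+ *    TAILQ_HEAD(foo_head, foo) head = TAILQ_HEAD_INITIALIZER(head);
+ *
+ *    struct foo elem, *fp;
+ *    elem.value = 1;
+ *    TAILQ_INSERT_TAIL(&head, &elem, links);
+ *    TAILQ_FOREACH(fp, &head, links)
+ *            use_value(fp->value);
+ *    TAILQ_REMOVE(&head, &elem, links);
+ *
+ * Here use_value() is a stand-in for per-element work.
+ */
+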
+/*
+ * XXX
+ * We #undef all of the macros because there are incompatible versions of this
+ * file and these macros on various systems. What makes the problem worse is
+ * they are included and/or defined by system include files which we may have
+ * already loaded into Berkeley DB before getting here. For example, FreeBSD's
+ * <rpc/rpc.h> includes its system <sys/queue.h>, and VxWorks UnixLib.h defines
+ * several of the LIST_XXX macros. Visual C.NET 7.0 also defines some of these
+ * same macros in Vc7\PlatformSDK\Include\WinNT.h. Make sure we use ours.
+ */
+#undef LIST_EMPTY
+#undef LIST_ENTRY
+#undef LIST_FIRST
+#undef LIST_FOREACH
+#undef LIST_HEAD
+#undef LIST_HEAD_INITIALIZER
+#undef LIST_INIT
+#undef LIST_INSERT_AFTER
+#undef LIST_INSERT_BEFORE
+#undef LIST_INSERT_HEAD
+#undef LIST_NEXT
+#undef LIST_REMOVE
+#undef QMD_TRACE_ELEM
+#undef QMD_TRACE_HEAD
+#undef QUEUE_MACRO_DEBUG
+#undef SLIST_EMPTY
+#undef SLIST_ENTRY
+#undef SLIST_FIRST
+#undef SLIST_FOREACH
+#undef SLIST_FOREACH_PREVPTR
+#undef SLIST_HEAD
+#undef SLIST_HEAD_INITIALIZER
+#undef SLIST_INIT
+#undef SLIST_INSERT_AFTER
+#undef SLIST_INSERT_HEAD
+#undef SLIST_NEXT
+#undef SLIST_REMOVE
+#undef SLIST_REMOVE_HEAD
+#undef STAILQ_CONCAT
+#undef STAILQ_EMPTY
+#undef STAILQ_ENTRY
+#undef STAILQ_FIRST
+#undef STAILQ_FOREACH
+#undef STAILQ_HEAD
+#undef STAILQ_HEAD_INITIALIZER
+#undef STAILQ_INIT
+#undef STAILQ_INSERT_AFTER
+#undef STAILQ_INSERT_HEAD
+#undef STAILQ_INSERT_TAIL
+#undef STAILQ_LAST
+#undef STAILQ_NEXT
+#undef STAILQ_REMOVE
+#undef STAILQ_REMOVE_HEAD
+#undef STAILQ_REMOVE_HEAD_UNTIL
+#undef TAILQ_CONCAT
+#undef TAILQ_EMPTY
+#undef TAILQ_ENTRY
+#undef TAILQ_FIRST
+#undef TAILQ_FOREACH
+#undef TAILQ_FOREACH_REVERSE
+#undef TAILQ_HEAD
+#undef TAILQ_HEAD_INITIALIZER
+#undef TAILQ_INIT
+#undef TAILQ_INSERT_AFTER
+#undef TAILQ_INSERT_BEFORE
+#undef TAILQ_INSERT_HEAD
+#undef TAILQ_INSERT_TAIL
+#undef TAILQ_LAST
+#undef TAILQ_NEXT
+#undef TAILQ_PREV
+#undef TAILQ_REMOVE
+#undef TRACEBUF
+#undef TRASHIT
+
+#define QUEUE_MACRO_DEBUG 0
+#if QUEUE_MACRO_DEBUG
+/* Store the last 2 places the queue element or head was altered */
+struct qm_trace {
+ char * lastfile;
+ int lastline;
+ char * prevfile;
+ int prevline;
+};
+
+#define TRACEBUF struct qm_trace trace;
+#define TRASHIT(x) do {(x) = (void *)-1;} while (0)
+
+#define QMD_TRACE_HEAD(head) do { \
+ (head)->trace.prevline = (head)->trace.lastline; \
+ (head)->trace.prevfile = (head)->trace.lastfile; \
+ (head)->trace.lastline = __LINE__; \
+ (head)->trace.lastfile = __FILE__; \
+} while (0)
+
+#define QMD_TRACE_ELEM(elem) do { \
+ (elem)->trace.prevline = (elem)->trace.lastline; \
+ (elem)->trace.prevfile = (elem)->trace.lastfile; \
+ (elem)->trace.lastline = __LINE__; \
+ (elem)->trace.lastfile = __FILE__; \
+} while (0)
+
+#else
+#define QMD_TRACE_ELEM(elem)
+#define QMD_TRACE_HEAD(head)
+#define TRACEBUF
+#define TRASHIT(x)
+#endif /* QUEUE_MACRO_DEBUG */
+
+/*
+ * Singly-linked List declarations.
+ */
+#define SLIST_HEAD(name, type) \
+struct name { \
+ struct type *slh_first; /* first element */ \
+}
+
+#define SLIST_HEAD_INITIALIZER(head) \
+ { NULL }
+
+#define SLIST_ENTRY(type) \
+struct { \
+ struct type *sle_next; /* next element */ \
+}
+
+/*
+ * Singly-linked List functions.
+ */
+#define SLIST_EMPTY(head) ((head)->slh_first == NULL)
+
+#define SLIST_FIRST(head) ((head)->slh_first)
+
+#define SLIST_FOREACH(var, head, field) \
+ for ((var) = SLIST_FIRST((head)); \
+ (var); \
+ (var) = SLIST_NEXT((var), field))
+
+#define SLIST_FOREACH_PREVPTR(var, varp, head, field) \
+ for ((varp) = &SLIST_FIRST((head)); \
+ ((var) = *(varp)) != NULL; \
+ (varp) = &SLIST_NEXT((var), field))
+
+#define SLIST_INIT(head) do { \
+ SLIST_FIRST((head)) = NULL; \
+} while (0)
+
+#define SLIST_INSERT_AFTER(slistelm, elm, field) do { \
+ SLIST_NEXT((elm), field) = SLIST_NEXT((slistelm), field); \
+ SLIST_NEXT((slistelm), field) = (elm); \
+} while (0)
+
+#define SLIST_INSERT_HEAD(head, elm, field) do { \
+ SLIST_NEXT((elm), field) = SLIST_FIRST((head)); \
+ SLIST_FIRST((head)) = (elm); \
+} while (0)
+
+#define SLIST_NEXT(elm, field) ((elm)->field.sle_next)
+
+#define SLIST_REMOVE(head, elm, type, field) do { \
+ if (SLIST_FIRST((head)) == (elm)) { \
+ SLIST_REMOVE_HEAD((head), field); \
+ } \
+ else { \
+ struct type *curelm = SLIST_FIRST((head)); \
+ while (SLIST_NEXT(curelm, field) != (elm)) \
+ curelm = SLIST_NEXT(curelm, field); \
+ SLIST_NEXT(curelm, field) = \
+ SLIST_NEXT(SLIST_NEXT(curelm, field), field); \
+ } \
+} while (0)
+
+#define SLIST_REMOVE_HEAD(head, field) do { \
+ SLIST_FIRST((head)) = SLIST_NEXT(SLIST_FIRST((head)), field); \
+} while (0)
+
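+/*
+ * For example, an SLIST works naturally as a LIFO stack; "struct foo" and
+ * the entry field "next" here are hypothetical:
+ *
+ *    struct foo { int value; SLIST_ENTRY(foo) next; };
+ *    SLIST_HEAD(foo_stack, foo) stack = SLIST_HEAD_INITIALIZER(stack);
+ *
+ *    SLIST_INSERT_HEAD(&stack, &elem, next);      push
+ *    top = SLIST_FIRST(&stack);                   peek
+ *    SLIST_REMOVE_HEAD(&stack, next);             pop
+ */
+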
+/*
+ * Singly-linked Tail queue declarations.
+ */
+#define STAILQ_HEAD(name, type) \
+struct name { \
+ struct type *stqh_first;/* first element */ \
+ struct type **stqh_last;/* addr of last next element */ \
+}
+
+#define STAILQ_HEAD_INITIALIZER(head) \
+ { NULL, &(head).stqh_first }
+
+#define STAILQ_ENTRY(type) \
+struct { \
+ struct type *stqe_next; /* next element */ \
+}
+
+/*
+ * Singly-linked Tail queue functions.
+ */
+#define STAILQ_CONCAT(head1, head2) do { \
+ if (!STAILQ_EMPTY((head2))) { \
+ *(head1)->stqh_last = (head2)->stqh_first; \
+ (head1)->stqh_last = (head2)->stqh_last; \
+ STAILQ_INIT((head2)); \
+ } \
+} while (0)
+
+#define STAILQ_EMPTY(head) ((head)->stqh_first == NULL)
+
+#define STAILQ_FIRST(head) ((head)->stqh_first)
+
+#define STAILQ_FOREACH(var, head, field) \
+ for ((var) = STAILQ_FIRST((head)); \
+ (var); \
+ (var) = STAILQ_NEXT((var), field))
+
+#define STAILQ_INIT(head) do { \
+ STAILQ_FIRST((head)) = NULL; \
+ (head)->stqh_last = &STAILQ_FIRST((head)); \
+} while (0)
+
+#define STAILQ_INSERT_AFTER(head, tqelm, elm, field) do { \
+ if ((STAILQ_NEXT((elm), field) = STAILQ_NEXT((tqelm), field)) == NULL)\
+ (head)->stqh_last = &STAILQ_NEXT((elm), field); \
+ STAILQ_NEXT((tqelm), field) = (elm); \
+} while (0)
+
+#define STAILQ_INSERT_HEAD(head, elm, field) do { \
+ if ((STAILQ_NEXT((elm), field) = STAILQ_FIRST((head))) == NULL) \
+ (head)->stqh_last = &STAILQ_NEXT((elm), field); \
+ STAILQ_FIRST((head)) = (elm); \
+} while (0)
+
+#define STAILQ_INSERT_TAIL(head, elm, field) do { \
+ STAILQ_NEXT((elm), field) = NULL; \
+ *(head)->stqh_last = (elm); \
+ (head)->stqh_last = &STAILQ_NEXT((elm), field); \
+} while (0)
+
+#define STAILQ_LAST(head, type, field) \
+ (STAILQ_EMPTY((head)) ? \
+ NULL : \
+ ((struct type *) \
+ ((char *)((head)->stqh_last) - __offsetof(struct type, field))))
+
+#define STAILQ_NEXT(elm, field) ((elm)->field.stqe_next)
+
+#define STAILQ_REMOVE(head, elm, type, field) do { \
+ if (STAILQ_FIRST((head)) == (elm)) { \
+ STAILQ_REMOVE_HEAD((head), field); \
+ } \
+ else { \
+ struct type *curelm = STAILQ_FIRST((head)); \
+ while (STAILQ_NEXT(curelm, field) != (elm)) \
+ curelm = STAILQ_NEXT(curelm, field); \
+ if ((STAILQ_NEXT(curelm, field) = \
+ STAILQ_NEXT(STAILQ_NEXT(curelm, field), field)) == NULL)\
+ (head)->stqh_last = &STAILQ_NEXT((curelm), field);\
+ } \
+} while (0)
+
+#define STAILQ_REMOVE_HEAD(head, field) do { \
+ if ((STAILQ_FIRST((head)) = \
+ STAILQ_NEXT(STAILQ_FIRST((head)), field)) == NULL) \
+ (head)->stqh_last = &STAILQ_FIRST((head)); \
+} while (0)
+
+#define STAILQ_REMOVE_HEAD_UNTIL(head, elm, field) do { \
+ if ((STAILQ_FIRST((head)) = STAILQ_NEXT((elm), field)) == NULL) \
+ (head)->stqh_last = &STAILQ_FIRST((head)); \
+} while (0)
+
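+/*
+ * For example, an STAILQ works naturally as a FIFO queue; "struct job" and
+ * the entry field "entries" here are hypothetical:
+ *
+ *    struct job { int id; STAILQ_ENTRY(job) entries; };
+ *    STAILQ_HEAD(job_queue, job) q = STAILQ_HEAD_INITIALIZER(q);
+ *
+ *    STAILQ_INSERT_TAIL(&q, &j, entries);         enqueue at the tail
+ *    front = STAILQ_FIRST(&q);                    examine the head
+ *    STAILQ_REMOVE_HEAD(&q, entries);             dequeue from the head
+ */
+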
+/*
+ * List declarations.
+ */
+#define LIST_HEAD(name, type) \
+struct name { \
+ struct type *lh_first; /* first element */ \
+}
+
+#define LIST_HEAD_INITIALIZER(head) \
+ { NULL }
+
+#define LIST_ENTRY(type) \
+struct { \
+ struct type *le_next; /* next element */ \
+ struct type **le_prev; /* address of previous next element */ \
+}
+
+/*
+ * List functions.
+ */
+
+#define LIST_EMPTY(head) ((head)->lh_first == NULL)
+
+#define LIST_FIRST(head) ((head)->lh_first)
+
+#define LIST_FOREACH(var, head, field) \
+ for ((var) = LIST_FIRST((head)); \
+ (var); \
+ (var) = LIST_NEXT((var), field))
+
+#define LIST_INIT(head) do { \
+ LIST_FIRST((head)) = NULL; \
+} while (0)
+
+#define LIST_INSERT_AFTER(listelm, elm, field) do { \
+ if ((LIST_NEXT((elm), field) = LIST_NEXT((listelm), field)) != NULL)\
+ LIST_NEXT((listelm), field)->field.le_prev = \
+ &LIST_NEXT((elm), field); \
+ LIST_NEXT((listelm), field) = (elm); \
+ (elm)->field.le_prev = &LIST_NEXT((listelm), field); \
+} while (0)
+
+#define LIST_INSERT_BEFORE(listelm, elm, field) do { \
+ (elm)->field.le_prev = (listelm)->field.le_prev; \
+ LIST_NEXT((elm), field) = (listelm); \
+ *(listelm)->field.le_prev = (elm); \
+ (listelm)->field.le_prev = &LIST_NEXT((elm), field); \
+} while (0)
+
+#define LIST_INSERT_HEAD(head, elm, field) do { \
+ if ((LIST_NEXT((elm), field) = LIST_FIRST((head))) != NULL) \
+ LIST_FIRST((head))->field.le_prev = &LIST_NEXT((elm), field);\
+ LIST_FIRST((head)) = (elm); \
+ (elm)->field.le_prev = &LIST_FIRST((head)); \
+} while (0)
+
+#define LIST_NEXT(elm, field) ((elm)->field.le_next)
+
+#define LIST_REMOVE(elm, field) do { \
+ if (LIST_NEXT((elm), field) != NULL) \
+ LIST_NEXT((elm), field)->field.le_prev = \
+ (elm)->field.le_prev; \
+ *(elm)->field.le_prev = LIST_NEXT((elm), field); \
+} while (0)
+
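+/*
+ * The doubly-linked le_prev pointer is what makes LIST_REMOVE O(1): unlike
+ * SLIST_REMOVE, no traversal is needed to find the predecessor, and no head
+ * argument is required.  Sketch with hypothetical names:
+ *
+ *    struct node { int v; LIST_ENTRY(node) link; };
+ *    LIST_HEAD(node_list, node) nl = LIST_HEAD_INITIALIZER(nl);
+ *
+ *    LIST_INSERT_HEAD(&nl, &n, link);
+ *    LIST_REMOVE(&n, link);
+ */
+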
+/*
+ * Tail queue declarations.
+ */
+#define TAILQ_HEAD(name, type) \
+struct name { \
+ struct type *tqh_first; /* first element */ \
+ struct type **tqh_last; /* addr of last next element */ \
+ TRACEBUF \
+}
+
+#define TAILQ_HEAD_INITIALIZER(head) \
+ { NULL, &(head).tqh_first }
+
+#define TAILQ_ENTRY(type) \
+struct { \
+ struct type *tqe_next; /* next element */ \
+ struct type **tqe_prev; /* address of previous next element */ \
+ TRACEBUF \
+}
+
+/*
+ * Tail queue functions.
+ */
+#define TAILQ_CONCAT(head1, head2, field) do { \
+ if (!TAILQ_EMPTY(head2)) { \
+ *(head1)->tqh_last = (head2)->tqh_first; \
+ (head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \
+ (head1)->tqh_last = (head2)->tqh_last; \
+ TAILQ_INIT((head2)); \
+ QMD_TRACE_HEAD(head1); \
+ QMD_TRACE_HEAD(head2); \
+ } \
+} while (0)
+
+#define TAILQ_EMPTY(head) ((head)->tqh_first == NULL)
+
+#define TAILQ_FIRST(head) ((head)->tqh_first)
+
+#define TAILQ_FOREACH(var, head, field) \
+ for ((var) = TAILQ_FIRST((head)); \
+ (var); \
+ (var) = TAILQ_NEXT((var), field))
+
+#define TAILQ_FOREACH_REVERSE(var, head, headname, field) \
+ for ((var) = TAILQ_LAST((head), headname); \
+ (var); \
+ (var) = TAILQ_PREV((var), headname, field))
+
+#define TAILQ_INIT(head) do { \
+ TAILQ_FIRST((head)) = NULL; \
+ (head)->tqh_last = &TAILQ_FIRST((head)); \
+ QMD_TRACE_HEAD(head); \
+} while (0)
+
+#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \
+ if ((TAILQ_NEXT((elm), field) = TAILQ_NEXT((listelm), field)) != NULL)\
+ TAILQ_NEXT((elm), field)->field.tqe_prev = \
+ &TAILQ_NEXT((elm), field); \
+ else { \
+ (head)->tqh_last = &TAILQ_NEXT((elm), field); \
+ QMD_TRACE_HEAD(head); \
+ } \
+ TAILQ_NEXT((listelm), field) = (elm); \
+ (elm)->field.tqe_prev = &TAILQ_NEXT((listelm), field); \
+ QMD_TRACE_ELEM(&(elm)->field); \
+ QMD_TRACE_ELEM(&listelm->field); \
+} while (0)
+
+#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \
+ (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \
+ TAILQ_NEXT((elm), field) = (listelm); \
+ *(listelm)->field.tqe_prev = (elm); \
+ (listelm)->field.tqe_prev = &TAILQ_NEXT((elm), field); \
+ QMD_TRACE_ELEM(&(elm)->field); \
+ QMD_TRACE_ELEM(&listelm->field); \
+} while (0)
+
+#define TAILQ_INSERT_HEAD(head, elm, field) do { \
+ if ((TAILQ_NEXT((elm), field) = TAILQ_FIRST((head))) != NULL) \
+ TAILQ_FIRST((head))->field.tqe_prev = \
+ &TAILQ_NEXT((elm), field); \
+ else \
+ (head)->tqh_last = &TAILQ_NEXT((elm), field); \
+ TAILQ_FIRST((head)) = (elm); \
+ (elm)->field.tqe_prev = &TAILQ_FIRST((head)); \
+ QMD_TRACE_HEAD(head); \
+ QMD_TRACE_ELEM(&(elm)->field); \
+} while (0)
+
+#define TAILQ_INSERT_TAIL(head, elm, field) do { \
+ TAILQ_NEXT((elm), field) = NULL; \
+ (elm)->field.tqe_prev = (head)->tqh_last; \
+ *(head)->tqh_last = (elm); \
+ (head)->tqh_last = &TAILQ_NEXT((elm), field); \
+ QMD_TRACE_HEAD(head); \
+ QMD_TRACE_ELEM(&(elm)->field); \
+} while (0)
+
+#define TAILQ_LAST(head, headname) \
+ (*(((struct headname *)((head)->tqh_last))->tqh_last))
+
+#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next)
+
+#define TAILQ_PREV(elm, headname, field) \
+ (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last))
+
+#define TAILQ_REMOVE(head, elm, field) do { \
+ if ((TAILQ_NEXT((elm), field)) != NULL) \
+ TAILQ_NEXT((elm), field)->field.tqe_prev = \
+ (elm)->field.tqe_prev; \
+ else { \
+ (head)->tqh_last = (elm)->field.tqe_prev; \
+ QMD_TRACE_HEAD(head); \
+ } \
+ *(elm)->field.tqe_prev = TAILQ_NEXT((elm), field); \
+ TRASHIT((elm)->field.tqe_next); \
+ TRASHIT((elm)->field.tqe_prev); \
+ QMD_TRACE_ELEM(&(elm)->field); \
+} while (0)
+
+#if defined(__cplusplus)
+}
+#endif
+#endif /* !_DB_QUEUE_H_ */
diff --git a/db-4.8.30/dbinc/region.h b/db-4.8.30/dbinc/region.h
new file mode 100644
index 0000000..62cc79e
--- /dev/null
+++ b/db-4.8.30/dbinc/region.h
@@ -0,0 +1,285 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1998-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_REGION_H_
+#define _DB_REGION_H_
+
+/*
+ * The DB environment consists of some number of "regions", which are described
+ * by the following four structures:
+ *
+ * REGENV -- shared information about the environment
+ * REGENV_REF -- file describing system memory version of REGENV
+ * REGION -- shared information about a single region
+ * REGINFO -- per-process information about a REGION
+ *
+ * There are three types of memory that hold regions:
+ * per-process heap (malloc)
+ * file mapped into memory (mmap, MapViewOfFile)
+ * system memory (shmget, CreateFileMapping)
+ *
+ * By default, regions are created in filesystem-backed shared memory. They
+ * can also be created in system shared memory (DB_SYSTEM_MEM), or, if private
+ * to a process, in heap memory (DB_PRIVATE).
+ *
+ * Regions in the filesystem are named "__db.001", "__db.002" and so on. If
+ * we're not using a private environment allocated in heap, "__db.001" will
+ * always exist, as we use it to synchronize on the regions, whether they are
+ * in filesystem-backed memory or system memory.
+ *
+ * The file "__db.001" contains a REGENV structure and an array of REGION
+ * structures. Each REGION structure describes an underlying chunk of
+ * shared memory.
+ *
+ * __db.001
+ * +---------+
+ * |REGENV |
+ * +---------+ +----------+
+ * |REGION |-> | __db.002 |
+ * | | +----------+
+ * +---------+ +----------+
+ * |REGION |-> | __db.003 |
+ * | | +----------+
+ * +---------+ +----------+
+ * |REGION |-> | __db.004 |
+ * | | +----------+
+ * +---------+
+ *
+ * The tricky part about manipulating the regions is creating or joining the
+ * database environment. We have to be sure only a single thread of control
+ * creates and/or recovers a database environment. All other threads should
+ * then join without seeing inconsistent data.
+ *
+ * We do this in two parts: first, we use the underlying O_EXCL flag to the
+ * open system call to serialize creation of the __db.001 file. The thread
+ * of control creating that file then proceeds to create the remaining
+ * regions in the environment, including the mutex region. Once the mutex
+ * region has been created, the creating thread of control fills in the
+ * __db.001 file's magic number. Other threads of control (the ones that
+ * didn't create the __db.001 file) wait on the initialization of the
+ * __db.001 file's magic number. After it has been initialized, all threads
+ * of control can proceed, using normal shared mutex locking procedures for
+ * exclusion.
+ *
+ * REGIONs are not moved or removed during the life of the environment, and
+ * so processes can have long-lived references to them.
+ *
+ * One of the REGION structures describes the environment region itself.
+ *
+ * The REGION array is not locked in any way. It's an array so we don't have
+ * to manipulate data structures after a crash -- on some systems, we have to
+ * join and clean up the mutex region after application failure. Using an
+ * array means we don't have to worry about broken links or other nastiness
+ * after the failure.
+ *
+ * All requests to create or join a region return a REGINFO structure, which
+ * is held by the caller and used to open and subsequently close the reference
+ * to the region. The REGINFO structure contains the per-process information
+ * that we need to access the region.
+ *
+ * The one remaining complication. If the regions (including the environment
+ * region) live in system memory, and the system memory isn't "named" somehow
+ * in the filesystem name space, we need some way of finding it. Do this
+ * by writing the REGENV_REF structure into the "__db.001" file. When we find
+ * a __db.001 file that is too small to be a real, on-disk environment, we use
+ * the information it contains to redirect to the real "__db.001" file/memory.
+ * This currently only happens when the REGENV file is in shared system memory.
+ *
+ * Although DB does not currently grow regions when they run out of memory, it
+ * would be possible to do so. To grow a region, allocate a new region of the
+ * appropriate size, then copy the old region over it and insert the additional
+ * memory into the already existing shalloc arena. Region users must reset
+ * their base addresses and any local pointers into the memory, of course.
+ * This failed in historic versions of DB because the region mutexes lived in
+ * the mapped memory, and when it was unmapped and remapped (or copied),
+ * threads could lose track of it. Also, some systems didn't support mutex
+ * copying, e.g., from OSF1 V4.0:
+ *
+ * The address of an msemaphore structure may be significant. If the
+ * msemaphore structure contains any value copied from an msemaphore
+ * structure at a different address, the result is undefined.
+ *
+ * All mutexes are now maintained in a separate region which is never unmapped,
+ * so growing regions should be possible.
+ */
+
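+/*
+ * A minimal sketch of the creation/join serialization described above; this
+ * is illustrative pseudocode only, not the actual environment-open logic:
+ *
+ *    fd = open("__db.001", O_RDWR | O_CREAT | O_EXCL, mode);
+ *    if (fd != -1) {
+ *            we won the race: create the remaining regions, including
+ *            the mutex region, then fill in the magic number last;
+ *    } else {
+ *            someone else created the file: open it, then wait for the
+ *            magic number to become non-zero before joining normally;
+ *    }
+ */
+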
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define DB_REGION_PREFIX "__db" /* DB file name prefix. */
+#define DB_REGION_FMT "__db.%03d" /* Region file name format. */
+#define DB_REGION_ENV "__db.001" /* Primary environment name. */
+
+#define INVALID_REGION_ID 0 /* Out-of-band region ID. */
+#define REGION_ID_ENV 1 /* Primary environment ID. */
+
+typedef enum {
+ INVALID_REGION_TYPE=0, /* Region type. */
+ REGION_TYPE_ENV,
+ REGION_TYPE_LOCK,
+ REGION_TYPE_LOG,
+ REGION_TYPE_MPOOL,
+ REGION_TYPE_MUTEX,
+ REGION_TYPE_TXN } reg_type_t;
+
+#define INVALID_REGION_SEGID -1 /* Segment IDs are either shmget(2) or
+ * Win16 segment identifiers. They are
+ * both stored in a "long", and we need
+ * an out-of-band value.
+ */
+/*
+ * Nothing can live at region offset 0, because, in all cases, that's where
+ * we store *something*. Lots of code needs an out-of-band value for region
+ * offsets, so we use 0.
+ */
+#define INVALID_ROFF 0
+
+/* Reference describing system memory version of REGENV. */
+typedef struct __db_reg_env_ref {
+ roff_t size; /* Region size. */
+ long segid; /* UNIX shmget ID, VxWorks ID. */
+} REGENV_REF;
+
+/* Per-environment region information. */
+typedef struct __db_reg_env {
+ /*
+ * !!!
+ * The magic, panic, version, envid and signature fields of the region
+ * are fixed in size, the timestamp field is the first field which is
+ * variable length. These fields must never change in order, to
+ * guarantee we can always read them, no matter what release we have.
+ *
+ * !!!
+ * The magic and panic fields are NOT protected by any mutex, and for
+ * this reason cannot be anything more complicated than zero/non-zero.
+ */
+ u_int32_t magic; /* Valid region magic number. */
+ u_int32_t panic; /* Environment is dead. */
+
+ u_int32_t majver; /* Major DB version number. */
+ u_int32_t minver; /* Minor DB version number. */
+ u_int32_t patchver; /* Patch DB version number. */
+
+ u_int32_t envid; /* Unique environment ID. */
+
+ u_int32_t signature; /* Structure signatures. */
+
+ time_t timestamp; /* Creation time. */
+
+ u_int32_t init_flags; /* Flags environment initialized with.*/
+
+ /*
+ * The mtx_regenv mutex protects the environment reference count and
+ * memory allocation from the primary shared region (the crypto, thread
+ * control block and replication implementations allocate memory from
+ * the primary shared region).
+ *
+ * The rest of the fields are initialized at creation time, and don't
+ * need mutex protection. The flags, op_timestamp and rep_timestamp
+ * fields are used by replication only and are protected by the
+ * replication mutex. The rep_timestamp is not protected when it
+ * is used in recovery as that is already single threaded.
+ */
+ db_mutex_t mtx_regenv; /* Refcnt, region allocation mutex. */
+ u_int32_t refcnt; /* References to the environment. */
+
+ u_int32_t region_cnt; /* Number of REGIONs. */
+ roff_t region_off; /* Offset of region array. */
+
+ roff_t cipher_off; /* Offset of cipher area. */
+
+ roff_t thread_off; /* Offset of the thread area. */
+
+ roff_t rep_off; /* Offset of the replication area. */
+#define DB_REGENV_REPLOCKED 0x0001 /* Env locked for rep backup. */
+ u_int32_t flags; /* Shared environment flags. */
+#define DB_REGENV_TIMEOUT 30 /* Backup timeout. */
+ time_t op_timestamp; /* Timestamp for operations. */
+ time_t rep_timestamp; /* Timestamp for rep db handles. */
+ u_int32_t reg_panic; /* DB_REGISTER triggered panic */
+ uintmax_t unused; /* The ALLOC_LAYOUT structure follows
+ * the REGENV structure in memory and
+ * contains uintmax_t fields. Force
+ * proper alignment of that structure.
+ */
+} REGENV;
+
+/* Per-region shared region information. */
+typedef struct __db_region {
+ u_int32_t id; /* Region id. */
+ reg_type_t type; /* Region type. */
+
+ roff_t size; /* Region size in bytes. */
+
+ roff_t primary; /* Primary data structure offset. */
+
+ long segid; /* UNIX shmget(2), Win16 segment ID. */
+} REGION;
+
+/*
+ * Per-process/per-attachment information about a single region.
+ */
+struct __db_reginfo_t { /* __env_region_attach IN parameters. */
+ ENV *env; /* Enclosing environment. */
+ reg_type_t type; /* Region type. */
+ u_int32_t id; /* Region id. */
+
+ /* env_region_attach OUT parameters. */
+ REGION *rp; /* Shared region. */
+
+ char *name; /* Region file name. */
+
+ void *addr; /* Region address. */
+ void *primary; /* Primary data structure address. */
+
+ size_t max_alloc; /* Maximum bytes allocated. */
+ size_t allocated; /* Bytes allocated. */
+
+ db_mutex_t mtx_alloc; /* Mutex for region memory allocation. */
+
+#ifdef DB_WIN32
+ HANDLE wnt_handle; /* Win/NT HANDLE. */
+#endif
+
+#define REGION_CREATE 0x01 /* Caller created region. */
+#define REGION_CREATE_OK 0x02 /* Caller willing to create region. */
+#define REGION_JOIN_OK 0x04 /* Caller is looking for a match. */
+ u_int32_t flags;
+};
+
+/*
+ * R_ADDR Return a per-process address for a shared region offset.
+ * R_OFFSET Return a shared region offset for a per-process address.
+ */
+#define R_ADDR(reginfop, offset) \
+ (F_ISSET((reginfop)->env, ENV_PRIVATE) ? \
+ (void *)(offset) : \
+ (void *)((u_int8_t *)((reginfop)->addr) + (offset)))
+#define R_OFFSET(reginfop, p) \
+ (F_ISSET((reginfop)->env, ENV_PRIVATE) ? \
+ (roff_t)(p) : \
+ (roff_t)((u_int8_t *)(p) - (u_int8_t *)(reginfop)->addr))
+
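+/*
+ * For any address p within an attached region, the two macros are inverses:
+ * R_ADDR(reginfop, R_OFFSET(reginfop, p)) == p.  This is why shared data
+ * structures store roff_t offsets rather than per-process pointers.
+ */
+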
+/*
+ * PANIC_ISSET, PANIC_CHECK:
+ * Check to see if the DB environment is dead.
+ */
+#define PANIC_ISSET(env) \
+ ((env) != NULL && (env)->reginfo != NULL && \
+ ((REGENV *)(env)->reginfo->primary)->panic != 0 && \
+ !F_ISSET((env)->dbenv, DB_ENV_NOPANIC))
+
+#define PANIC_CHECK(env) \
+ if (PANIC_ISSET(env)) \
+ return (__env_panic_msg(env));
+
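+/*
+ * PANIC_CHECK is intended for the top of API entry points: if the
+ * environment has panicked, it makes the enclosing function immediately
+ * return __env_panic_msg(env) instead of touching possibly corrupt state.
+ */
+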
+#if defined(__cplusplus)
+}
+#endif
+#endif /* !_DB_REGION_H_ */
diff --git a/db-4.8.30/dbinc/rep.h b/db-4.8.30/dbinc/rep.h
new file mode 100644
index 0000000..c11213c
--- /dev/null
+++ b/db-4.8.30/dbinc/rep.h
@@ -0,0 +1,831 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2001-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_REP_H_
+#define _DB_REP_H_
+
+#include "dbinc_auto/rep_auto.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * Names of client temp databases.
+ */
+#define REPDBNAME "__db.rep.db"
+#define REPPAGENAME "__db.reppg.db"
+
+/*
+ * Message types
+ */
+#define REP_INVALID 0 /* Invalid message type. */
+#define REP_ALIVE 1 /* I am alive message. */
+#define REP_ALIVE_REQ 2 /* Request for alive messages. */
+#define REP_ALL_REQ 3 /* Request all log records greater than LSN. */
+#define REP_BULK_LOG 4 /* Bulk transfer of log records. */
+#define REP_BULK_PAGE 5 /* Bulk transfer of pages. */
+#define REP_DUPMASTER 6 /* Duplicate master detected; propagate. */
+#define REP_FILE 7 /* Page of a database file. NOTUSED */
+#define REP_FILE_FAIL 8 /* File requested does not exist. */
+#define REP_FILE_REQ 9 /* Request for a database file. NOTUSED */
+#define REP_LEASE_GRANT 10 /* Client grants a lease to a master. */
+#define REP_LOG 11 /* Log record. */
+#define REP_LOG_MORE 12 /* There are more log records to request. */
+#define REP_LOG_REQ 13 /* Request for a log record. */
+#define REP_MASTER_REQ 14 /* Who is the master */
+#define REP_NEWCLIENT 15 /* Announces the presence of a new client. */
+#define REP_NEWFILE 16 /* Announce a log file change. */
+#define REP_NEWMASTER 17 /* Announces who the master is. */
+#define REP_NEWSITE 18 /* Announces that a site has heard from a new
+ * site; like NEWCLIENT, but indirect. A
+ * NEWCLIENT message comes directly from the new
+ * client while a NEWSITE comes indirectly from
+ * someone who heard about a NEWCLIENT.
+ */
+#define REP_PAGE 19 /* Database page. */
+#define REP_PAGE_FAIL 20 /* Requested page does not exist. */
+#define REP_PAGE_MORE 21 /* There are more pages to request. */
+#define REP_PAGE_REQ 22 /* Request for a database page. */
+#define REP_REREQUEST 23 /* Force rerequest. */
+#define REP_START_SYNC 24 /* Tell client to begin syncing a ckp.*/
+#define REP_UPDATE 25 /* Environment hotcopy information. */
+#define REP_UPDATE_REQ 26 /* Request for hotcopy information. */
+#define REP_VERIFY 27 /* A log record for verification. */
+#define REP_VERIFY_FAIL 28 /* The client is outdated. */
+#define REP_VERIFY_REQ 29 /* Request for a log record to verify. */
+#define REP_VOTE1 30 /* Send out your information for an election. */
+#define REP_VOTE2 31 /* Send a "you are master" vote. */
+/*
+ * Maximum message number for conversion tables. Update this
+ * value as the largest message number above increases.
+ *
+ * !!!
+ * NOTE: When changing messages above, the two tables for upgrade support
+ * need adjusting. They are in rep_util.c.
+ */
+#define REP_MAX_MSG 31
+
+/*
+ * This is the list of client-to-client requests messages.
+ * We use this to decide if we're doing client-to-client and
+ * might need to send a rerequest.
+ */
+#define REP_MSG_REQ(rectype) \
+ (rectype == REP_ALL_REQ || \
+ rectype == REP_LOG_REQ || \
+ rectype == REP_PAGE_REQ || \
+ rectype == REP_VERIFY_REQ)
+
+/*
+ * Note that the version information should be at the beginning of the
+ * structure, so that we can rearrange the rest of it while letting the
+ * version checks continue to work. DB_REPVERSION should be revved any time
+ * the rest of the structure changes or when the message numbers change.
+ *
+ * Define also, the corresponding log versions that are tied to the
+ * replication/release versions. These are only used in replication
+ * and that is why they're defined here.
+ */
+#define DB_LOGVERSION_42 8
+#define DB_LOGVERSION_43 10
+#define DB_LOGVERSION_44 11
+#define DB_LOGVERSION_45 12
+#define DB_LOGVERSION_46 13
+#define DB_LOGVERSION_47 14
+#define DB_LOGVERSION_48 15
+#define DB_LOGVERSION_MIN DB_LOGVERSION_44
+#define DB_REPVERSION_INVALID 0
+#define DB_REPVERSION_44 3
+#define DB_REPVERSION_45 3
+#define DB_REPVERSION_46 4
+#define DB_REPVERSION_47 5
+#define DB_REPVERSION_48 5
+#define DB_REPVERSION DB_REPVERSION_48
+#define DB_REPVERSION_MIN DB_REPVERSION_44
+
+/*
+ * RPRINT
+ * REP_PRINT_MESSAGE
+ * Macros for verbose replication messages.
+ */
+#define RPRINT(env, verbose_category, x) do { \
+ if (FLD_ISSET((env)->dbenv->verbose, \
+ (verbose_category) | DB_VERB_REPLICATION)) { \
+ __rep_print x; \
+ } \
+} while (0)
+#define REP_PRINT_MESSAGE(env, eid, rp, str, fl) do { \
+ if (FLD_ISSET((env)->dbenv->verbose, \
+ DB_VERB_REP_MSGS | DB_VERB_REPLICATION)) { \
+ __rep_print_message(env, eid, rp, str, fl); \
+ } \
+} while (0)
+
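+/*
+ * RPRINT's final argument is the complete parenthesized argument list for
+ * __rep_print, so a typical (hypothetical) call looks like:
+ *
+ *    RPRINT(env, DB_VERB_REP_MSGS, (env, "got message %lu", (u_long)type));
+ */
+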
+/*
+ * Election gen file name
+ * The file contains an egen number for an election this client has NOT
+ * participated in. I.e. it is the number of a future election. We
+ * create it when we create the rep region, if it doesn't already exist
+ * and initialize egen to 1. If it does exist, we read it when we create
+ * the rep region. We write it immediately before sending our VOTE1 in
+ * an election. That way, if a client has ever sent a vote for any
+ * election, the file is already going to be updated to reflect a future
+ * election, should it crash.
+ */
+#define REP_EGENNAME "__db.rep.egen"
+#define REP_GENNAME "__db.rep.gen"
+
+/*
+ * Internal init flag file name:
+ * The existence of this file serves as an indication that the client is in the
+ * process of Internal Initialization, in case it crashes before completing.
+ * During internal init the client's partially reconstructed database pages and
+ * logs may be in an inconsistent state, so much so that running recovery must
+ * be avoided. Furthermore, there is no other way to reliably recognize this
+ * condition. Therefore, when we open an environment, and we're just about to
+ * run recovery, we check for this file first. If it exists we must discard all
+ * logs and databases. This avoids the recovery problems, and leads to a fresh
+ * attempt at internal init if the environment becomes a replication client and
+ * finds a master. The list of databases which may need to be removed is stored
+ * in this file.
+ */
+#define REP_INITNAME "__db.rep.init"
+#define REP_INITVERSION_46 1
+#define REP_INITVERSION_47 2
+#define REP_INITVERSION 2
+
+#define REP_META_RETRY 3 /* Retry limit to get meta lock. */
+
+/*
+ * Database types for __rep_client_dbinit
+ */
+typedef enum {
+ REP_DB, /* Log record database. */
+ REP_PG /* Pg database. */
+} repdb_t;
+
+/* Macros to lock/unlock the replication region as a whole. */
+#define REP_SYSTEM_LOCK(env) \
+ MUTEX_LOCK(env, (env)->rep_handle->region->mtx_region)
+#define REP_SYSTEM_UNLOCK(env) \
+ MUTEX_UNLOCK(env, (env)->rep_handle->region->mtx_region)
+
+/*
+ * Macros for manipulating the event synchronization. We use a separate mutex
+ * so that an application's call-back function can be invoked without locking
+ * the whole region.
+ */
+#define REP_EVENT_LOCK(env) \
+ MUTEX_LOCK(env, (env)->rep_handle->region->mtx_event)
+#define REP_EVENT_UNLOCK(env) \
+ MUTEX_UNLOCK(env, (env)->rep_handle->region->mtx_event)
+
+/*
+ * REP --
+ * Shared replication structure.
+ */
+typedef struct __rep {
+ db_mutex_t mtx_region; /* Region mutex. */
+ db_mutex_t mtx_clientdb; /* Client database mutex. */
+ db_mutex_t mtx_ckp; /* Checkpoint mutex. */
+ roff_t lease_off; /* Offset of the lease table. */
+ roff_t tally_off; /* Offset of the tally region. */
+ roff_t v2tally_off; /* Offset of the vote2 tally region. */
+ int eid; /* Environment id. */
+ int master_id; /* ID of the master site. */
+ u_int32_t version; /* Current replication version. */
+ u_int32_t egen; /* Replication election generation. */
+ u_int32_t gen; /* Replication generation number. */
+ u_int32_t asites; /* Space allocated for sites. */
+ u_int32_t nsites; /* Number of sites in group. */
+ u_int32_t nvotes; /* Number of votes needed. */
+ u_int32_t priority; /* My priority in an election. */
+ u_int32_t config_nsites;
+
+ db_timeout_t elect_timeout; /* Normal/full election timeouts. */
+ db_timeout_t full_elect_timeout;
+
+ db_timeout_t chkpt_delay; /* Master checkpoint delay. */
+
+#define REP_DEFAULT_THROTTLE (10 * MEGABYTE) /* Default value is < 1Gig. */
+ u_int32_t gbytes; /* Limit on data sent in single... */
+ u_int32_t bytes; /* __rep_process_message call. */
+#define DB_REP_REQUEST_GAP 40000 /* 40 msecs */
+#define DB_REP_MAX_GAP 1280000 /* 1.28 seconds */
+ db_timespec request_gap; /* Minimum time to wait before we
+ * request a missing log record. */
+ db_timespec max_gap; /* Maximum time to wait before
+ * requesting a missing log record. */
+ /* Status change information */
+ u_int32_t apply_th; /* Number of callers in rep_apply. */
+ u_int32_t msg_th; /* Number of callers in rep_proc_msg.*/
+ u_int32_t handle_cnt; /* Count of handles in library. */
+ u_int32_t op_cnt; /* Multi-step operation count.*/
+ DB_LSN ckp_lsn; /* LSN for syncing a checkpoint. */
+ DB_LSN max_prep_lsn; /* Max LSN of txn_prepare record. */
+
+ /*
+ * Event notification synchronization: the mtx_event and associated
+ * fields which it protects govern event notification to the
+ * application. They form a guarantee that no matter how crazy the
+ * thread scheduling gets, the application sees a sensible, orderly
+ * progression of events.
+ */
+ db_mutex_t mtx_event; /* Serializes event notification. */
+ /*
+ * Latest generation whose NEWMASTER event the application has been
+ * notified of. Also serves to force STARTUPDONE to occur after
+ * NEWMASTER.
+ */
+ u_int32_t newmaster_event_gen;
+ /*
+ * Latest local victory of an election that the application has been
+ * notified of, expressed as the election generation number. This
+ * ensures we notify the application exactly once when it wins an
+ * election.
+ */
+ u_int32_t notified_egen;
+
+ /* Internal init information. */
+ u_int32_t nfiles; /* Number of files we have info on. */
+ u_int32_t curfile; /* Cur file we're getting (0-based). */
+ __rep_fileinfo_args *curinfo; /* Current file info ptr. */
+ u_int8_t *nextinfo; /* Next file info buffer. */
+ u_int8_t *originfo; /* Original file info buffer. */
+ u_int32_t infolen; /* Remaining length file info buffer. */
+ u_int32_t originfolen; /* Original length file info buffer. */
+ u_int32_t infoversion; /* Original file info version. */
+ DB_LSN first_lsn; /* Earliest LSN we need. */
+ u_int32_t first_vers; /* Log version of first log file. */
+ DB_LSN last_lsn; /* Latest LSN we need. */
+ /* These are protected by mtx_clientdb. */
+ db_pgno_t ready_pg; /* Next pg expected. */
+ db_pgno_t waiting_pg; /* First pg after gap. */
+ db_pgno_t max_wait_pg; /* Maximum pg requested. */
+ u_int32_t npages; /* Num of pages rcvd for this file. */
+ DB_MPOOLFILE *file_mpf; /* Mpoolfile for current database. */
+ DB *file_dbp; /* This file's page info. */
+ DBC *queue_dbc; /* Dbc for a queue file. */
+
+ /* Vote tallying information. */
+ u_int32_t sites; /* Sites heard from. */
+ int winner; /* Current winner EID. */
+ u_int32_t w_priority; /* Winner priority. */
+ u_int32_t w_gen; /* Winner generation. */
+ DB_LSN w_lsn; /* Winner LSN. */
+ u_int32_t w_tiebreaker; /* Winner tiebreaking value. */
+ u_int32_t votes; /* Number of votes for this site. */
+
+ db_timespec etime; /* Election start timestamp. */
+
+ /* Leases. */
+ db_timeout_t lease_timeout; /* Lease timeout. */
+ db_timespec lease_duration; /* Lease timeout with clock skew. */
+ u_int32_t clock_skew; /* Clock skew. */
+ u_int32_t clock_base; /* Clock scale factor base. */
+ db_timespec grant_expire; /* Local grant expiration time. */
+
+#ifdef HAVE_REPLICATION_THREADS
+ /*
+ * Replication Framework (repmgr) shared config information.
+ */
+ db_mutex_t mtx_repmgr; /* Region mutex. */
+ SITEADDR my_addr; /* SITEADDR of local site. */
+
+ int peer; /* Site to use for C2C sync. */
+ roff_t netaddr_off; /* Offset of site addresses region. */
+ u_int site_cnt; /* Array slots in use. */
+ u_int site_max; /* Total array slots allocated. */
+ u_int siteaddr_seq; /* Number of updates to this info. */
+
+ pid_t listener;
+
+#endif /* HAVE_REPLICATION_THREADS */
+
+ /* Statistics. */
+ DB_REP_STAT stat;
+#if defined(HAVE_REPLICATION_THREADS) && defined(HAVE_STATISTICS)
+ DB_REPMGR_STAT mstat;
+#endif
+
+ /* Configuration. */
+#define REP_C_2SITE_STRICT 0x00001 /* Don't cheat on elections. */
+#define REP_C_BULK 0x00002 /* Bulk transfer. */
+#define REP_C_DELAYCLIENT 0x00004 /* Delay client sync-up. */
+#define REP_C_INMEM 0x00008 /* In-memory replication. */
+#define REP_C_LEASE 0x00010 /* Leases configured. */
+#define REP_C_NOAUTOINIT 0x00020 /* No auto initialization. */
+#define REP_C_NOWAIT 0x00040 /* Immediate error return. */
+ u_int32_t config; /* Configuration flags. */
+
+ /*
+ * Please change __rep_print_all (rep_stat.c) to track any changes made
+ * to these flags.
+ */
+#define REP_F_ABBREVIATED 0x00000001 /* Recover NIMDB pages only. */
+#define REP_F_APP_BASEAPI 0x00000002 /* Base API application. */
+#define REP_F_APP_REPMGR 0x00000004 /* repmgr application. */
+#define REP_F_CLIENT 0x00000008 /* Client replica. */
+#define REP_F_DELAY 0x00000010 /* Delaying client sync-up. */
+#define REP_F_EGENUPDATE 0x00000020 /* Egen updated by ALIVE msg. */
+#define REP_F_EPHASE0 0x00000040 /* In phase 0 of election. */
+#define REP_F_EPHASE1 0x00000080 /* In phase 1 of election. */
+#define REP_F_EPHASE2 0x00000100 /* In phase 2 of election. */
+#define REP_F_GROUP_ESTD 0x00000200 /* Rep group is established. */
+#define REP_F_INREPELECT 0x00000400 /* Thread in rep_elect. */
+#define REP_F_INREPSTART 0x00000800 /* Thread in rep_start. */
+#define REP_F_LEASE_EXPIRED 0x00001000 /* Leases guaranteed expired. */
+#define REP_F_MASTER 0x00002000 /* Master replica. */
+#define REP_F_MASTERELECT 0x00004000 /* Master elect. */
+#define REP_F_NEWFILE 0x00008000 /* Newfile in progress. */
+#define REP_F_NIMDBS_LOADED 0x00010000 /* NIMDBs are materialized. */
+#define REP_F_NOARCHIVE 0x00020000 /* Rep blocks log_archive. */
+#define REP_F_READY_API 0x00040000 /* Need handle_cnt to be 0. */
+#define REP_F_READY_APPLY 0x00080000 /* Need apply_th to be 0. */
+#define REP_F_READY_MSG 0x00100000 /* Need msg_th to be 0. */
+#define REP_F_READY_OP 0x00200000 /* Need op_cnt to be 0. */
+#define REP_F_RECOVER_LOG 0x00400000 /* In recovery - log. */
+#define REP_F_RECOVER_PAGE 0x00800000 /* In recovery - pages. */
+#define REP_F_RECOVER_UPDATE 0x01000000 /* In recovery - files. */
+#define REP_F_RECOVER_VERIFY 0x02000000 /* In recovery - verify. */
+#define REP_F_SKIPPED_APPLY 0x04000000 /* Skipped applying a record. */
+#define REP_F_START_CALLED 0x08000000 /* Rep_start called. */
+#define REP_F_TALLY 0x10000000 /* Tallied vote before elect. */
+ u_int32_t flags;
+} REP;
+
+/*
+ * Recovery flag mask to easily check any/all recovery bits. That is
+ * REP_F_READY_{API|OP} and all REP_F_RECOVER*. This must change if the values
+ * of the flags change. NOTE: We do not include REP_F_READY_MSG in
+ * this mask because it is used frequently in non-recovery related
+ * areas and we want to manipulate it separately (see especially
+ * in __rep_new_master).
+ */
+#define REP_F_RECOVER_MASK \
+ (REP_F_READY_API | REP_F_READY_OP | \
+ REP_F_RECOVER_LOG | REP_F_RECOVER_PAGE | \
+ REP_F_RECOVER_UPDATE | REP_F_RECOVER_VERIFY)
+
+/*
+ * These flag bits are "permanent": for each of these bits, once it has been set
+ * it should never be cleared. When adding a new flag bit, if it should be
+ * sticky, please add it here too.
+ */
+#define REP_F_STICKY_MASK \
+ (REP_F_APP_BASEAPI | REP_F_APP_REPMGR | REP_F_GROUP_ESTD | \
+ REP_F_NIMDBS_LOADED | REP_F_START_CALLED)
+
+/*
+ * REP_F_EPHASE0 is not a *real* election phase. It is used for
+ * master leases and allowing the client to find the master or
+ * expire its lease. However, EPHASE0 is cleared by __rep_elect_done.
+ */
+#define IN_ELECTION(R) \
+ F_ISSET((R), REP_F_EPHASE1 | REP_F_EPHASE2)
+#define IN_ELECTION_TALLY(R) \
+ F_ISSET((R), REP_F_EPHASE1 | REP_F_EPHASE2 | REP_F_TALLY)
+#define ELECTION_MAJORITY(n) (((n) / 2) + 1)
+
+#define IN_INTERNAL_INIT(R) \
+ F_ISSET((R), REP_F_RECOVER_LOG | REP_F_RECOVER_PAGE)
+
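+/*
+ * For example, ELECTION_MAJORITY(5) == 3: in a five-site replication group,
+ * three matching votes are required to win an election.
+ */
+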
+#define IS_REP_MASTER(env) \
+ (REP_ON(env) && \
+ F_ISSET(((env)->rep_handle->region), REP_F_MASTER))
+
+#define IS_REP_CLIENT(env) \
+ (REP_ON(env) && \
+ F_ISSET(((env)->rep_handle->region), REP_F_CLIENT))
+
+#define IS_REP_STARTED(env) \
+ (REP_ON(env) && \
+ F_ISSET(((env)->rep_handle->region), REP_F_START_CALLED))
+
+#define IS_USING_LEASES(env) \
+ (REP_ON(env) && \
+ FLD_ISSET(((env)->rep_handle->region)->config, REP_C_LEASE))
+
+#define IS_CLIENT_PGRECOVER(env) \
+ (IS_REP_CLIENT(env) && \
+ F_ISSET(((env)->rep_handle->region), REP_F_RECOVER_PAGE))
+
+/*
+ * Macros to figure out if we need to do replication pre/post-amble processing.
+ * Skip for specific DB handles owned by the replication layer, either because
+ * replication is running recovery or because it's a handle entirely owned by
+ * the replication code (replication opens its own databases to track state).
+ */
+#define IS_ENV_REPLICATED(env) \
+ (REP_ON(env) && (env)->rep_handle->region->flags != 0)
+
+/*
+ * Gap processing flags. These provide control over the basic
+ * gap processing algorithm for some special cases.
+ */
+#define REP_GAP_FORCE 0x001 /* Force a request for a gap. */
+#define REP_GAP_REREQUEST 0x002 /* Gap request is a forced rerequest. */
+ /* REREQUEST is a superset of FORCE. */
+
+/*
+ * Basic pre/post-amble processing.
+ */
+#define REPLICATION_WRAP(env, func_call, checklock, ret) do { \
+ int __rep_check, __t_ret; \
+ __rep_check = IS_ENV_REPLICATED(env) ? 1 : 0; \
+ (ret) = __rep_check ? __env_rep_enter(env, checklock) : 0; \
+ if ((ret) == 0) { \
+ (ret) = func_call; \
+ if (__rep_check && (__t_ret = \
+ __env_db_rep_exit(env)) != 0 && (ret) == 0) \
+ (ret) = __t_ret; \
+ } \
+} while (0)
+
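+/*
+ * Illustrative use, where __some_operation() is a stand-in for the internal
+ * call being wrapped:
+ *
+ *    REPLICATION_WRAP(env, __some_operation(env), 0, ret);
+ *
+ * This expands to the enter/call/exit sequence above and leaves the first
+ * non-zero error code in ret.
+ */
+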
+/*
+ * Per-process replication structure.
+ *
+ * There are 2 mutexes used in the Base replication API. (See LOCK_MUTEX in
+ * repmgr.h for a discussion of repmgr.)
+ * 1. mtx_region - This protects the fields of the rep region above.
+ * 2. mtx_clientdb - This protects the per-process flags, and bookkeeping
+ * database and all of the components that maintain it. Those
+ * components include the following fields in the log region (see log.h):
+ * a. ready_lsn
+ * b. waiting_lsn
+ * c. verify_lsn
+ * d. wait_recs
+ * e. rcvd_recs
+ * f. max_wait_lsn
+ * These fields in the log region are NOT protected by the log region lock at
+ * all.
+ *
+ * Note that the per-process flags should truly be protected by a special
+ * per-process thread mutex, but it is currently set in so isolated a manner
+ * that it didn't make sense to do so and in most cases we're already holding
+ * the mtx_clientdb anyway.
+ *
+ * The lock ordering protocol is that mtx_clientdb must be acquired first and
+ * then either REP->mtx_region, or the LOG->mtx_region mutex may be acquired if
+ * necessary.
+ *
+ * Note that the appropriate mutex is needed any time one or more related
+ * values are read or written that could possibly use more than one atomic
+ * machine instruction. A single 32-bit integer value is safe without a
+ * mutex, but most other types of value should use a mutex.
+ *
+ * Any use of a mutex must be inside a matched pair of ENV_ENTER() and
+ * ENV_LEAVE() macros. This ensures that if a thread dies while holding
+ * a lock (i.e. a mutex), recovery can clean it up so that it does not
+ * indefinitely block other threads.
+ */
+struct __db_rep {
+ /*
+ * Shared configuration information -- copied to and maintained in the
+ * shared region as soon as the shared region is created.
+ */
+ int eid; /* Environment ID. */
+
+ u_int32_t gbytes; /* Limit on data sent in single... */
+ u_int32_t bytes; /* __rep_process_message call. */
+
+ db_timespec request_gap; /* Minimum time to wait before we
+ * request a missing log record. */
+ db_timespec max_gap; /* Maximum time to wait before
+ * requesting a missing log record. */
+
+ u_int32_t clock_skew; /* Clock skew factor. */
+ u_int32_t clock_base; /* Clock skew base. */
+ u_int32_t config; /* Configuration flags. */
+ u_int32_t config_nsites;
+
+ db_timeout_t elect_timeout; /* Normal/full election timeouts. */
+ db_timeout_t full_elect_timeout;
+
+ db_timeout_t chkpt_delay; /* Master checkpoint delay. */
+
+ u_int32_t my_priority;
+ db_timeout_t lease_timeout; /* Master leases. */
+ /*
+ * End of shared configuration information.
+ */
+ int (*send) /* Send function. */
+ __P((DB_ENV *, const DBT *, const DBT *,
+ const DB_LSN *, int, u_int32_t));
+
+ DB *rep_db; /* Bookkeeping database. */
+
+ REP *region; /* In memory structure. */
+ u_int8_t *bulk; /* Shared memory bulk area. */
+
+ /*
+ * Please change __rep_print_all (rep_stat.c) to track any changes made
+ * to these flags.
+ */
+#define DBREP_APP_BASEAPI 0x0001 /* Base API application. */
+#define DBREP_APP_REPMGR 0x0002 /* repmgr application. */
+#define DBREP_OPENFILES 0x0004 /* This handle has opened files. */
+ u_int32_t flags; /* per-process flags. */
+
+#ifdef HAVE_REPLICATION_THREADS
+ /*
+ * Replication Framework (repmgr) per-process information.
+ */
+ int nthreads;
+ u_int32_t init_policy;
+ int perm_policy;
+ int peer; /* Site to use for C2C sync. */
+ db_timeout_t ack_timeout;
+ db_timeout_t election_retry_wait;
+ db_timeout_t connection_retry_wait;
+ db_timeout_t heartbeat_frequency; /* Max period between msgs. */
+ db_timeout_t heartbeat_monitor_timeout;
+
+ /* Repmgr's copies of rep stuff. */
+ int master_eid;
+
+ /* Thread synchronization. */
+ REPMGR_RUNNABLE *selector, **messengers, *elect_thread;
+ mgr_mutex_t *mutex;
+ cond_var_t queue_nonempty, check_election;
+#ifdef DB_WIN32
+ ACK_WAITERS_TABLE *waiters;
+ HANDLE signaler;
+#else
+ pthread_cond_t ack_condition;
+ int read_pipe, write_pipe;
+#endif
+
+ /* Operational stuff. */
+ REPMGR_SITE *sites; /* Array of known sites. */
+ u_int site_cnt; /* Array slots in use. */
+ u_int site_max; /* Total array slots allocated. */
+ u_int siteaddr_seq; /* Last known update to this list. */
+
+ /*
+ * The connections list contains only those connections not actively
+ * associated with a known site (see repmgr.h).
+ */
+ CONNECTION_LIST connections;
+ RETRY_Q_HEADER retries; /* Sites needing connection retry. */
+ struct {
+ int size;
+ STAILQ_HEAD(__repmgr_q_header, __repmgr_message) header;
+ } input_queue;
+
+ socket_t listen_fd;
+ repmgr_netaddr_t my_addr;
+ db_timespec last_bcast; /* Time of last broadcast msg. */
+
+ int finished; /* Repmgr threads should shut down. */
+ int done_one; /* TODO: rename */
+ int found_master;
+ int takeover_pending; /* We've been elected master. */
+
+/* Operations we can ask election thread to perform (OOB value is 0): */
+#define ELECT_ELECTION 1 /* Call for an election. */
+#define ELECT_FAILURE_ELECTION 2 /* Do election, adjusting nsites to account
+ for a failed master. */
+#define ELECT_REPSTART 3 /* Call rep_start(CLIENT). */
+ int operation_needed; /* Next op for election thread. */
+
+#endif /* HAVE_REPLICATION_THREADS */
+};
+
+/*
+ * Determine whether application is repmgr or base replication API. If
+ * repmgr was configured, base the test on internal replication flags for
+ * APP_REPMGR and APP_BASEAPI. These flags get set by the appropriate parts
+ * of the various replication APIs.
+ */
+#ifdef HAVE_REPLICATION_THREADS
+/*
+ * Application type is set to be repmgr when:
+ * 1. A local site is defined.
+ * 2. A remote site is defined.
+ * 3. An acknowledgement policy is configured.
+ * 4. 2SITE_STRICT is configured.
+ * 5. A timeout value is configured for one of the repmgr timeouts.
+ */
+#define APP_IS_REPMGR(env) \
+ (REP_ON(env) ? \
+ F_ISSET((env)->rep_handle->region, REP_F_APP_REPMGR) : \
+ F_ISSET((env)->rep_handle, DBREP_APP_REPMGR))
+
+/*
+ * Application type is set to be base replication API when:
+ * 1. Transport send function is defined and is not the repmgr send
+ * function.
+ */
+#define APP_IS_BASEAPI(env) \
+ (REP_ON(env) ? \
+ F_ISSET((env)->rep_handle->region, REP_F_APP_BASEAPI) : \
+ F_ISSET((env)->rep_handle, DBREP_APP_BASEAPI))
+
+/*
+ * Set application type. These macros do extra checking to guarantee that
+ * only one application type is ever set.
+ */
+#define APP_SET_REPMGR(env) do { \
+ if (REP_ON(env)) { \
+ if (!F_ISSET((env)->rep_handle->region, \
+ REP_F_APP_BASEAPI)) \
+ F_SET((env)->rep_handle->region, \
+ REP_F_APP_REPMGR); \
+ } else if (!F_ISSET((env)->rep_handle, DBREP_APP_BASEAPI)) \
+ F_SET((env)->rep_handle, DBREP_APP_REPMGR); \
+} while (0)
+#define APP_SET_BASEAPI(env) do { \
+ if (REP_ON(env)) { \
+ if (!F_ISSET((env)->rep_handle->region, \
+ REP_F_APP_REPMGR)) \
+ F_SET((env)->rep_handle->region, \
+ REP_F_APP_BASEAPI); \
+ } else if (!F_ISSET((env)->rep_handle, DBREP_APP_REPMGR)) \
+ F_SET((env)->rep_handle, DBREP_APP_BASEAPI); \
+} while (0)
+
+#else
+/*
+ * We did not configure repmgr, application must be base replication API.
+ * The APP_SET_* macros are noops in this case, but they must be defined
+ * with a null body to avoid compiler warnings on some platforms.
+ */
+#define APP_IS_REPMGR(env) 0
+#define APP_SET_REPMGR(env) do { \
+ ; \
+} while (0)
+#define APP_IS_BASEAPI(env) 1
+#define APP_SET_BASEAPI(env) do { \
+ ; \
+} while (0)
+#endif /* HAVE_REPLICATION_THREADS */
+
+/*
+ * Control structure flags for replication communication infrastructure.
+ */
+/*
+ * Define old DB_LOG_ values that we must support here. For reasons of
+ * compatibility with old versions, these values must be reserved explicitly in
+ * the list of flag values (below)
+ */
+#define DB_LOG_PERM_42_44 0x20
+#define DB_LOG_RESEND_42_44 0x40
+#define REPCTL_INIT_45 0x02 /* Back compatible flag value. */
+
+#define REPCTL_ELECTABLE 0x01 /* Upgraded client is electable. */
+#define REPCTL_FLUSH 0x02 /* Record should be flushed. */
+#define REPCTL_GROUP_ESTD 0x04 /* Message from site in a group. */
+#define REPCTL_INIT 0x08 /* Internal init message. */
+#define	REPCTL_LEASE	0x10		/* Lease-related message. */
+ /*
+ * Skip over reserved values 0x20
+ * and 0x40, as explained above.
+ */
+#define REPCTL_LOG_END 0x80 /* Approximate end of group-wide log. */
+#define REPCTL_PERM DB_LOG_PERM_42_44
+#define REPCTL_RESEND DB_LOG_RESEND_42_44
+
+/*
+ * File info flags for internal init. The per-database (i.e., file) flag
+ * represents the on-disk format of the file, and is conveyed from the master to
+ * the initializing client in the UPDATE message, so that the client can know
+ * how to create the file. The per-page flag is conveyed along with each PAGE
+ * message, describing the format of the page image being transmitted; it is of
+ * course set by the site serving the PAGE_REQ. The serving site gets the page
+ * image from its own mpool, and thus the page is in the native format of the
+ * serving site. This format may be different (i.e., opposite) from the on-disk
+ * format, and in fact can vary per-page, since with client-to-client sync it is
+ * possible for various different sites to serve the various PAGE_REQ requests.
+ */
+#define REPINFO_DB_LITTLEENDIAN 0x0001 /* File is little-endian lorder. */
+#define REPINFO_PG_LITTLEENDIAN 0x0002 /* Page is little-endian lorder. */
+
+/*
+ * Control message format for the 4.6 release. The db_timespec is
+ * not a portable structure. Therefore, in 4.6, replication among
+ * mixed OSs such as Linux and Windows, which have different time_t
+ * sizes, does not work.
+ */
+typedef struct {
+ u_int32_t rep_version; /* Replication version number. */
+ u_int32_t log_version; /* Log version number. */
+
+ DB_LSN lsn; /* Log sequence number. */
+ u_int32_t rectype; /* Message type. */
+ u_int32_t gen; /* Generation number. */
+ db_timespec msg_time; /* Timestamp seconds for leases. */
+ u_int32_t flags; /* log_put flag value. */
+} REP_46_CONTROL;
+
+/*
+ * Control message format for 4.5 release and earlier.
+ */
+typedef struct {
+ u_int32_t rep_version; /* Replication version number. */
+ u_int32_t log_version; /* Log version number. */
+
+ DB_LSN lsn; /* Log sequence number. */
+ u_int32_t rectype; /* Message type. */
+ u_int32_t gen; /* Generation number. */
+ u_int32_t flags; /* log_put flag value. */
+} REP_OLD_CONTROL;
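+
+/*
+ * Sketch (hypothetical helper, not in the original source): converting
+ * an old-format control record to the 4.6 layout by zeroing the one
+ * field the old format lacks.
+ *
+ *	static void
+ *	__rep_control_from_old(const REP_OLD_CONTROL *op, REP_46_CONTROL *cp)
+ *	{
+ *		cp->rep_version = op->rep_version;
+ *		cp->log_version = op->log_version;
+ *		cp->lsn = op->lsn;
+ *		cp->rectype = op->rectype;
+ *		cp->gen = op->gen;
+ *		timespecclear(&cp->msg_time);	/* No leases before 4.6. */
+ *		cp->flags = op->flags;
+ *	}
+ */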
+
+#define LEASE_REFRESH_TRIES 3 /* Number of times to try refresh. */
+
+/* Master granted lease information. */
+typedef struct __rep_lease_entry {
+ int eid; /* EID of client grantor. */
+ db_timespec start_time; /* Start time clients echo back. */
+ db_timespec end_time; /* Master lease expiration time. */
+ DB_LSN lease_lsn; /* Durable LSN lease applies to. */
+} REP_LEASE_ENTRY;
+
+/*
+ * Old vote info where some fields were not fixed size.
+ */
+typedef struct {
+ u_int32_t egen; /* Election generation. */
+ int nsites; /* Number of sites I've been in
+ * communication with. */
+ int nvotes; /* Number of votes needed to win. */
+ int priority; /* My site's priority. */
+ u_int32_t tiebreaker; /* Tie-breaking quasi-random value. */
+} REP_OLD_VOTE_INFO;
+
+typedef struct {
+ u_int32_t egen; /* Voter's election generation. */
+ int eid; /* Voter's ID. */
+} REP_VTALLY;
+
+/*
+ * The REP_THROTTLE_ONLY flag is used to do throttle processing only.
+ * If set, it will only allow sending the REP_*_MORE message, but not
+ * the normal, non-throttled message. It is used to support throttling
+ * with bulk transfer.
+ */
+/* Flags for __rep_send_throttle. */
+#define REP_THROTTLE_ONLY 0x0001 /* Send _MORE message only. */
+
+/* Throttled message processing information. */
+typedef struct {
+ DB_LSN lsn; /* LSN of this record. */
+ DBT *data_dbt; /* DBT of this record. */
+ u_int32_t gbytes; /* This call's max gbytes sent. */
+ u_int32_t bytes; /* This call's max bytes sent. */
+ u_int32_t type; /* Record type. */
+} REP_THROTTLE;
+
+/* Bulk processing information. */
+/*
+ * !!!
+ * We use a uintptr_t for the offset. We'd really like to use a ptrdiff_t
+ * since that really is what it is. But ptrdiff_t is not portable and
+ * doesn't exist everywhere.
+ */
+typedef struct {
+ u_int8_t *addr; /* Address of bulk buffer. */
+ uintptr_t *offp; /* Ptr to current offset into buffer. */
+ u_int32_t len; /* Bulk buffer length. */
+ u_int32_t type; /* Item type in buffer (log, page). */
+ DB_LSN lsn; /* First LSN in buffer. */
+ int eid; /* ID of potential recipients. */
+#define BULK_XMIT 0x001 /* Buffer in transit. */
+ u_int32_t *flagsp; /* Buffer flags. */
+} REP_BULK;
+
+/*
+ * This structure takes care of representing a transaction.
+ * It holds all the records, sorted by page number so that
+ * we can obtain locks and apply updates in a deadlock free
+ * order.
+ */
+typedef struct {
+ u_int nlsns;
+ u_int nalloc;
+ DB_LSN *array;
+} LSN_COLLECTION;
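+
+/*
+ * Minimal sketch of growing an LSN_COLLECTION by doubling; the helper
+ * name and initial size are hypothetical.
+ *
+ *	static int
+ *	__lsn_collection_add(ENV *env, LSN_COLLECTION *lc, DB_LSN lsn)
+ *	{
+ *		int ret;
+ *
+ *		if (lc->nlsns == lc->nalloc) {
+ *			lc->nalloc = lc->nalloc == 0 ? 8 : 2 * lc->nalloc;
+ *			if ((ret = __os_realloc(env,
+ *			    lc->nalloc * sizeof(DB_LSN), &lc->array)) != 0)
+ *				return (ret);
+ *		}
+ *		lc->array[lc->nlsns++] = lsn;
+ *		return (0);
+ *	}
+ */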
+
+/*
+ * This is used by the page-prep routines to do the lock_vec call to
+ * apply the updates for a single transaction or a collection of
+ * transactions.
+ */
+typedef struct {
+ int n;
+ DB_LOCKREQ *reqs;
+ DBT *objs;
+} linfo_t;
+
+#if defined(__cplusplus)
+}
+#endif
+
+#include "dbinc_auto/rep_ext.h"
+#endif /* !_DB_REP_H_ */
diff --git a/db-4.8.30/dbinc/repmgr.h b/db-4.8.30/dbinc/repmgr.h
new file mode 100644
index 0000000..a993714
--- /dev/null
+++ b/db-4.8.30/dbinc/repmgr.h
@@ -0,0 +1,548 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2006-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_REPMGR_H_
+#define _DB_REPMGR_H_
+
+#include "dbinc_auto/repmgr_auto.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * Replication Framework message types. These values are transmitted to
+ * identify messages sent between sites, even sites running differing versions
+ * of software. Therefore, once assigned, the values are permanently "frozen".
+ * New message types added in later versions always get new (higher) values.
+ *
+ * For example, in repmgr wire protocol version 1 the highest assigned message
+ * type value was 3, for REPMGR_REP_MESSAGE. Wire protocol version 2 added the
+ * HEARTBEAT message type (4).
+ *
+ * We still list them in alphabetical order, for ease of reference. But this
+ * generally does not correspond to numerical order.
+ */
+#define REPMGR_ACK 1 /* Acknowledgement. */
+#define REPMGR_HANDSHAKE 2 /* Connection establishment sequence. */
+#define REPMGR_HEARTBEAT 4 /* Monitor connection health. */
+#define REPMGR_REP_MESSAGE 3 /* Normal replication message. */
+
+/* Heartbeats were introduced in version 2. */
+#define REPMGR_MAX_V1_MSG_TYPE 3
+#define REPMGR_MAX_V2_MSG_TYPE 4
+#define REPMGR_MAX_V3_MSG_TYPE 4
+#define HEARTBEAT_MIN_VERSION 2
+
+/* The range of protocol versions we're willing to support. */
+#define DB_REPMGR_VERSION 3
+#define DB_REPMGR_MIN_VERSION 1
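+
+/*
+ * Sketch of how a receiver might validate a message type against the
+ * version negotiated for a connection (hypothetical helper; versions 2
+ * and 3 share the same maximum):
+ *
+ *	static int
+ *	__repmgr_msg_type_ok(u_int32_t version, u_int8_t msg_type)
+ *	{
+ *		u_int32_t max;
+ *
+ *		max = version == 1 ?
+ *		    REPMGR_MAX_V1_MSG_TYPE : REPMGR_MAX_V2_MSG_TYPE;
+ *		return (msg_type >= REPMGR_ACK && msg_type <= max);
+ *	}
+ */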
+
+#ifdef DB_WIN32
+typedef SOCKET socket_t;
+typedef HANDLE thread_id_t;
+typedef HANDLE mgr_mutex_t;
+typedef HANDLE cond_var_t;
+typedef WSABUF db_iovec_t;
+#else
+typedef int socket_t;
+typedef pthread_t thread_id_t;
+typedef pthread_mutex_t mgr_mutex_t;
+typedef pthread_cond_t cond_var_t;
+typedef struct iovec db_iovec_t;
+#endif
+
+/*
+ * The (arbitrary) maximum number of outgoing messages we're willing to hold, on
+ * a queue per connection, waiting for TCP buffer space to become available in
+ * the kernel. Rather than exceeding this limit, we simply discard additional
+ * messages (since this is always allowed by the replication protocol).
+ * As a special dispensation, if a message is destined for a specific remote
+ * site (i.e., it's not a broadcast), then we first try blocking the sending
+ * thread, waiting for space to become available (though we only wait a limited
+ * time). This is so as to be able to handle the immediate flood of (a
+ * potentially large number of) outgoing messages that replication generates, in
+ * a tight loop, when handling PAGE_REQ, LOG_REQ and ALL_REQ requests.
+ */
+#define OUT_QUEUE_LIMIT 10
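+
+/*
+ * The resulting enqueue policy, in outline (pseudo-code sketch, not the
+ * actual implementation):
+ *
+ *	if (conn->out_queue_length < OUT_QUEUE_LIMIT)
+ *		enqueue the message;
+ *	else if (the message has a single specific destination)
+ *		wait (bounded) on conn->drained, then retry;
+ *	else
+ *		drop the message (the replication protocol allows this);
+ */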
+
+/*
+ * The system value is available from sysconf(_SC_HOST_NAME_MAX).
+ * Historically, the maximum host name was 256.
+ */
+#ifndef MAXHOSTNAMELEN
+#define MAXHOSTNAMELEN 256
+#endif
+
+/* A buffer big enough for the string "site host.domain.com:65535". */
+#define MAX_SITE_LOC_STRING (MAXHOSTNAMELEN+20)
+typedef char SITE_STRING_BUFFER[MAX_SITE_LOC_STRING+1];
+
+/* Default timeout values, expressed in microseconds. */
+#define DB_REPMGR_DEFAULT_ACK_TIMEOUT (1 * US_PER_SEC)
+#define DB_REPMGR_DEFAULT_CONNECTION_RETRY (30 * US_PER_SEC)
+#define DB_REPMGR_DEFAULT_ELECTION_RETRY (10 * US_PER_SEC)
+
+struct __repmgr_connection;
+ typedef struct __repmgr_connection REPMGR_CONNECTION;
+struct __repmgr_queue; typedef struct __repmgr_queue REPMGR_QUEUE;
+struct __queued_output; typedef struct __queued_output QUEUED_OUTPUT;
+struct __repmgr_retry; typedef struct __repmgr_retry REPMGR_RETRY;
+struct __repmgr_runnable; typedef struct __repmgr_runnable REPMGR_RUNNABLE;
+struct __repmgr_site; typedef struct __repmgr_site REPMGR_SITE;
+struct __ack_waiters_table;
+ typedef struct __ack_waiters_table ACK_WAITERS_TABLE;
+
+typedef TAILQ_HEAD(__repmgr_conn_list, __repmgr_connection) CONNECTION_LIST;
+typedef STAILQ_HEAD(__repmgr_out_q_head, __queued_output) OUT_Q_HEADER;
+typedef TAILQ_HEAD(__repmgr_retry_q, __repmgr_retry) RETRY_Q_HEADER;
+
+/* Information about threads managed by Replication Framework. */
+struct __repmgr_runnable {
+ ENV *env;
+ thread_id_t thread_id;
+ void *(*run) __P((void *));
+ int finished;
+};
+
+/*
+ * Information about pending connection establishment retry operations.
+ *
+ * We keep these in order by time. This works, under the assumption that the
+ * DB_REP_CONNECTION_RETRY timeout never changes once we get going (though that
+ * assumption is of course wrong, so this needs to be fixed).
+ *
+ * Usually, we put things onto the tail end of the list. But when we add a new
+ * site while threads are running, we trigger its first connection attempt by
+ * scheduling a retry for "0" microseconds from now, putting its retry element
+ * at the head of the list instead.
+ *
+ * TODO: I think this can be fixed by defining "time" to be the time the element
+ * was added (with some convention like "0" meaning immediate), rather than the
+ * deadline time.
+ */
+struct __repmgr_retry {
+ TAILQ_ENTRY(__repmgr_retry) entries;
+ u_int eid;
+ db_timespec time;
+};
+
+/*
+ * We use scatter/gather I/O for both reading and writing. The largest number
+ * of buffers we ever try to use at once is 5, corresponding to the 5 segments
+ * of a message described in the "wire protocol" (repmgr_net.c).
+ */
+typedef struct {
+ db_iovec_t vectors[5];
+
+ /*
+ * Index of the first iovec to be used. Initially of course this is
+ * zero. But as we progress through partial I/O transfers, it ends up
+ * pointing to the first iovec to be used on the next operation.
+ */
+ int offset;
+
+ /*
+ * Total number of pieces defined for this message; equal to the number
+ * of times add_buffer and/or add_dbt were called to populate it. We do
+ * *NOT* revise this as we go along. So subsequent I/O operations must
+ * use count-offset to get the number of active vector pieces still
+ * remaining.
+ */
+ int count;
+
+ /*
+ * Total number of bytes accounted for in all the pieces of this
+ * message. We do *NOT* revise this as we go along (though it's not
+ * clear we shouldn't).
+ */
+ size_t total_bytes;
+} REPMGR_IOVECS;
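+
+/*
+ * Sketch of consuming a partial transfer: advance "offset" past fully
+ * transferred vectors and trim the first unfinished one. The helper
+ * name is hypothetical; this is the bookkeeping the I/O code must do.
+ *
+ *	static void
+ *	__iovecs_advance_sketch(REPMGR_IOVECS *v, size_t nbytes)
+ *	{
+ *		db_iovec_t *iov;
+ *
+ *		while (v->offset < v->count &&
+ *		    nbytes >= v->vectors[v->offset].iov_len) {
+ *			nbytes -= v->vectors[v->offset].iov_len;
+ *			v->offset++;
+ *		}
+ *		if (v->offset < v->count && nbytes > 0) {
+ *			iov = &v->vectors[v->offset];
+ *			iov->iov_base = (u_int8_t *)iov->iov_base + nbytes;
+ *			iov->iov_len -= nbytes;
+ *		}
+ *	}
+ */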
+
+typedef struct {
+ size_t length; /* number of bytes in data */
+ int ref_count; /* # of sites' send queues pointing to us */
+ u_int8_t data[1]; /* variable size data area */
+} REPMGR_FLAT;
+
+struct __queued_output {
+ STAILQ_ENTRY(__queued_output) entries;
+ REPMGR_FLAT *msg;
+ size_t offset;
+};
+
+/*
+ * The following is for input. Once we know the sizes of the pieces of an
+ * incoming message, we can create this struct (and also the data areas for the
+ * pieces themselves, in the same memory allocation). This is also the struct
+ * in which the message lives while it's waiting to be processed by message
+ * threads.
+ */
+typedef struct __repmgr_message {
+ STAILQ_ENTRY(__repmgr_message) entries;
+ int originating_eid;
+ DBT control, rec;
+} REPMGR_MESSAGE;
+
+typedef enum {
+ SIZES_PHASE,
+ DATA_PHASE
+} phase_t;
+
+/*
+ * If another site initiates a connection to us, when we receive it the
+ * connection state is immediately "connected". But when we initiate the
+ * connection to another site, it first has to go through a "connecting" state,
+ * until the non-blocking connect() I/O operation completes successfully.
+ * With an outgoing connection, we always know the associated site (and so
+ * we have a valid eid). But with an incoming connection, we don't know the
+ * site until we get a handshake message, so until that time the eid is
+ * invalid.
+ */
+struct __repmgr_connection {
+ TAILQ_ENTRY(__repmgr_connection) entries;
+
+ int eid; /* index into sites array in machtab */
+ socket_t fd;
+#ifdef DB_WIN32
+ WSAEVENT event_object;
+#endif
+
+ u_int32_t version; /* Wire protocol version on this connection. */
+ /* (0 means not yet determined.) */
+
+#define CONN_INCOMING 0x01 /* We received this via accept(). */
+ u_int32_t flags;
+
+/*
+ * When we initiate an outgoing connection, it starts off in CONNECTING state
+ * (or possibly CONNECTED). When the (non-blocking) connection operation later
+ * completes, we move to CONNECTED state. When we get the response to our
+ * version negotiation, we move to READY.
+ * For incoming connections that we accept, we start in NEGOTIATE, then to
+ * PARAMETERS, and then to READY.
+ * CONGESTED is a hierarchical substate of READY: it's just like READY, with
+ * the additional wrinkle that we don't bother waiting for the outgoing queue to
+ * drain in certain circumstances.
+ */
+#define CONN_CONGESTED 1 /* Long-lived full outgoing queue. */
+#define CONN_CONNECTED 2 /* Awaiting reply to our version negotiation. */
+#define CONN_CONNECTING 3 /* Awaiting completion of non-block connect. */
+#define CONN_DEFUNCT 4 /* Basically dead, awaiting clean-up. */
+#define CONN_NEGOTIATE 5 /* Awaiting version proposal. */
+#define CONN_PARAMETERS 6 /* Awaiting parameters handshake. */
+#define CONN_READY 7 /* Everything's fine. */
+ int state;
+
+ /*
+	 * Output: usually we simply write messages inline, in the
+ * send() function's thread. But if TCP doesn't have enough network
+ * buffer space for us when we first try it, we instead allocate some
+ * memory, and copy the message, and then send it as space becomes
+ * available in our main select() thread. In some cases, if the queue
+ * gets too long we wait until it's drained, and then append to it.
+ * This condition variable's associated mutex is the normal per-repmgr
+ * db_rep->mutex, because that mutex is always held anyway whenever the
+ * output queue is consulted.
+ */
+ OUT_Q_HEADER outbound_queue;
+ int out_queue_length;
+ cond_var_t drained;
+ int blockers; /* ref count of msg threads waiting on us */
+
+ /*
+ * Input: while we're reading a message, we keep track of what phase
+ * we're in. In both phases, we use a REPMGR_IOVECS to keep track of
+ * our progress within the phase. Depending upon the message type, we
+ * end up with either a rep_message (which is a wrapper for the control
+ * and rec DBTs), or a single generic DBT.
+ * Any time we're in DATA_PHASE, it means we have already received
+ * the message header (consisting of msg_type and 2 sizes), and
+ * therefore we have allocated buffer space to read the data. (This is
+ * important for resource clean-up.)
+ */
+ phase_t reading_phase;
+ REPMGR_IOVECS iovecs;
+
+ u_int8_t msg_type;
+ u_int32_t control_size_buf, rec_size_buf;
+
+ union {
+ REPMGR_MESSAGE *rep_message;
+ struct {
+ DBT cntrl, rec;
+ } repmgr_msg;
+ } input;
+};
+
+#define IS_READY_STATE(s) ((s) == CONN_READY || (s) == CONN_CONGESTED)
+
+#ifdef HAVE_GETADDRINFO
+typedef struct addrinfo ADDRINFO;
+#else
+/*
+ * Some Windows platforms have getaddrinfo (Windows XP and later), some
+ * don't. We don't support conditional compilation in our Windows build,
+ * so we always use our own getaddrinfo implementation. Rename everything
+ * so that we don't collide with the system libraries.
+ */
+#undef AI_PASSIVE
+#define AI_PASSIVE 0x01
+#undef AI_CANONNAME
+#define AI_CANONNAME 0x02
+#undef AI_NUMERICHOST
+#define AI_NUMERICHOST 0x04
+
+typedef struct __addrinfo {
+ int ai_flags; /* AI_PASSIVE, AI_CANONNAME, AI_NUMERICHOST */
+ int ai_family; /* PF_xxx */
+ int ai_socktype; /* SOCK_xxx */
+ int ai_protocol; /* 0 or IPPROTO_xxx for IPv4 and IPv6 */
+ size_t ai_addrlen; /* length of ai_addr */
+ char *ai_canonname; /* canonical name for nodename */
+ struct sockaddr *ai_addr; /* binary address */
+ struct __addrinfo *ai_next; /* next structure in linked list */
+} ADDRINFO;
+#endif /* HAVE_GETADDRINFO */
+
+/*
+ * Unprocessed network address configuration, as stored in shared region.
+ */
+typedef struct {
+ roff_t host; /* Separately allocated copy of string. */
+ u_int16_t port; /* Stored in plain old host-byte-order. */
+} SITEADDR;
+
+/*
+ * Local copy of local and remote addresses, with resolved addrinfo.
+ */
+typedef struct {
+ char *host; /* Separately allocated copy of string. */
+ u_int16_t port; /* Stored in plain old host-byte-order. */
+ ADDRINFO *address_list;
+ ADDRINFO *current;
+} repmgr_netaddr_t;
+
+/*
+ * Each site that we know about is either idle or connected. If it's connected,
+ * we have a reference to a connection object; if it's idle, we have a reference
+ * to a retry object. (But see note about sub_conns, below.)
+ * We store site objects in a simple array in the machtab, indexed by EID.
+ * (We allocate EID numbers for other sites simply according to their index
+ * within this array; we use the special value INT_MAX to represent our own
+ * EID.)
+ */
+struct __repmgr_site {
+ repmgr_netaddr_t net_addr;
+ DB_LSN max_ack; /* Best ack we've heard from this site. */
+ u_int32_t priority;
+ db_timespec last_rcvd_timestamp;
+
+ union {
+ REPMGR_CONNECTION *conn; /* when CONNECTED */
+ REPMGR_RETRY *retry; /* when IDLE */
+ } ref;
+
+ /*
+ * Subordinate connections (connections from subordinate processes at a
+ * multi-process site). Note that the SITE_CONNECTED state, and all the
+	 * multi-process site). Note that the SITE_CONNECTED state and all
+	 * the ref.retry stuff above are irrelevant to subordinate
+	 * connections. If a connection is on this list, it exists; and we
+	 * never bother trying to reconnect lost connections (indeed we
+	 * can't, for these are always incoming-only).
+ CONNECTION_LIST sub_conns;
+
+#define SITE_IDLE 1 /* Waiting til time to retry connecting. */
+#define SITE_CONNECTED 2
+ int state;
+
+#define SITE_HAS_PRIO 0x01 /* Set if priority field has valid value. */
+ u_int32_t flags;
+};
+
+/*
+ * Repmgr keeps track of references to connection information (instances
+ * of struct __repmgr_connection). There are three kinds of places
+ * connections may be found: (1) SITE->ref.conn, (2) SITE->sub_conns, and
+ * (3) db_rep->connections.
+ *
+ * 1. SITE->ref.conn points to our connection with the main process running
+ * at the given site, if such a connection exists. We may have initiated
+ * the connection to the site ourselves, or we may have received it as an
+ * incoming connection. Once it is established there is very little
+ * difference between those two cases.
+ *
+ * 2. SITE->sub_conns is a list of connections we have with subordinate
+ * processes running at the given site. There can be any number of these
+ * connections, one per subordinate process. Note that these connections
+ * are always incoming: there's no way for us to initiate this kind of
+ * connection because subordinate processes do not "listen".
+ *
+ * 3. The db_rep->connections list contains the references to any
+ * connections that are not actively associated with any site (we
+ * sometimes call these "orphans"). There are two situations in which
+ * this can happen:
+ *
+ * a) When we accept an incoming connection, we don't know what site it
+ * comes from until we read the initial handshake message.
+ *
+ * b) When an error occurs on a connection, we first mark it as DEFUNCT
+ * and stop using it. Then, at a later, well-defined time, we close
+ * the connection's file descriptor and get rid of the connection
+ * struct.
+ *
+ * In light of the above, we can see that the following describes the
+ * rules for how connections may be moved among these three kinds of
+ * "places":
+ *
+ * - when we initiate an outgoing connection, we of course know what site
+ * it is destined for, and so we immediately put the pointer to
+ * the connection struct into SITE->ref.conn
+ *
+ * - when we accept an incoming connection, we don't immediately know
+ * whom it's from, so we have to put it on the orphans list
+ * (db_rep->connections).
+ *
+ * - (incoming, cont.) But as soon as we complete the initial "handshake"
+ * message exchange, we will know which site it's from and whether it's
+ * a subordinate or main connection. At that point we remove it from
+ * db_rep->connections and either point to it by SITE->ref.conn, or add
+ * it to the SITE->sub_conns list.
+ *
+ * - (for any active connection) when an error occurs, we move the
+ * connection to the orphans list until we have a chance to close it.
+ */
+
+/*
+ * Repmgr message formats.
+ *
+ * Declarative definitions of current message formats appear in repmgr.src.
+ * (The s_message/gen_msg.awk utility generates C code.) In general, we send
+ * the buffers marshaled from those structure formats in the "control" portion
+ * of a message.
+ */
+
+/*
+ * Flags for the handshake message (new in 4.8).
+ */
+#define REPMGR_SUBORDINATE 0x01 /* This is a subordinate connection. */
+
+/*
+ * Legacy V1 handshake message format. For compatibility, we send this as part
+ * of version negotiation upon connection establishment.
+ */
+typedef struct {
+ u_int32_t version;
+ u_int16_t port;
+ u_int32_t priority;
+} DB_REPMGR_V1_HANDSHAKE;
+
+/*
+ * We store site structs in a dynamically allocated, growable array, indexed by
+ * EID. We allocate EID numbers for remote sites simply according to their
+ * index within this array. We don't need (the same kind of) info for ourself
+ * (the local site), so we use an EID value that won't conflict with any valid
+ * array index.
+ */
+#define SITE_FROM_EID(eid) (&db_rep->sites[eid])
+#define EID_FROM_SITE(s) ((int)((s) - (&db_rep->sites[0])))
+#define IS_VALID_EID(e) ((e) >= 0)
+#define IS_KNOWN_REMOTE_SITE(e) ((e) >= 0 && ((u_int)(e)) < db_rep->site_cnt)
+#define SELF_EID INT_MAX
+
+#define IS_SUBORDINATE(db_rep) (db_rep->listen_fd == INVALID_SOCKET)
+
+#define IS_PEER_POLICY(p) ((p) == DB_REPMGR_ACKS_ALL_PEERS || \
+ (p) == DB_REPMGR_ACKS_QUORUM || \
+ (p) == DB_REPMGR_ACKS_ONE_PEER)
+
+/*
+ * Most of the code in repmgr runs while holding repmgr's main mutex, which
+ * resides in db_rep->mutex. This mutex is owned by a single repmgr process,
+ * and serializes access to the (large) critical sections among threads in the
+ * process. Unlike many other mutexes in DB, it is specifically coded as either
+ * a POSIX threads mutex or a Win32 mutex. Note that although it's a large
+ * fraction of the code, it's a tiny fraction of the time: repmgr spends most of
+ * its time in a call to select(), and a bit in calls into the base
+ * replication API. All of those release the mutex.
+ * Access to repmgr's shared list of site addresses is protected by
+ * another mutex: mtx_repmgr. And, when changing space allocation for that
+ * site list, we conform to the convention of acquiring renv->mtx_regenv.
+ * These mutexes are, of course, acquired less frequently.
+ * When it's necessary to acquire more than one of these mutexes, the
+ * ordering priority is:
+ * db_rep->mutex (first)
+ * mtx_repmgr (briefly)
+ * mtx_regenv (last, and most briefly)
+ */
+#define LOCK_MUTEX(m) do { \
+ int __ret; \
+ if ((__ret = __repmgr_lock_mutex(m)) != 0) \
+ return (__ret); \
+} while (0)
+
+#define UNLOCK_MUTEX(m) do { \
+ int __ret; \
+ if ((__ret = __repmgr_unlock_mutex(m)) != 0) \
+ return (__ret); \
+} while (0)
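+
+/*
+ * Note that on error both macros return from the enclosing function, so
+ * they may be used only where an int error return is appropriate, e.g.
+ * (sketch; function name is hypothetical):
+ *
+ *	int
+ *	some_repmgr_operation(ENV *env)
+ *	{
+ *		DB_REP *db_rep = env->rep_handle;
+ *
+ *		LOCK_MUTEX(db_rep->mutex);
+ *		... critical section ...
+ *		UNLOCK_MUTEX(db_rep->mutex);
+ *		return (0);
+ *	}
+ */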
+
+/* POSIX/Win32 socket (and other) portability. */
+#ifdef DB_WIN32
+#define WOULDBLOCK WSAEWOULDBLOCK
+#define INPROGRESS WSAEWOULDBLOCK
+
+#define net_errno WSAGetLastError()
+typedef int socklen_t;
+typedef char * sockopt_t;
+
+#define iov_len len
+#define iov_base buf
+
+typedef DWORD threadsync_timeout_t;
+
+#define REPMGR_INITED(db_rep) (db_rep->waiters != NULL)
+#else
+
+#define INVALID_SOCKET -1
+#define SOCKET_ERROR -1
+#define WOULDBLOCK EWOULDBLOCK
+#define INPROGRESS EINPROGRESS
+
+#define net_errno errno
+typedef void * sockopt_t;
+
+#define closesocket(fd) close(fd)
+
+typedef struct timespec threadsync_timeout_t;
+
+#define REPMGR_INITED(db_rep) (db_rep->read_pipe >= 0)
+#endif
+
+/* Macros to proceed, as with a cursor, through the address_list: */
+#define ADDR_LIST_CURRENT(na) ((na)->current)
+#define ADDR_LIST_FIRST(na) ((na)->current = (na)->address_list)
+#define ADDR_LIST_NEXT(na) ((na)->current = (na)->current->ai_next)
+#define ADDR_LIST_INIT(na, al) do { \
+ (na)->address_list = (al); \
+ ADDR_LIST_FIRST(na); \
+} while (0)
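+
+/*
+ * Typical use of the cursor macros: try each resolved address in turn
+ * (sketch; socket creation and error handling elided):
+ *
+ *	ADDRINFO *ai;
+ *
+ *	for (ai = ADDR_LIST_FIRST(na); ai != NULL; ai = ADDR_LIST_NEXT(na))
+ *		if (connect(fd, ai->ai_addr, (socklen_t)ai->ai_addrlen) == 0)
+ *			break;
+ */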
+
+/*
+ * Generic definition of some action to be performed on each connection, in the
+ * form of a call-back function.
+ */
+typedef int (*CONNECTION_ACTION) __P((ENV *, REPMGR_CONNECTION *, void *));
+
+#include "dbinc_auto/repmgr_ext.h"
+
+#if defined(__cplusplus)
+}
+#endif
+#endif /* !_DB_REPMGR_H_ */
diff --git a/db-4.8.30/dbinc/shqueue.h b/db-4.8.30/dbinc/shqueue.h
new file mode 100644
index 0000000..9b12549
--- /dev/null
+++ b/db-4.8.30/dbinc/shqueue.h
@@ -0,0 +1,406 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_SHQUEUE_H_
+#define _DB_SHQUEUE_H_
+
+/*
+ * This file defines three types of data structures: chains, lists and
+ * tail queues, similar to those in the include file <sys/queue.h>.
+ *
+ * The difference is that this set of macros can be used for structures that
+ * reside in shared memory that may be mapped at different addresses in each
+ * process. In most cases, the macros for shared structures exactly mirror
+ * the normal macros, although the macro calls require an additional type
+ * parameter, which the standard macros use only in their HEAD and ENTRY
+ * forms.
+ *
+ * Since we use relative offsets of type ssize_t rather than pointers, 0
+ * (aka NULL) is a valid offset and cannot be used to indicate the end
+ * of a list. Therefore, we use -1 to indicate end of list.
+ *
+ * The macros ending in "P" return pointers without checking for end or
+ * beginning of lists, the others check for end of list and evaluate to
+ * either a pointer or NULL.
+ *
+ * For details on the use of these macros, see the queue(3) manual page.
+ */
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define SH_PTR_TO_OFF(src, dest) \
+ ((ssize_t)(((u_int8_t *)(dest)) - ((u_int8_t *)(src))))
+
+/*
+ * Shared memory chain definitions.
+ */
+#define SH_CHAIN_ENTRY \
+struct { \
+ ssize_t sce_next; /* relative offset to next element */ \
+ ssize_t sce_prev; /* relative offset of prev element */ \
+}
+
+#define SH_CHAIN_INIT(elm, field) \
+ (elm)->field.sce_next = (elm)->field.sce_prev = -1
+
+#define SH_CHAIN_HASNEXT(elm, field) ((elm)->field.sce_next != -1)
+#define SH_CHAIN_NEXTP(elm, field, type) \
+ ((struct type *)((u_int8_t *)(elm) + (elm)->field.sce_next))
+#define SH_CHAIN_NEXT(elm, field, type) (SH_CHAIN_HASNEXT(elm, field) ? \
+ SH_CHAIN_NEXTP(elm, field, type) : (struct type *)NULL)
+
+#define SH_CHAIN_HASPREV(elm, field) ((elm)->field.sce_prev != -1)
+#define SH_CHAIN_PREVP(elm, field, type) \
+ ((struct type *)((u_int8_t *)(elm) + (elm)->field.sce_prev))
+#define SH_CHAIN_PREV(elm, field, type) (SH_CHAIN_HASPREV(elm, field) ? \
+ SH_CHAIN_PREVP(elm, field, type) : (struct type *)NULL)
+
+#define SH_CHAIN_SINGLETON(elm, field) \
+ (!(SH_CHAIN_HASNEXT(elm, field) || SH_CHAIN_HASPREV(elm, field)))
+
+#define SH_CHAIN_INSERT_AFTER(listelm, elm, field, type) do { \
+ struct type *__next = SH_CHAIN_NEXT(listelm, field, type); \
+ if (__next != NULL) { \
+ (elm)->field.sce_next = SH_PTR_TO_OFF(elm, __next); \
+ __next->field.sce_prev = SH_PTR_TO_OFF(__next, elm); \
+ } else \
+ (elm)->field.sce_next = -1; \
+ (elm)->field.sce_prev = SH_PTR_TO_OFF(elm, listelm); \
+ (listelm)->field.sce_next = SH_PTR_TO_OFF(listelm, elm); \
+} while (0)
+
+#define SH_CHAIN_INSERT_BEFORE(listelm, elm, field, type) do { \
+ struct type *__prev = SH_CHAIN_PREV(listelm, field, type); \
+ if (__prev != NULL) { \
+ (elm)->field.sce_prev = SH_PTR_TO_OFF(elm, __prev); \
+ __prev->field.sce_next = SH_PTR_TO_OFF(__prev, elm); \
+ } else \
+ (elm)->field.sce_prev = -1; \
+ (elm)->field.sce_next = SH_PTR_TO_OFF(elm, listelm); \
+ (listelm)->field.sce_prev = SH_PTR_TO_OFF(listelm, elm); \
+} while (0)
+
+#define SH_CHAIN_REMOVE(elm, field, type) do { \
+ struct type *__prev = SH_CHAIN_PREV(elm, field, type); \
+ struct type *__next = SH_CHAIN_NEXT(elm, field, type); \
+ if (__next != NULL) \
+ __next->field.sce_prev = (__prev == NULL) ? -1 : \
+ SH_PTR_TO_OFF(__next, __prev); \
+ if (__prev != NULL) \
+ __prev->field.sce_next = (__next == NULL) ? -1 : \
+ SH_PTR_TO_OFF(__prev, __next); \
+ SH_CHAIN_INIT(elm, field); \
+} while (0)
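+
+/*
+ * Usage sketch (hypothetical element type): because all links are
+ * self-relative offsets, the chain remains valid no matter where the
+ * shared region is mapped in each process.
+ *
+ *	struct sh_item {
+ *		u_int32_t value;
+ *		SH_CHAIN_ENTRY links;
+ *	};
+ *	struct sh_item *a, *b, *p;
+ *
+ *	SH_CHAIN_INIT(a, links);
+ *	SH_CHAIN_INSERT_AFTER(a, b, links, sh_item);
+ *	for (p = a; p != NULL; p = SH_CHAIN_NEXT(p, links, sh_item))
+ *		...;
+ */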
+
+/*
+ * Shared memory list definitions.
+ */
+#define SH_LIST_HEAD(name) \
+struct name { \
+ ssize_t slh_first; /* first element */ \
+}
+
+#define SH_LIST_HEAD_INITIALIZER(head) \
+ { -1 }
+
+#define SH_LIST_ENTRY \
+struct { \
+ ssize_t sle_next; /* relative offset to next element */ \
+ ssize_t sle_prev; /* relative offset of prev element */ \
+}
+
+/*
+ * Shared memory list functions.
+ */
+#define SH_LIST_EMPTY(head) \
+ ((head)->slh_first == -1)
+
+#define SH_LIST_FIRSTP(head, type) \
+ ((struct type *)(((u_int8_t *)(head)) + (head)->slh_first))
+
+#define SH_LIST_FIRST(head, type) \
+ (SH_LIST_EMPTY(head) ? NULL : \
+ ((struct type *)(((u_int8_t *)(head)) + (head)->slh_first)))
+
+#define SH_LIST_NEXTP(elm, field, type) \
+ ((struct type *)(((u_int8_t *)(elm)) + (elm)->field.sle_next))
+
+#define SH_LIST_NEXT(elm, field, type) \
+ ((elm)->field.sle_next == -1 ? NULL : \
+ ((struct type *)(((u_int8_t *)(elm)) + (elm)->field.sle_next)))
+
+ /*
+ * __SH_LIST_PREV_OFF is private API. It calculates the address of
+ * the elm->field.sle_next member of a SH_LIST structure. All offsets
+ * between elements are relative to that point in SH_LIST structures.
+ */
+#define __SH_LIST_PREV_OFF(elm, field) \
+ ((ssize_t *)(((u_int8_t *)(elm)) + (elm)->field.sle_prev))
+
+#define SH_LIST_PREV(elm, field, type) \
+ (struct type *)((ssize_t)(elm) - (*__SH_LIST_PREV_OFF(elm, field)))
+
+#define SH_LIST_FOREACH(var, head, field, type) \
+ for ((var) = SH_LIST_FIRST((head), type); \
+ (var) != NULL; \
+ (var) = SH_LIST_NEXT((var), field, type))
+
+/*
+ * Given a correct A.next: B.prev = SH_LIST_NEXT_TO_PREV(A)
+ * in a list [A, B].
+ * The prev value is always the offset from an element to its preceding
+ * element's next location, not to the beginning of the structure. To get
+ * to the beginning of an element structure in memory, given an element,
+ * do the following:
+ *	A = B - (B.prev + (&B.next - B))
+ * Take the element's next pointer and calculate what the corresponding
+ * prev pointer should be -- basically it is the negation plus the offset
+ * of the next field in the structure.
+ */
+#define SH_LIST_NEXT_TO_PREV(elm, field) \
+ (((elm)->field.sle_next == -1 ? 0 : -(elm)->field.sle_next) + \
+ SH_PTR_TO_OFF(elm, &(elm)->field.sle_next))
+
+#define SH_LIST_INIT(head) (head)->slh_first = -1
+
+#define SH_LIST_INSERT_BEFORE(head, listelm, elm, field, type) do { \
+ if (listelm == SH_LIST_FIRST(head, type)) { \
+ SH_LIST_INSERT_HEAD(head, elm, field, type); \
+ } else { \
+ (elm)->field.sle_next = SH_PTR_TO_OFF(elm, listelm); \
+ (elm)->field.sle_prev = SH_LIST_NEXT_TO_PREV( \
+ SH_LIST_PREV((listelm), field, type), field) + \
+ (elm)->field.sle_next; \
+ (SH_LIST_PREV(listelm, field, type))->field.sle_next = \
+ (SH_PTR_TO_OFF((SH_LIST_PREV(listelm, field, \
+ type)), elm)); \
+ (listelm)->field.sle_prev = SH_LIST_NEXT_TO_PREV(elm, field); \
+ } \
+} while (0)
+
+#define SH_LIST_INSERT_AFTER(listelm, elm, field, type) do { \
+ if ((listelm)->field.sle_next != -1) { \
+ (elm)->field.sle_next = SH_PTR_TO_OFF(elm, \
+ SH_LIST_NEXTP(listelm, field, type)); \
+ SH_LIST_NEXTP(listelm, field, type)->field.sle_prev = \
+ SH_LIST_NEXT_TO_PREV(elm, field); \
+ } else \
+ (elm)->field.sle_next = -1; \
+ (listelm)->field.sle_next = SH_PTR_TO_OFF(listelm, elm); \
+ (elm)->field.sle_prev = SH_LIST_NEXT_TO_PREV(listelm, field); \
+} while (0)
+
+#define SH_LIST_INSERT_HEAD(head, elm, field, type) do { \
+ if ((head)->slh_first != -1) { \
+ (elm)->field.sle_next = \
+ (head)->slh_first - SH_PTR_TO_OFF(head, elm); \
+ SH_LIST_FIRSTP(head, type)->field.sle_prev = \
+ SH_LIST_NEXT_TO_PREV(elm, field); \
+ } else \
+ (elm)->field.sle_next = -1; \
+ (head)->slh_first = SH_PTR_TO_OFF(head, elm); \
+ (elm)->field.sle_prev = SH_PTR_TO_OFF(elm, &(head)->slh_first); \
+} while (0)
+
+#define SH_LIST_REMOVE(elm, field, type) do { \
+ if ((elm)->field.sle_next != -1) { \
+ SH_LIST_NEXTP(elm, field, type)->field.sle_prev = \
+ (elm)->field.sle_prev - (elm)->field.sle_next; \
+ *__SH_LIST_PREV_OFF(elm, field) += (elm)->field.sle_next;\
+ } else \
+ *__SH_LIST_PREV_OFF(elm, field) = -1; \
+} while (0)
+
+#define SH_LIST_REMOVE_HEAD(head, field, type) do { \
+ if (!SH_LIST_EMPTY(head)) { \
+ SH_LIST_REMOVE(SH_LIST_FIRSTP(head, type), field, type);\
+ } \
+} while (0)
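+
+/*
+ * Usage sketch for the shared-memory list (hypothetical element type;
+ * "elem" is assumed to point into the shared region):
+ *
+ *	struct sh_elem {
+ *		u_int32_t value;
+ *		SH_LIST_ENTRY links;
+ *	};
+ *	SH_LIST_HEAD(sh_elem_head) head;
+ *	struct sh_elem *elem, *p;
+ *
+ *	SH_LIST_INIT(&head);
+ *	SH_LIST_INSERT_HEAD(&head, elem, links, sh_elem);
+ *	SH_LIST_FOREACH(p, &head, links, sh_elem)
+ *		...;
+ */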
+
+/*
+ * Shared memory tail queue definitions.
+ */
+#define SH_TAILQ_HEAD(name) \
+struct name { \
+ ssize_t stqh_first; /* relative offset of first element */ \
+ ssize_t stqh_last; /* relative offset of last's next */ \
+}
+
+#define SH_TAILQ_HEAD_INITIALIZER(head) \
+ { -1, 0 }
+
+#define SH_TAILQ_ENTRY \
+struct { \
+ ssize_t stqe_next; /* relative offset of next element */ \
+ ssize_t stqe_prev; /* relative offset of prev's next */ \
+}
+
+/*
+ * Shared memory tail queue functions.
+ */
+
+#define SH_TAILQ_EMPTY(head) \
+ ((head)->stqh_first == -1)
+
+#define SH_TAILQ_FIRSTP(head, type) \
+ ((struct type *)((u_int8_t *)(head) + (head)->stqh_first))
+
+#define SH_TAILQ_FIRST(head, type) \
+ (SH_TAILQ_EMPTY(head) ? NULL : SH_TAILQ_FIRSTP(head, type))
+
+#define SH_TAILQ_NEXTP(elm, field, type) \
+ ((struct type *)((u_int8_t *)(elm) + (elm)->field.stqe_next))
+
+#define SH_TAILQ_NEXT(elm, field, type) \
+ ((elm)->field.stqe_next == -1 ? NULL : \
+ ((struct type *)((u_int8_t *)(elm) + (elm)->field.stqe_next)))
+
+ /*
+ * __SH_TAILQ_PREV_OFF is private API. It calculates the address of
+ * the elm->field.stqe_next member of a SH_TAILQ structure. All
+ * offsets between elements are relative to that point in SH_TAILQ
+ * structures.
+ */
+#define __SH_TAILQ_PREV_OFF(elm, field) \
+ ((ssize_t *)(((u_int8_t *)(elm)) + (elm)->field.stqe_prev))
+
+#define SH_TAILQ_PREVP(elm, field, type) \
+ (struct type *)((ssize_t)elm - (*__SH_TAILQ_PREV_OFF(elm, field)))
+
+#define SH_TAILQ_PREV(head, elm, field, type) \
+ (((elm) == SH_TAILQ_FIRST(head, type)) ? NULL : \
+ (struct type *)((ssize_t)elm - (*__SH_TAILQ_PREV_OFF(elm, field))))
+
+ /*
+ * __SH_TAILQ_LAST_OFF is private API. It calculates the address of
+ * the stqe_next member of a SH_TAILQ structure in the last element
+ * of this list. All offsets between elements are relative to that
+ * point in SH_TAILQ structures.
+ */
+#define __SH_TAILQ_LAST_OFF(head) \
+ ((ssize_t *)(((u_int8_t *)(head)) + (head)->stqh_last))
+
+#define SH_TAILQ_LASTP(head, field, type) \
+ ((struct type *)((ssize_t)(head) + \
+ ((ssize_t)((head)->stqh_last) - \
+ ((ssize_t)SH_PTR_TO_OFF(SH_TAILQ_FIRST(head, type), \
+ &(SH_TAILQ_FIRSTP(head, type)->field.stqe_next))))))
+
+#define SH_TAILQ_LAST(head, field, type) \
+ (SH_TAILQ_EMPTY(head) ? NULL : SH_TAILQ_LASTP(head, field, type))
+
+/*
+ * Given a correct A.next: B.prev = SH_TAILQ_NEXT_TO_PREV(A)
+ * in a list [A, B].
+ * The prev value is always the offset from an element to its preceding
+ * element's next location, not to the beginning of the structure. To get
+ * to the beginning of an element structure in memory, given an element,
+ * do the following:
+ *	A = B - (B.prev + (&B.next - B))
+ */
+#define SH_TAILQ_NEXT_TO_PREV(elm, field) \
+ (((elm)->field.stqe_next == -1 ? 0 : \
+ (-(elm)->field.stqe_next) + \
+ SH_PTR_TO_OFF(elm, &(elm)->field.stqe_next)))
+
+#define SH_TAILQ_FOREACH(var, head, field, type) \
+ for ((var) = SH_TAILQ_FIRST((head), type); \
+ (var) != NULL; \
+ (var) = SH_TAILQ_NEXT((var), field, type))
+
+#define SH_TAILQ_FOREACH_REVERSE(var, head, field, type) \
+ for ((var) = SH_TAILQ_LAST((head), field, type); \
+ (var) != NULL; \
+ (var) = SH_TAILQ_PREV((head), (var), field, type))
+
+#define SH_TAILQ_INIT(head) { \
+ (head)->stqh_first = -1; \
+ (head)->stqh_last = SH_PTR_TO_OFF(head, &(head)->stqh_first); \
+}
+
+#define SH_TAILQ_INSERT_HEAD(head, elm, field, type) do { \
+ if ((head)->stqh_first != -1) { \
+ (elm)->field.stqe_next = \
+ (head)->stqh_first - SH_PTR_TO_OFF(head, elm); \
+ SH_TAILQ_FIRSTP(head, type)->field.stqe_prev = \
+ SH_TAILQ_NEXT_TO_PREV(elm, field); \
+ } else { \
+ (head)->stqh_last = \
+ SH_PTR_TO_OFF(head, &(elm)->field.stqe_next); \
+ (elm)->field.stqe_next = -1; \
+ } \
+ (head)->stqh_first = SH_PTR_TO_OFF(head, elm); \
+ (elm)->field.stqe_prev = \
+ SH_PTR_TO_OFF(elm, &(head)->stqh_first); \
+} while (0)
+
+#define SH_TAILQ_INSERT_TAIL(head, elm, field) do { \
+ (elm)->field.stqe_next = -1; \
+ (elm)->field.stqe_prev = \
+ -SH_PTR_TO_OFF(head, elm) + (head)->stqh_last; \
+ if ((head)->stqh_last == \
+ SH_PTR_TO_OFF((head), &(head)->stqh_first)) \
+ (head)->stqh_first = SH_PTR_TO_OFF(head, elm); \
+ else \
+ *__SH_TAILQ_LAST_OFF(head) = -(head)->stqh_last + \
+ SH_PTR_TO_OFF((elm), &(elm)->field.stqe_next) + \
+ SH_PTR_TO_OFF(head, elm); \
+ (head)->stqh_last = \
+ SH_PTR_TO_OFF(head, &((elm)->field.stqe_next)); \
+} while (0)
+
+#define SH_TAILQ_INSERT_BEFORE(head, listelm, elm, field, type) do { \
+ if (listelm == SH_TAILQ_FIRST(head, type)) { \
+ SH_TAILQ_INSERT_HEAD(head, elm, field, type); \
+ } else { \
+ (elm)->field.stqe_next = SH_PTR_TO_OFF(elm, listelm); \
+ (elm)->field.stqe_prev = SH_TAILQ_NEXT_TO_PREV( \
+ SH_TAILQ_PREVP((listelm), field, type), field) + \
+ (elm)->field.stqe_next; \
+ (SH_TAILQ_PREVP(listelm, field, type))->field.stqe_next =\
+ (SH_PTR_TO_OFF((SH_TAILQ_PREVP(listelm, field, type)), \
+ elm)); \
+ (listelm)->field.stqe_prev = \
+ SH_TAILQ_NEXT_TO_PREV(elm, field); \
+ } \
+} while (0)
+
+#define SH_TAILQ_INSERT_AFTER(head, listelm, elm, field, type) do { \
+ if ((listelm)->field.stqe_next != -1) { \
+ (elm)->field.stqe_next = (listelm)->field.stqe_next - \
+ SH_PTR_TO_OFF(listelm, elm); \
+ SH_TAILQ_NEXTP(listelm, field, type)->field.stqe_prev = \
+ SH_TAILQ_NEXT_TO_PREV(elm, field); \
+ } else { \
+ (elm)->field.stqe_next = -1; \
+ (head)->stqh_last = \
+ SH_PTR_TO_OFF(head, &(elm)->field.stqe_next); \
+ } \
+ (listelm)->field.stqe_next = SH_PTR_TO_OFF(listelm, elm); \
+ (elm)->field.stqe_prev = SH_TAILQ_NEXT_TO_PREV(listelm, field); \
+} while (0)
+
+#define SH_TAILQ_REMOVE(head, elm, field, type) do { \
+ if ((elm)->field.stqe_next != -1) { \
+ SH_TAILQ_NEXTP(elm, field, type)->field.stqe_prev = \
+ (elm)->field.stqe_prev + \
+ SH_PTR_TO_OFF(SH_TAILQ_NEXTP(elm, \
+ field, type), elm); \
+ *__SH_TAILQ_PREV_OFF(elm, field) += (elm)->field.stqe_next;\
+ } else { \
+ (head)->stqh_last = (elm)->field.stqe_prev + \
+ SH_PTR_TO_OFF(head, elm); \
+ *__SH_TAILQ_PREV_OFF(elm, field) = -1; \
+ } \
+} while (0)
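+
+/*
+ * Usage sketch (hypothetical element type). Note that tail insertion
+ * takes no "type" argument: it stores offsets only through the head and
+ * never needs to cast another element to its type.
+ *
+ *	struct sh_tq_elem {
+ *		u_int32_t value;
+ *		SH_TAILQ_ENTRY links;
+ *	};
+ *	SH_TAILQ_HEAD(sh_tq_head) head;
+ *	struct sh_tq_elem *elem, *p;
+ *
+ *	SH_TAILQ_INIT(&head);
+ *	SH_TAILQ_INSERT_TAIL(&head, elem, links);
+ *	SH_TAILQ_FOREACH_REVERSE(p, &head, links, sh_tq_elem)
+ *		...;
+ */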
+
+#if defined(__cplusplus)
+}
+#endif
+#endif /* !_DB_SHQUEUE_H_ */
diff --git a/db-4.8.30/dbinc/tcl_db.h b/db-4.8.30/dbinc/tcl_db.h
new file mode 100644
index 0000000..7a7a734
--- /dev/null
+++ b/db-4.8.30/dbinc/tcl_db.h
@@ -0,0 +1,278 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1999-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_TCL_DB_H_
+#define _DB_TCL_DB_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define MSG_SIZE 100 /* Message size */
+
+enum INFOTYPE {
+ I_DB, I_DBC, I_ENV, I_LOCK, I_LOGC, I_MP, I_NDBM, I_PG, I_SEQ, I_TXN};
+
+#define MAX_ID 8 /* Maximum number of sub-id's we need */
+#define DBTCL_PREP 64 /* Size of txn_recover preplist */
+
+#define DBTCL_DBM 1
+#define DBTCL_NDBM 2
+
+#define DBTCL_GETCLOCK 0
+#define DBTCL_GETLIMIT 1
+#define DBTCL_GETREQ 2
+
+#define DBTCL_MUT_ALIGN 0
+#define DBTCL_MUT_INCR 1
+#define DBTCL_MUT_MAX 2
+#define DBTCL_MUT_TAS 3
+
+/*
+ * Why use a home grown package over the Tcl_Hash functions?
+ *
+ * We could have implemented the stuff below without maintaining our
+ * own list manipulation, efficiently hashing it with the available
+ * Tcl functions (Tcl_CreateHashEntry, Tcl_GetHashValue, etc). I chose
+ * not to do so for these reasons:
+ *
+ * We still need the information below. Using the hashing would only
+ * remove the need for the next/prev pointers. We still need the structure
+ * itself because we need more than one value associated with a widget.
+ * We need to keep track of parent pointers for sub-widgets (like cursors)
+ * so we can close them correctly. We need to keep track of each widget's
+ * id counters for any sub-widgets it may have. We need to be able to
+ * associate the name/client data outside the scope of the widget.
+ *
+ * So, is it better to use the hashing rather than
+ * the linear list we have now? I decided against it for the simple reason
+ * that accessing the structure would require two calls. The first is
+ * Tcl_FindHashEntry(table, key) and then, once we have the entry, we'd
+ * have to do Tcl_GetHashValue(entry) to get the pointer to the structure.
+ *
+ * I believe the number of simultaneous DB widgets in existence at one time
+ * is not going to be large (no more than several dozen), so
+ * linearly searching the list is not going to impact performance in a
+ * noticeable way. Should performance be impacted due to the size of the
+ * info list, then perhaps it is time to revisit this decision.
+ */
+typedef struct dbtcl_info {
+ LIST_ENTRY(dbtcl_info) entries;
+ Tcl_Interp *i_interp;
+ char *i_name;
+ enum INFOTYPE i_type;
+ union infop {
+ DB *dbp;
+ DBC *dbcp;
+ DB_ENV *envp;
+ DB_LOCK *lock;
+ DB_LOGC *logc;
+ DB_MPOOLFILE *mp;
+ DB_TXN *txnp;
+ void *anyp;
+ } un;
+ union data {
+ int anydata;
+ db_pgno_t pgno;
+ u_int32_t lockid;
+ } und;
+ union data2 {
+ int anydata;
+ int pagesz;
+ DB_COMPACT *c_data;
+ } und2;
+ DBT i_lockobj;
+ FILE *i_err;
+ char *i_errpfx;
+
+ /* Callbacks--Tcl_Objs containing proc names */
+ Tcl_Obj *i_compare;
+ Tcl_Obj *i_dupcompare;
+ Tcl_Obj *i_event;
+ Tcl_Obj *i_hashproc;
+ Tcl_Obj *i_isalive;
+ Tcl_Obj *i_part_callback;
+ Tcl_Obj *i_rep_send;
+ Tcl_Obj *i_second_call;
+
+ /* Environment ID for the i_rep_send callback. */
+ Tcl_Obj *i_rep_eid;
+
+ struct dbtcl_info *i_parent;
+ int i_otherid[MAX_ID];
+} DBTCL_INFO;
+
+#define i_anyp un.anyp
+#define i_dbp un.dbp
+#define i_dbcp un.dbcp
+#define i_envp un.envp
+#define i_lock un.lock
+#define i_logc un.logc
+#define i_mp un.mp
+#define i_pagep un.anyp
+#define i_txnp un.txnp
+
+#define i_data und.anydata
+#define i_pgno und.pgno
+#define i_locker und.lockid
+#define i_data2 und2.anydata
+#define i_pgsz und2.pagesz
+#define i_cdata und2.c_data
+
+#define i_envtxnid i_otherid[0]
+#define i_envmpid i_otherid[1]
+#define i_envlockid i_otherid[2]
+#define i_envlogcid i_otherid[3]
+
+#define i_mppgid i_otherid[0]
+
+#define i_dbdbcid i_otherid[0]
+
+extern int __debug_on, __debug_print, __debug_stop, __debug_test;
+
+typedef struct dbtcl_global {
+ LIST_HEAD(infohead, dbtcl_info) g_infohead;
+} DBTCL_GLOBAL;
+#define __db_infohead __dbtcl_global.g_infohead
+
+extern DBTCL_GLOBAL __dbtcl_global;
+
+/*
+ * Tcl_NewStringObj takes an "int" length argument, when the typical use is to
+ * call it with a size_t length (for example, returned by strlen). Tcl is in
+ * the wrong, but that doesn't help us much -- cast the argument.
+ */
+#define NewStringObj(a, b) \
+ Tcl_NewStringObj(a, (int)b)
+
+#define NAME_TO_DB(name) (DB *)_NameToPtr((name))
+#define NAME_TO_DBC(name) (DBC *)_NameToPtr((name))
+#define NAME_TO_ENV(name) (DB_ENV *)_NameToPtr((name))
+#define NAME_TO_LOCK(name) (DB_LOCK *)_NameToPtr((name))
+#define NAME_TO_MP(name) (DB_MPOOLFILE *)_NameToPtr((name))
+#define NAME_TO_TXN(name) (DB_TXN *)_NameToPtr((name))
+#define NAME_TO_SEQUENCE(name) (DB_SEQUENCE *)_NameToPtr((name))
+
+/*
+ * MAKE_STAT_LIST appends a {name value} pair to a result list that MUST be
+ * called 'res' that is a Tcl_Obj * in the local function. This macro also
+ * assumes a label "error" to go to in the event of a Tcl error. For stat
+ * functions this will typically go before the "free" function to free the
+ * stat structure returned by DB.
+ */
+#define MAKE_STAT_LIST(s, v) do { \
+ result = _SetListElemInt(interp, res, (s), (long)(v)); \
+ if (result != TCL_OK) \
+ goto error; \
+} while (0)
+
+#define MAKE_WSTAT_LIST(s, v) do { \
+ result = _SetListElemWideInt(interp, res, (s), (int64_t)(v)); \
+ if (result != TCL_OK) \
+ goto error; \
+} while (0)
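+
+/*
+ * Sketch of the calling convention these macros assume. The names "res",
+ * "result", "interp", and the "error" label are required by the macros;
+ * "sp" and the stat field shown are illustrative.
+ *
+ *	Tcl_Obj *res;
+ *	int result;
+ *
+ *	res = Tcl_NewObj();
+ *	MAKE_STAT_LIST("Number of commits", sp->st_ncommits);
+ *	Tcl_SetObjResult(interp, res);
+ *error:
+ *	__os_ufree(env, sp);
+ */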
+
+/*
+ * MAKE_STAT_LSN appends a {name {LSNfile LSNoffset}} pair to a result list
+ * that MUST be called 'res' that is a Tcl_Obj * in the local
+ * function. This macro also assumes a label "error" to go to
+ * in the event of a Tcl error. For stat functions this will
+ * typically go before the "free" function to free the stat structure
+ * returned by DB.
+ */
+#define MAKE_STAT_LSN(s, lsn) do { \
+ myobjc = 2; \
+ myobjv[0] = Tcl_NewLongObj((long)(lsn)->file); \
+ myobjv[1] = Tcl_NewLongObj((long)(lsn)->offset); \
+ lsnlist = Tcl_NewListObj(myobjc, myobjv); \
+ myobjc = 2; \
+ myobjv[0] = Tcl_NewStringObj((s), (int)strlen(s)); \
+ myobjv[1] = lsnlist; \
+ thislist = Tcl_NewListObj(myobjc, myobjv); \
+ result = Tcl_ListObjAppendElement(interp, res, thislist); \
+ if (result != TCL_OK) \
+ goto error; \
+} while (0)
+
+/*
+ * MAKE_STAT_STRLIST appends a {name string} pair to a result list
+ * that MUST be called 'res' that is a Tcl_Obj * in the local
+ * function. This macro also assumes a label "error" to go to
+ * in the event of a Tcl error. For stat functions this will
+ * typically go before the "free" function to free the stat structure
+ * returned by DB.
+ */
+#define MAKE_STAT_STRLIST(s,s1) do { \
+ result = _SetListElem(interp, res, (s), (u_int32_t)strlen(s), \
+ (s1), (u_int32_t)strlen(s1)); \
+ if (result != TCL_OK) \
+ goto error; \
+} while (0)
+
+/*
+ * MAKE_SITE_LIST appends a {eid host port status} tuple to a result list
+ * that MUST be called 'res' that is a Tcl_Obj * in the local function.
+ * This macro also assumes a label "error" to go to in the event of a Tcl
+ * error.
+ */
+#define MAKE_SITE_LIST(e, h, p, s) do { \
+ myobjc = 4; \
+ myobjv[0] = Tcl_NewIntObj(e); \
+ myobjv[1] = Tcl_NewStringObj((h), (int)strlen(h)); \
+ myobjv[2] = Tcl_NewIntObj((int)p); \
+ myobjv[3] = Tcl_NewStringObj((s), (int)strlen(s)); \
+ thislist = Tcl_NewListObj(myobjc, myobjv); \
+ result = Tcl_ListObjAppendElement(interp, res, thislist); \
+ if (result != TCL_OK) \
+ goto error; \
+} while (0)
+
+/*
+ * FLAG_CHECK checks that the given flag is not set yet.
+ * If it is, it sets up an error message.
+ */
+#define FLAG_CHECK(flag) do { \
+ if ((flag) != 0) { \
+ Tcl_SetResult(interp, \
+ " Only 1 policy can be specified.\n", \
+ TCL_STATIC); \
+ result = TCL_ERROR; \
+ break; \
+ } \
+} while (0)
+
+/*
+ * FLAG_CHECK2 checks that the given flag is either not set yet or set
+ * only to the given allowed value.
+ * If not, it sets up an error message.
+ */
+#define FLAG_CHECK2(flag, val) do { \
+ if (((flag) & ~(val)) != 0) { \
+ Tcl_SetResult(interp, \
+ " Only 1 policy can be specified.\n", \
+ TCL_STATIC); \
+ result = TCL_ERROR; \
+ break; \
+ } \
+} while (0)
+
+/*
+ * IS_HELP checks whether the arg we bombed on is -?, which is a help option.
+ * If it is, we return TCL_OK (but leave the result set to whatever
+ * Tcl_GetIndexFromObj says, which lists all the valid options). Otherwise
+ * we return TCL_ERROR.
+ */
+#define IS_HELP(s) \
+ (strcmp(Tcl_GetStringFromObj(s,NULL), "-?") == 0) ? TCL_OK : TCL_ERROR
+
+#if defined(__cplusplus)
+}
+#endif
+
+#include "dbinc_auto/tcl_ext.h"
+#endif /* !_DB_TCL_DB_H_ */
diff --git a/db-4.8.30/dbinc/txn.h b/db-4.8.30/dbinc/txn.h
new file mode 100644
index 0000000..7ee7542
--- /dev/null
+++ b/db-4.8.30/dbinc/txn.h
@@ -0,0 +1,227 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef _DB_TXN_H_
+#define _DB_TXN_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* Operation parameters to the delayed commit processing code. */
+typedef enum {
+ TXN_CLOSE, /* Close a DB handle whose close had failed. */
+ TXN_REMOVE, /* Remove a file. */
+ TXN_TRADE, /* Trade lockers. */
+ TXN_TRADED /* Already traded; downgrade lock. */
+} TXN_EVENT_T;
+
+struct __db_txnregion; typedef struct __db_txnregion DB_TXNREGION;
+struct __txn_logrec; typedef struct __txn_logrec DB_TXNLOGREC;
+
+/*
+ * !!!
+ * TXN_MINIMUM = (DB_LOCK_MAXID + 1) but this makes compilers complain.
+ */
+#define TXN_MINIMUM 0x80000000
+#define TXN_MAXIMUM 0xffffffff /* Maximum number of txn ids. */
+#define TXN_INVALID 0 /* Invalid transaction ID. */
+
+#define DEF_MAX_TXNS 100 /* Default max transactions. */
+#define TXN_NSLOTS 4 /* Initial slots to hold DB refs */
+
+/*
+ * Internal data maintained in shared memory for each transaction.
+ */
+typedef struct __txn_detail {
+	u_int32_t	txnid;		/* Current transaction id, also used
+					   to link the free list. */
+ pid_t pid; /* Process owning txn */
+ db_threadid_t tid; /* Thread owning txn */
+
+ DB_LSN last_lsn; /* Last LSN written for this txn. */
+ DB_LSN begin_lsn; /* LSN of begin record. */
+ roff_t parent; /* Offset of transaction's parent. */
+ roff_t name; /* Offset of txn name. */
+
+ u_int32_t nlog_dbs; /* Number of databases used. */
+ u_int32_t nlog_slots; /* Number of allocated slots. */
+ roff_t log_dbs; /* Databases used. */
+
+ DB_LSN read_lsn; /* Read LSN for MVCC. */
+ DB_LSN visible_lsn; /* LSN at which this transaction's
+ changes are visible. */
+ db_mutex_t mvcc_mtx; /* Version mutex. */
+ u_int32_t mvcc_ref; /* Number of buffers created by this
+ transaction still in cache. */
+
+ SH_TAILQ_HEAD(__tdkids) kids; /* Linked list of child txn detail. */
+ SH_TAILQ_ENTRY klinks;
+
+	/* TXN_{ABORTED, COMMITTED, PREPARED, RUNNING} */
+ u_int32_t status; /* status of the transaction */
+
+#define TXN_DTL_COLLECTED 0x1 /* collected during txn_recover */
+#define TXN_DTL_RESTORED 0x2 /* prepared txn restored */
+#define TXN_DTL_INMEMORY 0x4 /* uses in memory logs */
+#define TXN_DTL_SNAPSHOT 0x8 /* On the list of snapshot txns. */
+ u_int32_t flags;
+
+ SH_TAILQ_ENTRY links; /* active/free/snapshot list */
+
+ u_int8_t gid[DB_GID_SIZE]; /* global transaction id */
+ roff_t slots[TXN_NSLOTS]; /* Initial DB slot allocation. */
+} TXN_DETAIL;
+
+/*
+ * DB_TXNMGR --
+ * The transaction manager encapsulates the transaction system.
+ */
+struct __db_txnmgr {
+ /*
+ * These fields need to be protected for multi-threaded support.
+ *
+ * Lock list of active transactions (including the content of each
+ * TXN_DETAIL structure on the list).
+ */
+ db_mutex_t mutex;
+ /* List of active transactions. */
+ TAILQ_HEAD(_chain, __db_txn) txn_chain;
+
+ u_int32_t n_discards; /* Number of txns discarded. */
+
+ /* These fields are never updated after creation, so not protected. */
+ ENV *env; /* Environment. */
+ REGINFO reginfo; /* Region information. */
+};
+
+/* Macros to lock/unlock the transaction region as a whole. */
+#define TXN_SYSTEM_LOCK(env) \
+ MUTEX_LOCK(env, ((DB_TXNREGION *) \
+ (env)->tx_handle->reginfo.primary)->mtx_region)
+#define TXN_SYSTEM_UNLOCK(env) \
+ MUTEX_UNLOCK(env, ((DB_TXNREGION *) \
+ (env)->tx_handle->reginfo.primary)->mtx_region)
+
+/*
+ * DB_TXNREGION --
+ * The primary transaction data structure in the shared memory region.
+ */
+struct __db_txnregion {
+ db_mutex_t mtx_region; /* Region mutex. */
+
+ u_int32_t maxtxns; /* maximum number of active TXNs */
+ u_int32_t last_txnid; /* last transaction id given out */
+ u_int32_t cur_maxid; /* current max unused id. */
+
+ db_mutex_t mtx_ckp; /* Single thread checkpoints. */
+ DB_LSN last_ckp; /* lsn of the last checkpoint */
+ time_t time_ckp; /* time of last checkpoint */
+
+ DB_TXN_STAT stat; /* Statistics for txns. */
+
+#define TXN_IN_RECOVERY 0x01 /* environment is being recovered */
+ u_int32_t flags;
+ /* active TXN list */
+ SH_TAILQ_HEAD(__active) active_txn;
+ SH_TAILQ_HEAD(__mvcc) mvcc_txn;
+};
+
+/*
+ * DB_TXNLOGREC --
+ * An in-memory, linked-list copy of a log record.
+ */
+struct __txn_logrec {
+ STAILQ_ENTRY(__txn_logrec) links;/* Linked list. */
+
+ u_int8_t data[1]; /* Log record. */
+};
+
+/*
+ * Log record types. Note that these are *not* alphabetical. This is
+ * intentional so that we don't change the meaning of values between
+ * software upgrades.
+ *
+ * EXPECTED, UNEXPECTED, IGNORE, and OK are used in the txnlist functions.
+ * Here is an explanation of how the statuses are used.
+ *
+ * TXN_OK
+ * BEGIN records for transactions found on the txnlist during
+ * OPENFILES (BEGIN records are those with a prev_lsn of 0,0)
+ *
+ * TXN_COMMIT
+ * Transaction committed and should be rolled forward.
+ *
+ * TXN_ABORT
+ * This transaction's changes must be undone. Either there was
+ * never a prepare or commit record for this transaction OR there
+ * was a commit, but we are recovering to a timestamp or particular
+ * LSN and that point is before this transaction's commit.
+ *
+ * TXN_PREPARE
+ * Prepare record, but no commit record is in the log.
+ *
+ * TXN_IGNORE
+ * Generic meaning is that this transaction should not be
+ * processed during later recovery passes. We use it in a
+ *	number of different ways:
+ *
+ * 1. We never saw its BEGIN record. Therefore, the logs have
+ * been reclaimed and we *know* that this transaction doesn't
+ * need to be aborted, because in order for it to be
+ * reclaimed, there must have been a subsequent checkpoint
+ * (and any dirty pages for this transaction made it to
+ * disk).
+ *
+ * 2. This is a child transaction that created a database.
+ *	For some reason, we don't want to recreate that database
+ *	(i.e., it already exists, or some other database created
+ *	after it already exists).
+ *
+ * 3. During recovery open of subdatabases, if the master check fails,
+ * we use a TXN_IGNORE on the create of the subdb in the nested
+ * transaction.
+ *
+ * 4. During a remove, the file with the name being removed isn't
+ * the file for which we are recovering a remove.
+ *
+ * TXN_EXPECTED
+ * After a successful open during recovery, we update the
+ * transaction's status to TXN_EXPECTED. The open was done
+ * in the parent, but in the open log record, we record the
+ * child transaction's ID if we also did a create. When there
+ * is a valid ID in that field, we use it and mark the child's
+ * status as TXN_EXPECTED (indicating that we don't need to redo
+ * a create for this file).
+ *
+ * When recovering a remove, if we don't find or can't open
+ * the file, the child (which does the remove) gets marked
+ * EXPECTED (indicating that we don't need to redo the remove).
+ *
+ * TXN_UNEXPECTED
+ * During recovery, we attempted an open that should have succeeded
+ * and we got ENOENT, so like with the EXPECTED case, we indicate
+ * in the child that we got the UNEXPECTED return so that we do redo
+ * the creating/deleting operation.
+ *
+ */
+#define TXN_OK 0
+#define TXN_COMMIT 1
+#define TXN_PREPARE 2
+#define TXN_ABORT 3
+#define TXN_IGNORE 4
+#define TXN_EXPECTED 5
+#define TXN_UNEXPECTED 6
+
+#if defined(__cplusplus)
+}
+#endif
+
+#include "dbinc_auto/txn_auto.h"
+#include "dbinc_auto/txn_ext.h"
+#endif /* !_DB_TXN_H_ */
diff --git a/db-4.8.30/dbinc/win_db.h b/db-4.8.30/dbinc/win_db.h
new file mode 100644
index 0000000..ac8abb5
--- /dev/null
+++ b/db-4.8.30/dbinc/win_db.h
@@ -0,0 +1,144 @@
+/*-
+ * $Id$
+ *
+ * The following provides the information necessary to build Berkeley
+ * DB on native Windows, and other Windows environments such as MinGW.
+ */
+
+/*
+ * Windows NT 4.0 or later is required for the replication manager.
+ */
+#ifdef HAVE_REPLICATION_THREADS
+#define _WIN32_WINNT 0x0400
+#endif
+
+#ifndef DB_WINCE
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/timeb.h>
+
+#include <direct.h>
+#include <fcntl.h>
+#include <io.h>
+#include <limits.h>
+#include <memory.h>
+#include <process.h>
+#include <signal.h>
+#endif /* DB_WINCE */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <tchar.h>
+#include <time.h>
+
+/*
+ * To build Tcl interface libraries, the include path must be configured to
+ * use the directory containing <tcl.h>, usually the include directory in
+ * the Tcl distribution.
+ */
+#ifdef DB_TCL_SUPPORT
+#include <tcl.h>
+#endif
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <winsock2.h>
+
+#ifdef HAVE_GETADDRINFO
+/*
+ * Need explicit includes for IPv6 support on Windows. Both are necessary to
+ * ensure that pre-WinXP versions have an implementation of the getaddrinfo API.
+ */
+#include <ws2tcpip.h>
+#include <wspiapi.h>
+#endif
+
+/*
+ * Microsoft's C runtime library has fsync, getcwd, getpid, snprintf,
+ * strcasecmp, strncasecmp and vsnprintf, but under different names.
+ */
+#define fsync _commit
+
+#ifndef DB_WINCE
+#define getcwd(buf, size) _getcwd(buf, size)
+#endif
+#define getpid GetCurrentProcessId
+#define snprintf _snprintf
+#define strcasecmp _stricmp
+#define strncasecmp _strnicmp
+#define vsnprintf _vsnprintf
+
+#define h_errno WSAGetLastError()
+
+/*
+ * Win32 does not have getopt.
+ *
+ * The externs are here, instead of using db_config.h and clib_port.h, because
+ * that approach changes function names to BDB specific names, and the example
+ * programs use getopt and can't use BDB specific names.
+ */
+#if defined(__cplusplus)
+extern "C" {
+#endif
+extern int getopt(int, char * const *, const char *);
+#if defined(__cplusplus)
+}
+#endif
+
+/*
+ * Microsoft's compiler _doesn't_ define __STDC__ unless you invoke it with
+ * arguments turning OFF all vendor extensions. Even more unfortunately, if
+ * we do that, it fails to parse windows.h!!!!! So, we define __STDC__ here,
+ * after windows.h comes in. Note: the compiler knows we've defined it, and
+ * starts enforcing strict ANSI compliance from this point on.
+ */
+#ifndef __STDC__
+#define __STDC__ 1
+#endif
+
+#ifdef _UNICODE
+#define TO_TSTRING(dbenv, s, ts, ret) do { \
+ int __len = (int)strlen(s) + 1; \
+ ts = NULL; \
+ if ((ret = __os_malloc((dbenv), \
+ __len * sizeof(_TCHAR), &(ts))) == 0 && \
+ MultiByteToWideChar(CP_UTF8, 0, \
+ (s), -1, (ts), __len) == 0) \
+ ret = __os_posix_err(__os_get_syserr()); \
+ } while (0)
+
+#define	FROM_TSTRING(dbenv, ts, s, ret) do {			\
+ int __len = WideCharToMultiByte(CP_UTF8, 0, ts, -1, \
+ NULL, 0, NULL, NULL); \
+ s = NULL; \
+ if ((ret = __os_malloc((dbenv), __len, &(s))) == 0 && \
+ WideCharToMultiByte(CP_UTF8, 0, \
+ (ts), -1, (s), __len, NULL, NULL) == 0) \
+ ret = __os_posix_err(__os_get_syserr()); \
+ } while (0)
+
+#define FREE_STRING(dbenv, s) do { \
+ if ((s) != NULL) { \
+ __os_free((dbenv), (s)); \
+ (s) = NULL; \
+ } \
+ } while (0)
+
+#else
+#define TO_TSTRING(dbenv, s, ts, ret) (ret) = 0, (ts) = (_TCHAR *)(s)
+#define FROM_TSTRING(dbenv, ts, s, ret) (ret) = 0, (s) = (char *)(ts)
+#define FREE_STRING(dbenv, ts)
+#endif
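+
+/*
+ * Typical use: convert, call the Win32 API, free (sketch; error
+ * handling abbreviated). In non-Unicode builds the conversion collapses
+ * to a cast and the free is a no-op.
+ *
+ *	_TCHAR *tpath;
+ *	int ret;
+ *
+ *	TO_TSTRING(env, path, tpath, ret);
+ *	if (ret == 0)
+ *		(void)DeleteFile(tpath);
+ *	FREE_STRING(env, tpath);
+ */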
+
+#ifndef INVALID_HANDLE_VALUE
+#define INVALID_HANDLE_VALUE ((HANDLE)-1)
+#endif
+
+#ifndef INVALID_FILE_ATTRIBUTES
+#define INVALID_FILE_ATTRIBUTES ((DWORD)-1)
+#endif
+
+#ifndef INVALID_SET_FILE_POINTER
+#define INVALID_SET_FILE_POINTER ((DWORD)-1)
+#endif