summaryrefslogtreecommitdiff
path: root/db-4.8.30/btree/bt_rsearch.c
diff options
context:
space:
mode:
authorJesse Morgan <jesse@jesterpm.net>2016-12-17 21:28:53 -0800
committerJesse Morgan <jesse@jesterpm.net>2016-12-17 21:28:53 -0800
commit54df2afaa61c6a03cbb4a33c9b90fa572b6d07b8 (patch)
tree18147b92b969d25ffbe61935fb63035cac820dd0 /db-4.8.30/btree/bt_rsearch.c
Berkeley DB 4.8 with rust build script for linux.
Diffstat (limited to 'db-4.8.30/btree/bt_rsearch.c')
-rw-r--r--db-4.8.30/btree/bt_rsearch.c502
1 files changed, 502 insertions, 0 deletions
diff --git a/db-4.8.30/btree/bt_rsearch.c b/db-4.8.30/btree/bt_rsearch.c
new file mode 100644
index 0000000..1d5581a
--- /dev/null
+++ b/db-4.8.30/btree/bt_rsearch.c
@@ -0,0 +1,502 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996-2009 Oracle. All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995, 1996
+ * Keith Bostic. All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#include "dbinc/db_page.h"
+#include "dbinc/btree.h"
+#include "dbinc/lock.h"
+#include "dbinc/mp.h"
+
+/*
+ * __bam_rsearch --
+ * Search a btree for a record number.
+ *
+ * PUBLIC: int __bam_rsearch __P((DBC *, db_recno_t *, u_int32_t, int, int *));
+ */
+int
+__bam_rsearch(dbc, recnop, flags, stop, exactp)
+ DBC *dbc;
+ db_recno_t *recnop;
+ u_int32_t flags;
+ int stop, *exactp;
+{
+ BINTERNAL *bi;
+ BTREE_CURSOR *cp;
+ DB *dbp;
+ DB_LOCK lock;
+ DB_MPOOLFILE *mpf;
+ ENV *env;
+ PAGE *h;
+ RINTERNAL *ri;
+ db_indx_t adjust, deloffset, indx, top;
+ db_lockmode_t lock_mode;
+ db_pgno_t pg;
+ db_recno_t recno, t_recno, total;
+ u_int32_t get_mode;
+ int ret, stack, t_ret;
+
+ dbp = dbc->dbp;
+ env = dbp->env;
+ mpf = dbp->mpf;
+ cp = (BTREE_CURSOR *)dbc->internal;
+ h = NULL;
+
+ BT_STK_CLR(cp);
+
+ /*
+ * There are several ways we search a btree tree. The flags argument
+ * specifies if we're acquiring read or write locks and if we are
+ * locking pairs of pages. In addition, if we're adding or deleting
+ * an item, we have to lock the entire tree, regardless. See btree.h
+ * for more details.
+ *
+ * If write-locking pages, we need to know whether or not to acquire a
+ * write lock on a page before getting it. This depends on how deep it
+ * is in tree, which we don't know until we acquire the root page. So,
+ * if we need to lock the root page we may have to upgrade it later,
+ * because we won't get the correct lock initially.
+ *
+ * Retrieve the root page.
+ */
+
+ if ((ret = __bam_get_root(dbc, cp->root, stop, flags, &stack)) != 0)
+ return (ret);
+ lock_mode = cp->csp->lock_mode;
+ get_mode = lock_mode == DB_LOCK_WRITE ? DB_MPOOL_DIRTY : 0;
+ lock = cp->csp->lock;
+ h = cp->csp->page;
+
+ BT_STK_CLR(cp);
+ /*
+ * If appending to the tree, set the record number now -- we have the
+ * root page locked.
+ *
+ * Delete only deletes exact matches, read only returns exact matches.
+ * Note, this is different from __bam_search(), which returns non-exact
+ * matches for read.
+ *
+ * The record may not exist. We can only return the correct location
+ * for the record immediately after the last record in the tree, so do
+ * a fast check now.
+ */
+ total = RE_NREC(h);
+ if (LF_ISSET(SR_APPEND)) {
+ *exactp = 0;
+ *recnop = recno = total + 1;
+ } else {
+ recno = *recnop;
+ if (recno <= total)
+ *exactp = 1;
+ else {
+ *exactp = 0;
+ if (!LF_ISSET(SR_PAST_EOF) || recno > total + 1) {
+ /*
+ * Keep the page locked for serializability.
+ *
+ * XXX
+ * This leaves the root page locked, which will
+ * eliminate any concurrency. A possible fix
+ * would be to lock the last leaf page instead.
+ */
+ ret = __memp_fput(mpf,
+ dbc->thread_info, h, dbc->priority);
+ if ((t_ret =
+ __TLPUT(dbc, lock)) != 0 && ret == 0)
+ ret = t_ret;
+ return (ret == 0 ? DB_NOTFOUND : ret);
+ }
+ }
+ }
+
+ /*
+ * !!!
+ * Record numbers in the tree are 0-based, but the recno is
+ * 1-based. All of the calculations below have to take this
+ * into account.
+ */
+ for (total = 0;;) {
+ switch (TYPE(h)) {
+ case P_LBTREE:
+ if (LF_ISSET(SR_MAX)) {
+ indx = NUM_ENT(h) - 2;
+ goto enter;
+ }
+ /* FALLTHROUGH */
+ case P_LDUP:
+ if (LF_ISSET(SR_MAX)) {
+ indx = NUM_ENT(h) - 1;
+ goto enter;
+ }
+ recno -= total;
+ /*
+ * There may be logically deleted records on the page.
+ * If there are enough, the record may not exist.
+ */
+ if (TYPE(h) == P_LBTREE) {
+ adjust = P_INDX;
+ deloffset = O_INDX;
+ } else {
+ adjust = O_INDX;
+ deloffset = 0;
+ }
+ for (t_recno = 0, indx = 0;; indx += adjust) {
+ if (indx >= NUM_ENT(h)) {
+ *exactp = 0;
+ if (!LF_ISSET(SR_PAST_EOF) ||
+ recno > t_recno + 1) {
+ ret = __memp_fput(mpf,
+ dbc->thread_info,
+ h, dbc->priority);
+ h = NULL;
+ if ((t_ret = __TLPUT(dbc,
+ lock)) != 0 && ret == 0)
+ ret = t_ret;
+ if (ret == 0)
+ ret = DB_NOTFOUND;
+ goto err;
+ }
+ }
+ if (!B_DISSET(GET_BKEYDATA(dbp, h,
+ indx + deloffset)->type) &&
+ ++t_recno == recno)
+ break;
+ }
+
+ BT_STK_ENTER(env, cp, h, indx, lock, lock_mode, ret);
+ if (ret != 0)
+ goto err;
+ if (LF_ISSET(SR_BOTH))
+ goto get_prev;
+ return (0);
+ case P_IBTREE:
+ if (LF_ISSET(SR_MAX)) {
+ indx = NUM_ENT(h);
+ bi = GET_BINTERNAL(dbp, h, indx - 1);
+ } else for (indx = 0, top = NUM_ENT(h);;) {
+ bi = GET_BINTERNAL(dbp, h, indx);
+ if (++indx == top || total + bi->nrecs >= recno)
+ break;
+ total += bi->nrecs;
+ }
+ pg = bi->pgno;
+ break;
+ case P_LRECNO:
+ if (LF_ISSET(SR_MAX))
+ recno = NUM_ENT(h);
+ else
+ recno -= total;
+
+ /* Correct from 1-based to 0-based for a page offset. */
+ --recno;
+enter: BT_STK_ENTER(env, cp, h, recno, lock, lock_mode, ret);
+ if (ret != 0)
+ goto err;
+ if (LF_ISSET(SR_BOTH)) {
+get_prev: DB_ASSERT(env, LF_ISSET(SR_NEXT));
+ /*
+ * We have a NEXT tree, now add the sub tree
+ * that points gets to the previous page.
+ */
+ cp->csp++;
+ indx = cp->sp->indx - 1;
+ h = cp->sp->page;
+ if (TYPE(h) == P_IRECNO) {
+ ri = GET_RINTERNAL(dbp, h, indx);
+ pg = ri->pgno;
+ } else {
+ DB_ASSERT(env, TYPE(h) == P_IBTREE);
+ bi = GET_BINTERNAL(dbp, h, indx);
+ pg = bi->pgno;
+ }
+ LF_CLR(SR_NEXT | SR_BOTH);
+ LF_SET(SR_MAX);
+ stack = 1;
+ h = NULL;
+ goto lock_next;
+ }
+ return (0);
+ case P_IRECNO:
+ if (LF_ISSET(SR_MAX)) {
+ indx = NUM_ENT(h);
+ ri = GET_RINTERNAL(dbp, h, indx - 1);
+ } else for (indx = 0, top = NUM_ENT(h);;) {
+ ri = GET_RINTERNAL(dbp, h, indx);
+ if (++indx == top || total + ri->nrecs >= recno)
+ break;
+ total += ri->nrecs;
+ }
+ pg = ri->pgno;
+ break;
+ default:
+ return (__db_pgfmt(env, h->pgno));
+ }
+ --indx;
+
+ /* Return if this is the lowest page wanted. */
+ if (stop == LEVEL(h)) {
+ BT_STK_ENTER(env, cp, h, indx, lock, lock_mode, ret);
+ if (ret != 0)
+ goto err;
+ return (0);
+ }
+ if (stack) {
+ BT_STK_PUSH(env, cp, h, indx, lock, lock_mode, ret);
+ if (ret != 0)
+ goto err;
+ h = NULL;
+
+ lock_mode = DB_LOCK_WRITE;
+ get_mode = DB_MPOOL_DIRTY;
+ if ((ret =
+ __db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0)
+ goto err;
+ } else if (LF_ISSET(SR_NEXT)) {
+ /*
+ * For RECNO if we are doing a NEXT search the
+ * search recno is the one we are looking for
+ * but we want to keep the stack from the spanning
+ * node on down. We only know we have the spanning
+ * node when its child's index is 0, so save
+ * each node and discard the tree when we find out
+ * its not needed.
+ */
+ if (indx != 0 && cp->sp->page != NULL) {
+ BT_STK_POP(cp);
+ if ((ret = __bam_stkrel(dbc, STK_NOLOCK)) != 0)
+ goto err;
+ }
+
+ BT_STK_PUSH(env, cp, h, indx, lock, lock_mode, ret);
+ h = NULL;
+ if (ret != 0)
+ goto err;
+lock_next: if ((ret =
+ __db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0)
+ goto err;
+ } else {
+ /*
+ * Decide if we want to return a pointer to the next
+ * page in the stack. If we do, write lock it and
+ * never unlock it.
+ */
+ if ((LF_ISSET(SR_PARENT) &&
+ (u_int8_t)(stop + 1) >= (u_int8_t)(LEVEL(h) - 1)) ||
+ (LEVEL(h) - 1) == LEAFLEVEL)
+ stack = 1;
+
+ if ((ret = __memp_fput(mpf,
+ dbc->thread_info, h, dbc->priority)) != 0)
+ goto err;
+ h = NULL;
+
+ lock_mode = stack &&
+ LF_ISSET(SR_WRITE) ? DB_LOCK_WRITE : DB_LOCK_READ;
+ if (lock_mode == DB_LOCK_WRITE)
+ get_mode = DB_MPOOL_DIRTY;
+ if ((ret = __db_lget(dbc,
+ LCK_COUPLE_ALWAYS, pg, lock_mode, 0, &lock)) != 0) {
+ /*
+ * If we fail, discard the lock we held. This
+ * is OK because this only happens when we are
+ * descending the tree holding read-locks.
+ */
+ (void)__LPUT(dbc, lock);
+ goto err;
+ }
+ }
+
+ if ((ret = __memp_fget(mpf, &pg,
+ dbc->thread_info, dbc->txn, get_mode, &h)) != 0)
+ goto err;
+ }
+ /* NOTREACHED */
+
+err: if (h != NULL && (t_ret = __memp_fput(mpf,
+ dbc->thread_info, h, dbc->priority)) != 0 && ret == 0)
+ ret = t_ret;
+
+ BT_STK_POP(cp);
+ (void)__bam_stkrel(dbc, 0);
+
+ return (ret);
+}
+
+/*
+ * __bam_adjust --
+ * Adjust the tree after adding or deleting a record.
+ *
+ * PUBLIC: int __bam_adjust __P((DBC *, int32_t));
+ */
+int
+__bam_adjust(dbc, adjust)
+ DBC *dbc;
+ int32_t adjust;
+{
+ BTREE_CURSOR *cp;
+ DB *dbp;
+ DB_MPOOLFILE *mpf;
+ EPG *epg;
+ PAGE *h;
+ db_pgno_t root_pgno;
+ int ret;
+
+ dbp = dbc->dbp;
+ mpf = dbp->mpf;
+ cp = (BTREE_CURSOR *)dbc->internal;
+ root_pgno = cp->root;
+
+ /* Update the record counts for the tree. */
+ for (epg = cp->sp; epg <= cp->csp; ++epg) {
+ h = epg->page;
+ if (TYPE(h) == P_IBTREE || TYPE(h) == P_IRECNO) {
+ ret = __memp_dirty(mpf, &h,
+ dbc->thread_info, dbc->txn, dbc->priority, 0);
+ epg->page = h;
+ if (ret != 0)
+ return (ret);
+ if (DBC_LOGGING(dbc)) {
+ if ((ret = __bam_cadjust_log(dbp, dbc->txn,
+ &LSN(h), 0, PGNO(h), &LSN(h),
+ (u_int32_t)epg->indx, adjust,
+ PGNO(h) == root_pgno ?
+ CAD_UPDATEROOT : 0)) != 0)
+ return (ret);
+ } else
+ LSN_NOT_LOGGED(LSN(h));
+
+ if (TYPE(h) == P_IBTREE)
+ GET_BINTERNAL(dbp, h, epg->indx)->nrecs +=
+ adjust;
+ else
+ GET_RINTERNAL(dbp, h, epg->indx)->nrecs +=
+ adjust;
+
+ if (PGNO(h) == root_pgno)
+ RE_NREC_ADJ(h, adjust);
+ }
+ }
+ return (0);
+}
+
+/*
+ * __bam_nrecs --
+ * Return the number of records in the tree.
+ *
+ * PUBLIC: int __bam_nrecs __P((DBC *, db_recno_t *));
+ */
+int
+__bam_nrecs(dbc, rep)
+ DBC *dbc;
+ db_recno_t *rep;
+{
+ DB *dbp;
+ DB_LOCK lock;
+ DB_MPOOLFILE *mpf;
+ PAGE *h;
+ db_pgno_t pgno;
+ int ret, t_ret;
+
+ dbp = dbc->dbp;
+ mpf = dbp->mpf;
+
+ pgno = dbc->internal->root;
+ if ((ret = __db_lget(dbc, 0, pgno, DB_LOCK_READ, 0, &lock)) != 0)
+ return (ret);
+ if ((ret = __memp_fget(mpf, &pgno,
+ dbc->thread_info, dbc->txn, 0, &h)) != 0)
+ return (ret);
+
+ *rep = RE_NREC(h);
+
+ ret = __memp_fput(mpf, dbc->thread_info, h, dbc->priority);
+ if ((t_ret = __TLPUT(dbc, lock)) != 0 && ret == 0)
+ ret = t_ret;
+
+ return (ret);
+}
+
+/*
+ * __bam_total --
+ * Return the number of records below a page.
+ *
+ * PUBLIC: db_recno_t __bam_total __P((DB *, PAGE *));
+ */
+db_recno_t
+__bam_total(dbp, h)
+ DB *dbp;
+ PAGE *h;
+{
+ db_recno_t nrecs;
+ db_indx_t indx, top;
+
+ nrecs = 0;
+ top = NUM_ENT(h);
+
+ switch (TYPE(h)) {
+ case P_LBTREE:
+ /* Check for logically deleted records. */
+ for (indx = 0; indx < top; indx += P_INDX)
+ if (!B_DISSET(
+ GET_BKEYDATA(dbp, h, indx + O_INDX)->type))
+ ++nrecs;
+ break;
+ case P_LDUP:
+ /* Check for logically deleted records. */
+ for (indx = 0; indx < top; indx += O_INDX)
+ if (!B_DISSET(GET_BKEYDATA(dbp, h, indx)->type))
+ ++nrecs;
+ break;
+ case P_IBTREE:
+ for (indx = 0; indx < top; indx += O_INDX)
+ nrecs += GET_BINTERNAL(dbp, h, indx)->nrecs;
+ break;
+ case P_LRECNO:
+ nrecs = NUM_ENT(h);
+ break;
+ case P_IRECNO:
+ for (indx = 0; indx < top; indx += O_INDX)
+ nrecs += GET_RINTERNAL(dbp, h, indx)->nrecs;
+ break;
+ }
+
+ return (nrecs);
+}