diff options
author | Jesse Morgan <jesse@jesterpm.net> | 2016-12-17 21:28:53 -0800 |
---|---|---|
committer | Jesse Morgan <jesse@jesterpm.net> | 2016-12-17 21:28:53 -0800 |
commit | 54df2afaa61c6a03cbb4a33c9b90fa572b6d07b8 (patch) | |
tree | 18147b92b969d25ffbe61935fb63035cac820dd0 /db-4.8.30/dbreg |
Berkeley DB 4.8 with rust build script for linux.
Diffstat (limited to 'db-4.8.30/dbreg')
-rw-r--r-- | db-4.8.30/dbreg/dbreg.c | 976 | ||||
-rw-r--r-- | db-4.8.30/dbreg/dbreg.src | 38 | ||||
-rw-r--r-- | db-4.8.30/dbreg/dbreg_auto.c | 282 | ||||
-rw-r--r-- | db-4.8.30/dbreg/dbreg_autop.c | 78 | ||||
-rw-r--r-- | db-4.8.30/dbreg/dbreg_rec.c | 397 | ||||
-rw-r--r-- | db-4.8.30/dbreg/dbreg_stat.c | 135 | ||||
-rw-r--r-- | db-4.8.30/dbreg/dbreg_util.c | 810 |
7 files changed, 2716 insertions, 0 deletions
diff --git a/db-4.8.30/dbreg/dbreg.c b/db-4.8.30/dbreg/dbreg.c new file mode 100644 index 0000000..5876ce4 --- /dev/null +++ b/db-4.8.30/dbreg/dbreg.c @@ -0,0 +1,976 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996-2009 Oracle. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/log.h" +#include "dbinc/txn.h" +#include "dbinc/db_am.h" + +static int __dbreg_push_id __P((ENV *, int32_t)); +static int __dbreg_pop_id __P((ENV *, int32_t *)); +static int __dbreg_pluck_id __P((ENV *, int32_t)); + +/* + * The dbreg subsystem, as its name implies, registers database handles so + * that we can associate log messages with them without logging a filename + * or a full, unique DB ID. Instead, we assign each dbp an int32_t which is + * easy and cheap to log, and use this subsystem to map back and forth. + * + * Overview of how dbreg ids are managed: + * + * OPEN + * dbreg_setup (Creates FNAME struct.) + * dbreg_new_id (Assigns new ID to dbp and logs it. May be postponed + * until we attempt to log something else using that dbp, if the dbp + * was opened on a replication client.) + * + * CLOSE + * dbreg_close_id (Logs closure of dbp/revocation of ID.) + * dbreg_revoke_id (As name implies, revokes ID.) + * dbreg_teardown (Destroys FNAME.) + * + * RECOVERY + * dbreg_setup + * dbreg_assign_id (Assigns a particular ID we have in the log to a dbp.) + * + * sometimes: dbreg_revoke_id; dbreg_teardown + * other times: normal close path + * + * A note about locking: + * + * FNAME structures are referenced only by their corresponding dbp's + * until they have a valid id. + * + * Once they have a valid id, they must get linked into the log + * region list so they can get logged on checkpoints. 
+ * + * An FNAME that may/does have a valid id must be accessed under + * protection of the mtx_filelist, with the following exception: + * + * We don't want to have to grab the mtx_filelist on every log + * record, and it should be safe not to do so when we're just + * looking at the id, because once allocated, the id should + * not change under a handle until the handle is closed. + * + * If a handle is closed during an attempt by another thread to + * log with it, well, the application doing the close deserves to + * go down in flames and a lot else is about to fail anyway. + * + * When in the course of logging we encounter an invalid id + * and go to allocate it lazily, we *do* need to check again + * after grabbing the mutex, because it's possible to race with + * another thread that has also decided that it needs to allocate + * a id lazily. + * + * See SR #5623 for further discussion of the new dbreg design. + */ + +/* + * __dbreg_setup -- + * Allocate and initialize an FNAME structure. The FNAME structures + * live in the log shared region and map one-to-one with open database handles. + * When the handle needs to be logged, the FNAME should have a valid fid + * allocated. If the handle currently isn't logged, it still has an FNAME + * entry. If we later discover that the handle needs to be logged, we can + * allocate a id for it later. (This happens when the handle is on a + * replication client that later becomes a master.) + * + * PUBLIC: int __dbreg_setup __P((DB *, const char *, const char *, u_int32_t)); + */ +int +__dbreg_setup(dbp, fname, dname, create_txnid) + DB *dbp; + const char *fname, *dname; + u_int32_t create_txnid; +{ + DB_LOG *dblp; + ENV *env; + FNAME *fnp; + REGINFO *infop; + int ret; + size_t len; + void *p; + + env = dbp->env; + dblp = env->lg_handle; + infop = &dblp->reginfo; + + fnp = NULL; + p = NULL; + + /* Allocate an FNAME and, if necessary, a buffer for the name itself. 
*/ + LOG_SYSTEM_LOCK(env); + if ((ret = __env_alloc(infop, sizeof(FNAME), &fnp)) != 0) + goto err; + memset(fnp, 0, sizeof(FNAME)); + if (fname == NULL) + fnp->fname_off = INVALID_ROFF; + else { + len = strlen(fname) + 1; + if ((ret = __env_alloc(infop, len, &p)) != 0) + goto err; + fnp->fname_off = R_OFFSET(infop, p); + memcpy(p, fname, len); + } + if (dname == NULL) + fnp->dname_off = INVALID_ROFF; + else { + len = strlen(dname) + 1; + if ((ret = __env_alloc(infop, len, &p)) != 0) + goto err; + fnp->dname_off = R_OFFSET(infop, p); + memcpy(p, dname, len); + } + LOG_SYSTEM_UNLOCK(env); + + /* + * Fill in all the remaining info that we'll need later to register + * the file, if we use it for logging. + */ + fnp->id = fnp->old_id = DB_LOGFILEID_INVALID; + fnp->s_type = dbp->type; + memcpy(fnp->ufid, dbp->fileid, DB_FILE_ID_LEN); + fnp->meta_pgno = dbp->meta_pgno; + fnp->create_txnid = create_txnid; + dbp->dbenv->thread_id(dbp->dbenv, &fnp->pid, NULL); + + if (F_ISSET(dbp, DB_AM_INMEM)) + F_SET(fnp, DB_FNAME_INMEM); + if (F_ISSET(dbp, DB_AM_RECOVER)) + F_SET(fnp, DB_FNAME_RECOVER); + fnp->txn_ref = 1; + fnp->mutex = dbp->mutex; + + dbp->log_filename = fnp; + + return (0); + +err: LOG_SYSTEM_UNLOCK(env); + if (ret == ENOMEM) + __db_errx(env, + "Logging region out of memory; you may need to increase its size"); + + return (ret); +} + +/* + * __dbreg_teardown -- + * Destroy a DB handle's FNAME struct. This is only called when closing + * the DB. + * + * PUBLIC: int __dbreg_teardown __P((DB *)); + */ +int +__dbreg_teardown(dbp) + DB *dbp; +{ + int ret; + + /* + * We may not have an FNAME if we were never opened. This is not an + * error. + */ + if (dbp->log_filename == NULL) + return (0); + + ret = __dbreg_teardown_int(dbp->env, dbp->log_filename); + + /* We freed the copy of the mutex from the FNAME. */ + dbp->log_filename = NULL; + dbp->mutex = MUTEX_INVALID; + + return (ret); +} + +/* + * __dbreg_teardown_int -- + * Destroy an FNAME struct. 
+ * + * PUBLIC: int __dbreg_teardown_int __P((ENV *, FNAME *)); + */ +int +__dbreg_teardown_int(env, fnp) + ENV *env; + FNAME *fnp; +{ + DB_LOG *dblp; + REGINFO *infop; + int ret; + + if (F_ISSET(fnp, DB_FNAME_NOTLOGGED)) + return (0); + dblp = env->lg_handle; + infop = &dblp->reginfo; + + DB_ASSERT(env, fnp->id == DB_LOGFILEID_INVALID); + ret = __mutex_free(env, &fnp->mutex); + + LOG_SYSTEM_LOCK(env); + if (fnp->fname_off != INVALID_ROFF) + __env_alloc_free(infop, R_ADDR(infop, fnp->fname_off)); + if (fnp->dname_off != INVALID_ROFF) + __env_alloc_free(infop, R_ADDR(infop, fnp->dname_off)); + __env_alloc_free(infop, fnp); + LOG_SYSTEM_UNLOCK(env); + + return (ret); +} + +/* + * __dbreg_new_id -- + * Get an unused dbreg id to this database handle. + * Used as a wrapper to acquire the mutex and + * only set the id on success. + * + * PUBLIC: int __dbreg_new_id __P((DB *, DB_TXN *)); + */ +int +__dbreg_new_id(dbp, txn) + DB *dbp; + DB_TXN *txn; +{ + DB_LOG *dblp; + ENV *env; + FNAME *fnp; + LOG *lp; + int32_t id; + int ret; + + env = dbp->env; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + fnp = dbp->log_filename; + + /* The mtx_filelist protects the FNAME list and id management. */ + MUTEX_LOCK(env, lp->mtx_filelist); + if (fnp->id != DB_LOGFILEID_INVALID) { + MUTEX_UNLOCK(env, lp->mtx_filelist); + return (0); + } + if ((ret = __dbreg_get_id(dbp, txn, &id)) == 0) + fnp->id = id; + MUTEX_UNLOCK(env, lp->mtx_filelist); + return (ret); +} + +/* + * __dbreg_get_id -- + * Assign an unused dbreg id to this database handle. + * Assume the caller holds the mtx_filelist locked. Assume the + * caller will set the fnp->id field with the id we return. 
+ * + * PUBLIC: int __dbreg_get_id __P((DB *, DB_TXN *, int32_t *)); + */ +int +__dbreg_get_id(dbp, txn, idp) + DB *dbp; + DB_TXN *txn; + int32_t *idp; +{ + DB_LOG *dblp; + ENV *env; + FNAME *fnp; + LOG *lp; + int32_t id; + int ret; + + env = dbp->env; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + fnp = dbp->log_filename; + + /* + * It's possible that after deciding we needed to call this function, + * someone else allocated an ID before the lock was grabbed. That + * race is not checked for here: __dbreg_new_id re-tests fnp->id after + * taking the mtx_filelist and only then calls this function. + */ + /* Get an unused ID from the free list. */ + if ((ret = __dbreg_pop_id(env, &id)) != 0) + goto err; + + /* If no ID was found, allocate a new one. */ + if (id == DB_LOGFILEID_INVALID) + id = lp->fid_max++; + + /* If the file is durable (i.e., not flagged not-durable), mark it as such. */ + if (!F_ISSET(dbp, DB_AM_NOT_DURABLE)) + F_SET(fnp, DB_FNAME_DURABLE); + + /* Hook the FNAME into the list of open files. */ + SH_TAILQ_INSERT_HEAD(&lp->fq, fnp, q, __fname); + + /* + * Log the registry. We should only request a new ID in situations + * where logging is reasonable. + */ + DB_ASSERT(env, !F_ISSET(dbp, DB_AM_RECOVER)); + + if ((ret = __dbreg_log_id(dbp, txn, id, 0)) != 0) + goto err; + + /* + * Once we log the create_txnid, we need to make sure we never + * log it again (as might happen if this is a replication client + * that later upgrades to a master). + */ + fnp->create_txnid = TXN_INVALID; + + DB_ASSERT(env, dbp->type == fnp->s_type); + DB_ASSERT(env, dbp->meta_pgno == fnp->meta_pgno); + + if ((ret = __dbreg_add_dbentry(env, dblp, dbp, id)) != 0) + goto err; + /* + * If we have a successful call, set the ID. Otherwise + * we have to revoke it and remove it from all the lists + * it has been added to, and return an invalid id. 
+ */ +err: + if (ret != 0 && id != DB_LOGFILEID_INVALID) { + (void)__dbreg_revoke_id(dbp, 1, id); + id = DB_LOGFILEID_INVALID; + } + *idp = id; + return (ret); +} + +/* + * __dbreg_assign_id -- + * Assign a particular dbreg id to this database handle. + * + * PUBLIC: int __dbreg_assign_id __P((DB *, int32_t, int)); + */ +int +__dbreg_assign_id(dbp, id, deleted) + DB *dbp; + int32_t id; + int deleted; +{ + DB *close_dbp; + DB_LOG *dblp; + ENV *env; + FNAME *close_fnp, *fnp; + LOG *lp; + int ret; + + env = dbp->env; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + fnp = dbp->log_filename; + + close_dbp = NULL; + close_fnp = NULL; + + /* The mtx_filelist protects the FNAME list and id management. */ + MUTEX_LOCK(env, lp->mtx_filelist); + + /* We should only call this on DB handles that have no ID. */ + DB_ASSERT(env, fnp->id == DB_LOGFILEID_INVALID); + + /* + * Make sure there isn't already a file open with this ID. There can + * be in recovery, if we're recovering across a point where an ID got + * reused. + */ + if (__dbreg_id_to_fname(dblp, id, 1, &close_fnp) == 0) { + /* + * We want to save off any dbp we have open with this id. We + * can't safely close it now, because we hold the mtx_filelist, + * but we should be able to rely on it being open in this + * process, and we're running recovery, so no other thread + * should muck with it if we just put off closing it until + * we're ready to return. + * + * Once we have the dbp, revoke its id; we're about to + * reuse it. + */ + ret = __dbreg_id_to_db(env, NULL, &close_dbp, id, 0); + if (ret == ENOENT) { + ret = 0; + goto cont; + } else if (ret != 0) + goto err; + + if ((ret = __dbreg_revoke_id(close_dbp, 1, + DB_LOGFILEID_INVALID)) != 0) + goto err; + } + + /* + * Remove this ID from the free list, if it's there, and make sure + * we don't allocate it anew. 
+ */ +cont: if ((ret = __dbreg_pluck_id(env, id)) != 0) + goto err; + if (id >= lp->fid_max) + lp->fid_max = id + 1; + + /* Now go ahead and assign the id to our dbp. */ + fnp->id = id; + /* If the file is durable (i.e., not, not-durable), mark it as such. */ + if (!F_ISSET(dbp, DB_AM_NOT_DURABLE)) + F_SET(fnp, DB_FNAME_DURABLE); + SH_TAILQ_INSERT_HEAD(&lp->fq, fnp, q, __fname); + + /* + * If we get an error adding the dbentry, revoke the id. + * We void the return value since we want to retain and + * return the original error in ret anyway. + */ + if ((ret = __dbreg_add_dbentry(env, dblp, dbp, id)) != 0) + (void)__dbreg_revoke_id(dbp, 1, id); + else + dblp->dbentry[id].deleted = deleted; + +err: MUTEX_UNLOCK(env, lp->mtx_filelist); + + /* There's nothing useful that our caller can do if this close fails. */ + if (close_dbp != NULL) + (void)__db_close(close_dbp, NULL, DB_NOSYNC); + + return (ret); +} + +/* + * __dbreg_revoke_id -- + * Take a log id away from a dbp, in preparation for closing it, + * but without logging the close. + * + * PUBLIC: int __dbreg_revoke_id __P((DB *, int, int32_t)); + */ +int +__dbreg_revoke_id(dbp, have_lock, force_id) + DB *dbp; + int have_lock; + int32_t force_id; +{ + DB_REP *db_rep; + ENV *env; + int push; + + env = dbp->env; + + /* + * If we are not in recovery but the file was opened for a recovery + * operation, then this process aborted a transaction for another + * process and the id may still be in use, so don't reuse this id. + * If our fid generation in replication has changed, this fid + * should not be reused + */ + db_rep = env->rep_handle; + push = (!F_ISSET(dbp, DB_AM_RECOVER) || IS_RECOVERING(env)) && + (!REP_ON(env) || ((REP *)db_rep->region)->gen == dbp->fid_gen); + + return (__dbreg_revoke_id_int(dbp->env, + dbp->log_filename, have_lock, push, force_id)); +} +/* + * __dbreg_revoke_id_int -- + * Revoke a log, in preparation for closing it, but without logging + * the close. 
+ * + * PUBLIC: int __dbreg_revoke_id_int + * PUBLIC: __P((ENV *, FNAME *, int, int, int32_t)); + */ +int +__dbreg_revoke_id_int(env, fnp, have_lock, push, force_id) + ENV *env; + FNAME *fnp; + int have_lock, push; + int32_t force_id; +{ + DB_LOG *dblp; + LOG *lp; + int32_t id; + int ret; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + ret = 0; + + /* If we lack an ID, this is a null-op. */ + if (fnp == NULL) + return (0); + + /* + * If we have a force_id, we had an error after allocating + * the id, and putting it on the fq list, but before we + * finished setting up fnp. So, if we have a force_id use it. + */ + if (force_id != DB_LOGFILEID_INVALID) + id = force_id; + else if (fnp->id == DB_LOGFILEID_INVALID) { + if (fnp->old_id == DB_LOGFILEID_INVALID) + return (0); + id = fnp->old_id; + } else + id = fnp->id; + if (!have_lock) + MUTEX_LOCK(env, lp->mtx_filelist); + + fnp->id = DB_LOGFILEID_INVALID; + fnp->old_id = DB_LOGFILEID_INVALID; + + /* Remove the FNAME from the list of open files. */ + SH_TAILQ_REMOVE(&lp->fq, fnp, q, __fname); + + /* + * This FNAME may be for a DBP which is already closed. Its ID may + * still be in use by an aborting transaction. If not, + * remove this id from the dbentry table and push it onto the + * free list. + */ + if (!F_ISSET(fnp, DB_FNAME_CLOSED) && + (ret = __dbreg_rem_dbentry(dblp, id)) == 0 && push) + ret = __dbreg_push_id(env, id); + + if (!have_lock) + MUTEX_UNLOCK(env, lp->mtx_filelist); + return (ret); +} + +/* + * __dbreg_close_id -- + * Take a dbreg id away from a dbp that we're closing, and log + * the unregistry if the refcount goes to 0. + * + * PUBLIC: int __dbreg_close_id __P((DB *, DB_TXN *, u_int32_t)); + */ +int +__dbreg_close_id(dbp, txn, op) + DB *dbp; + DB_TXN *txn; + u_int32_t op; +{ + DB_LOG *dblp; + ENV *env; + FNAME *fnp; + LOG *lp; + int ret, t_ret; + + env = dbp->env; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + fnp = dbp->log_filename; + + /* If we lack an ID, this is a null-op. 
*/ + if (fnp == NULL) + return (0); + + if (fnp->id == DB_LOGFILEID_INVALID) { + ret = __dbreg_revoke_id(dbp, 0, DB_LOGFILEID_INVALID); + goto done; + } + + /* + * If we are the last reference to this db then we need to log it + * as closed. Otherwise the last transaction will do the logging. + * Remove the DBP from the db entry table since it can nolonger + * be used. If we abort it will have to be reopened. + */ + ret = 0; + DB_ASSERT(env, fnp->txn_ref > 0); + if (fnp->txn_ref > 1) { + MUTEX_LOCK(env, dbp->mutex); + if (fnp->txn_ref > 1) { + if (!F_ISSET(fnp, DB_FNAME_CLOSED) && + (t_ret = __dbreg_rem_dbentry( + env->lg_handle, fnp->id)) != 0 && ret == 0) + ret = t_ret; + + /* + * The DB handle has been closed in the logging system. + * Transactions may still have a ref to this name. + * Mark it so that if recovery reopens the file id + * the transaction will not close the wrong handle. + */ + F_SET(fnp, DB_FNAME_CLOSED); + fnp->txn_ref--; + MUTEX_UNLOCK(env, dbp->mutex); + /* The mutex now lives only in the FNAME. */ + dbp->mutex = MUTEX_INVALID; + dbp->log_filename = NULL; + goto no_log; + } + } + MUTEX_LOCK(env, lp->mtx_filelist); + + if ((ret = __dbreg_log_close(env, fnp, txn, op)) != 0) + goto err; + ret = __dbreg_revoke_id(dbp, 1, DB_LOGFILEID_INVALID); + +err: MUTEX_UNLOCK(env, lp->mtx_filelist); + +done: if ((t_ret = __dbreg_teardown(dbp)) != 0 && ret == 0) + ret = t_ret; +no_log: + return (ret); +} +/* + * __dbreg_close_id_int -- + * Close down a dbreg id and log the unregistry. This is called only + * when a transaction has the last ref to the fname. 
+ * + * PUBLIC: int __dbreg_close_id_int __P((ENV *, FNAME *, u_int32_t, int)); + */ +int +__dbreg_close_id_int(env, fnp, op, locked) + ENV *env; + FNAME *fnp; + u_int32_t op; + int locked; +{ + DB_LOG *dblp; + LOG *lp; + int ret, t_ret; + + DB_ASSERT(env, fnp->txn_ref == 1); + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + if (fnp->id == DB_LOGFILEID_INVALID) + return (__dbreg_revoke_id_int(env, + fnp, locked, 1, DB_LOGFILEID_INVALID)); + + if (F_ISSET(fnp, DB_FNAME_RECOVER)) + return (__dbreg_close_file(env, fnp)); + /* + * If log_close fails then it will mark the name DB_FNAME_NOTLOGGED + * and the id must persist. + */ + if (!locked) + MUTEX_LOCK(env, lp->mtx_filelist); + if ((ret = __dbreg_log_close(env, fnp, NULL, op)) != 0) + goto err; + + ret = __dbreg_revoke_id_int(env, fnp, 1, 1, DB_LOGFILEID_INVALID); + +err: if (!locked) + MUTEX_UNLOCK(env, lp->mtx_filelist); + + if ((t_ret = __dbreg_teardown_int(env, fnp)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * __dbreg_failchk -- + * + * Look for entries that belong to dead processes and either close them + * out or, if there are pending transactions, just remove the mutex which + * will get discarded later. 
+ * + * PUBLIC: int __dbreg_failchk __P((ENV *)); + */ +int +__dbreg_failchk(env) + ENV *env; +{ + DB_ENV *dbenv; + DB_LOG *dblp; + FNAME *fnp, *nnp; + LOG *lp; + int ret, t_ret; + char buf[DB_THREADID_STRLEN]; + + if ((dblp = env->lg_handle) == NULL) + return (0); + + lp = dblp->reginfo.primary; + dbenv = env->dbenv; + ret = 0; + + MUTEX_LOCK(env, lp->mtx_filelist); + for (fnp = SH_TAILQ_FIRST(&lp->fq, __fname); fnp != NULL; fnp = nnp) { + nnp = SH_TAILQ_NEXT(fnp, q, __fname); + if (dbenv->is_alive(dbenv, fnp->pid, 0, DB_MUTEX_PROCESS_ONLY)) + continue; + MUTEX_LOCK(env, fnp->mutex); + __db_msg(env, + "Freeing log information for process: %s, (ref %lu)", + dbenv->thread_id_string(dbenv, fnp->pid, 0, buf), + (u_long)fnp->txn_ref); + if (fnp->txn_ref > 1 || F_ISSET(fnp, DB_FNAME_CLOSED)) { + if (!F_ISSET(fnp, DB_FNAME_CLOSED)) { + fnp->txn_ref--; + F_SET(fnp, DB_FNAME_CLOSED); + } + MUTEX_UNLOCK(env, fnp->mutex); + fnp->mutex = MUTEX_INVALID; + fnp->pid = 0; + } else { + F_SET(fnp, DB_FNAME_CLOSED); + if ((t_ret = __dbreg_close_id_int(env, + fnp, DBREG_CLOSE, 1)) && ret == 0) + ret = t_ret; + } + } + + MUTEX_UNLOCK(env, lp->mtx_filelist); + return (ret); +} +/* + * __dbreg_log_close -- + * + * Log a close of a database. Called when closing a file or when a + * replication client is becoming a master. That closes all the + * files it previously had open. + * + * Assumes caller holds the lp->mutex_filelist lock already. 
+ * + * PUBLIC: int __dbreg_log_close __P((ENV *, FNAME *, + * PUBLIC: DB_TXN *, u_int32_t)); + */ +int +__dbreg_log_close(env, fnp, txn, op) + ENV *env; + FNAME *fnp; + DB_TXN *txn; + u_int32_t op; +{ + DBT fid_dbt, r_name, *dbtp; + DB_LOG *dblp; + DB_LSN r_unused; + int ret; + + dblp = env->lg_handle; + ret = 0; + + if (fnp->fname_off == INVALID_ROFF) + dbtp = NULL; + else { + memset(&r_name, 0, sizeof(r_name)); + r_name.data = R_ADDR(&dblp->reginfo, fnp->fname_off); + r_name.size = (u_int32_t)strlen((char *)r_name.data) + 1; + dbtp = &r_name; + } + memset(&fid_dbt, 0, sizeof(fid_dbt)); + fid_dbt.data = fnp->ufid; + fid_dbt.size = DB_FILE_ID_LEN; + if ((ret = __dbreg_register_log(env, txn, &r_unused, + F_ISSET(fnp, DB_FNAME_DURABLE) ? 0 : DB_LOG_NOT_DURABLE, + op, dbtp, &fid_dbt, fnp->id, + fnp->s_type, fnp->meta_pgno, TXN_INVALID)) != 0) { + /* + * We are trying to close, but the log write failed. + * Unfortunately, close needs to plow forward, because + * the application can't do anything with the handle. + * Make the entry in the shared memory region so that + * when we close the environment, we know that this + * happened. Also, make sure we remove this from the + * per-process table, so that we don't try to close it + * later. + */ + F_SET(fnp, DB_FNAME_NOTLOGGED); + (void)__dbreg_rem_dbentry(dblp, fnp->id); + } + return (ret); +} + +/* + * __dbreg_push_id and __dbreg_pop_id -- + * Dbreg ids from closed files are kept on a stack in shared memory + * for recycling. (We want to reuse them as much as possible because each + * process keeps open files in an array by ID.) Push them to the stack and + * pop them from it, managing memory as appropriate. + * + * The stack is protected by the mtx_filelist, and both functions assume it + * is already locked. 
+ */ +static int +__dbreg_push_id(env, id) + ENV *env; + int32_t id; +{ + DB_LOG *dblp; + LOG *lp; + REGINFO *infop; + int32_t *stack, *newstack; + int ret; + + dblp = env->lg_handle; + infop = &dblp->reginfo; + lp = infop->primary; + + if (id == lp->fid_max - 1) { + lp->fid_max--; + return (0); + } + + /* Check if we have room on the stack. */ + if (lp->free_fid_stack == INVALID_ROFF || + lp->free_fids_alloced <= lp->free_fids + 1) { + LOG_SYSTEM_LOCK(env); + if ((ret = __env_alloc(infop, + (lp->free_fids_alloced + 20) * sizeof(u_int32_t), + &newstack)) != 0) { + LOG_SYSTEM_UNLOCK(env); + return (ret); + } + + if (lp->free_fid_stack != INVALID_ROFF) { + stack = R_ADDR(infop, lp->free_fid_stack); + memcpy(newstack, stack, + lp->free_fids_alloced * sizeof(u_int32_t)); + __env_alloc_free(infop, stack); + } + lp->free_fid_stack = R_OFFSET(infop, newstack); + lp->free_fids_alloced += 20; + LOG_SYSTEM_UNLOCK(env); + } + + stack = R_ADDR(infop, lp->free_fid_stack); + stack[lp->free_fids++] = id; + return (0); +} + +static int +__dbreg_pop_id(env, id) + ENV *env; + int32_t *id; +{ + DB_LOG *dblp; + LOG *lp; + int32_t *stack; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + /* Do we have anything to pop? */ + if (lp->free_fid_stack != INVALID_ROFF && lp->free_fids > 0) { + stack = R_ADDR(&dblp->reginfo, lp->free_fid_stack); + *id = stack[--lp->free_fids]; + } else + *id = DB_LOGFILEID_INVALID; + + return (0); +} + +/* + * __dbreg_pluck_id -- + * Remove a particular dbreg id from the stack of free ids. This is + * used when we open a file, as in recovery, with a specific ID that might + * be on the stack. + * + * Returns success whether or not the particular id was found, and like + * push and pop, assumes that the mtx_filelist is locked. 
+ */ +static int +__dbreg_pluck_id(env, id) + ENV *env; + int32_t id; +{ + DB_LOG *dblp; + LOG *lp; + int32_t *stack; + u_int i; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + if (id >= lp->fid_max) + return (0); + + /* Do we have anything to look at? */ + if (lp->free_fid_stack != INVALID_ROFF) { + stack = R_ADDR(&dblp->reginfo, lp->free_fid_stack); + for (i = 0; i < lp->free_fids; i++) + if (id == stack[i]) { + /* + * Found it. Overwrite it with the top + * id (which may harmlessly be itself), + * and shorten the stack by one. + */ + stack[i] = stack[lp->free_fids - 1]; + lp->free_fids--; + return (0); + } + } + + return (0); +} + +/* + * __dbreg_log_id -- + * Used for in-memory named files. They are created in mpool and + * are given id's early in the open process so that we can read and + * create pages in the mpool for the files. However, at the time that + * the mpf is created, the file may not be fully created and/or its + * meta-data may not be fully known, so we can't do a full dbregister. + * This is a routine exported that will log a complete dbregister + * record that will allow for both recovery and replication. + * + * PUBLIC: int __dbreg_log_id __P((DB *, DB_TXN *, int32_t, int)); + */ +int +__dbreg_log_id(dbp, txn, id, needlock) + DB *dbp; + DB_TXN *txn; + int32_t id; + int needlock; +{ + DBT fid_dbt, r_name; + DB_LOG *dblp; + DB_LSN unused; + ENV *env; + FNAME *fnp; + LOG *lp; + u_int32_t op; + int i, ret; + + env = dbp->env; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + fnp = dbp->log_filename; + + /* + * Verify that the fnp has been initialized, by seeing if it + * has any non-zero bytes in it. + */ + for (i = 0; i < DB_FILE_ID_LEN; i++) + if (fnp->ufid[i] != 0) + break; + if (i == DB_FILE_ID_LEN) + memcpy(fnp->ufid, dbp->fileid, DB_FILE_ID_LEN); + + if (fnp->s_type == DB_UNKNOWN) + fnp->s_type = dbp->type; + + /* + * Log the registry. We should only request a new ID in situations + * where logging is reasonable. 
+ */ + memset(&fid_dbt, 0, sizeof(fid_dbt)); + memset(&r_name, 0, sizeof(r_name)); + + if (needlock) + MUTEX_LOCK(env, lp->mtx_filelist); + + if (fnp->fname_off != INVALID_ROFF) { + r_name.data = R_ADDR(&dblp->reginfo, fnp->fname_off); + r_name.size = (u_int32_t)strlen((char *)r_name.data) + 1; + } + + fid_dbt.data = dbp->fileid; + fid_dbt.size = DB_FILE_ID_LEN; + + op = !F_ISSET(dbp, DB_AM_OPEN_CALLED) ? DBREG_PREOPEN : + (F_ISSET(dbp, DB_AM_INMEM) ? DBREG_REOPEN : DBREG_OPEN); + ret = __dbreg_register_log(env, txn, &unused, + F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0, + op, r_name.size == 0 ? NULL : &r_name, &fid_dbt, id, + fnp->s_type, fnp->meta_pgno, fnp->create_txnid); + + if (needlock) + MUTEX_UNLOCK(env, lp->mtx_filelist); + + return (ret); +} diff --git a/db-4.8.30/dbreg/dbreg.src b/db-4.8.30/dbreg/dbreg.src new file mode 100644 index 0000000..02b2542 --- /dev/null +++ b/db-4.8.30/dbreg/dbreg.src @@ -0,0 +1,38 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996-2009 Oracle. All rights reserved. + * + * $Id$ + */ + +DBPRIVATE +PREFIX __dbreg + +INCLUDE #include "db_int.h" +INCLUDE #include "dbinc/crypto.h" +INCLUDE #include "dbinc/db_page.h" +INCLUDE #include "dbinc/db_dispatch.h" +INCLUDE #include "dbinc/db_am.h" +INCLUDE #include "dbinc/log.h" +INCLUDE #include "dbinc/txn.h" +INCLUDE + +/* + * Used for registering name/id translations at open or close. 
+ * opcode: register or unregister + * name: file name + * uid: unique file id + * fileid: dbreg id assigned to the file + * ftype: database type + * meta_pgno: meta_pgno of the database handle + * id: transaction id of the subtransaction that created the fs object + */ +BEGIN register 42 2 +ARG opcode u_int32_t lu +DBT name DBT s +DBT uid DBT s +ARG fileid int32_t ld +ARG ftype DBTYPE lx +ARG meta_pgno db_pgno_t lu +ARG id u_int32_t lx +END diff --git a/db-4.8.30/dbreg/dbreg_auto.c b/db-4.8.30/dbreg/dbreg_auto.c new file mode 100644 index 0000000..bfd90ba --- /dev/null +++ b/db-4.8.30/dbreg/dbreg_auto.c @@ -0,0 +1,282 @@ +/* Do not edit: automatically built by gen_rec.awk. */ + +#include "db_config.h" +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_dispatch.h" +#include "dbinc/db_am.h" +#include "dbinc/log.h" +#include "dbinc/txn.h" + +/* + * PUBLIC: int __dbreg_register_read __P((ENV *, void *, + * PUBLIC: __dbreg_register_args **)); + */ +int +__dbreg_register_read(env, recbuf, argpp) + ENV *env; + void *recbuf; + __dbreg_register_args **argpp; +{ + __dbreg_register_args *argp; + u_int32_t uinttmp; + u_int8_t *bp; + int ret; + + if ((ret = __os_malloc(env, + sizeof(__dbreg_register_args) + sizeof(DB_TXN), &argp)) != 0) + return (ret); + bp = recbuf; + argp->txnp = (DB_TXN *)&argp[1]; + memset(argp->txnp, 0, sizeof(DB_TXN)); + + LOGCOPY_32(env, &argp->type, bp); + bp += sizeof(argp->type); + + LOGCOPY_32(env, &argp->txnp->txnid, bp); + bp += sizeof(argp->txnp->txnid); + + LOGCOPY_TOLSN(env, &argp->prev_lsn, bp); + bp += sizeof(DB_LSN); + + LOGCOPY_32(env, &argp->opcode, bp); + bp += sizeof(argp->opcode); + + memset(&argp->name, 0, sizeof(argp->name)); + LOGCOPY_32(env,&argp->name.size, bp); + bp += sizeof(u_int32_t); + argp->name.data = bp; + bp += argp->name.size; + + memset(&argp->uid, 0, sizeof(argp->uid)); + LOGCOPY_32(env,&argp->uid.size, bp); + bp += sizeof(u_int32_t); + argp->uid.data = bp; + bp += argp->uid.size; + + 
LOGCOPY_32(env, &uinttmp, bp); + argp->fileid = 
(int32_t)uinttmp; + bp += sizeof(uinttmp); + + LOGCOPY_32(env, &uinttmp, bp); + argp->ftype = (DBTYPE)uinttmp; + bp += sizeof(uinttmp); + + LOGCOPY_32(env, &uinttmp, bp); + argp->meta_pgno = (db_pgno_t)uinttmp; + bp += sizeof(uinttmp); + + LOGCOPY_32(env, &argp->id, bp); + bp += sizeof(argp->id); + + *argpp = argp; + return (ret); +} + +/* + * PUBLIC: int __dbreg_register_log __P((ENV *, DB_TXN *, DB_LSN *, + * PUBLIC: u_int32_t, u_int32_t, const DBT *, const DBT *, int32_t, DBTYPE, + * PUBLIC: db_pgno_t, u_int32_t)); + */ +int +__dbreg_register_log(env, txnp, ret_lsnp, flags, + opcode, name, uid, fileid, ftype, meta_pgno, + id) + ENV *env; + DB_TXN *txnp; + DB_LSN *ret_lsnp; + u_int32_t flags; + u_int32_t opcode; + const DBT *name; + const DBT *uid; + int32_t fileid; + DBTYPE ftype; + db_pgno_t meta_pgno; + u_int32_t id; +{ + DBT logrec; + DB_LSN *lsnp, null_lsn, *rlsnp; + DB_TXNLOGREC *lr; + u_int32_t zero, uinttmp, rectype, txn_num; + u_int npad; + u_int8_t *bp; + int is_durable, ret; + + COMPQUIET(lr, NULL); + + rlsnp = ret_lsnp; + rectype = DB___dbreg_register; + npad = 0; + ret = 0; + + if (LF_ISSET(DB_LOG_NOT_DURABLE)) { + if (txnp == NULL) + return (0); + is_durable = 0; + } else + is_durable = 1; + + if (txnp == NULL) { + txn_num = 0; + lsnp = &null_lsn; + null_lsn.file = null_lsn.offset = 0; + } else { + if (TAILQ_FIRST(&txnp->kids) != NULL && + (ret = __txn_activekids(env, rectype, txnp)) != 0) + return (ret); + /* + * We need to assign begin_lsn while holding region mutex. + * That assignment is done inside the DbEnv->log_put call, + * so pass in the appropriate memory location to be filled + * in by the log_put code. + */ + DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); + txn_num = txnp->txnid; + } + + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(u_int32_t) + + sizeof(u_int32_t) + (name == NULL ? 0 : name->size) + + sizeof(u_int32_t) + (uid == NULL ? 
0 : uid->size) + + sizeof(u_int32_t) + + sizeof(u_int32_t) + + sizeof(u_int32_t) + + sizeof(u_int32_t); + if (CRYPTO_ON(env)) { + npad = env->crypto_handle->adj_size(logrec.size); + logrec.size += npad; + } + + if (is_durable || txnp == NULL) { + if ((ret = + __os_malloc(env, logrec.size, &logrec.data)) != 0) + return (ret); + } else { + if ((ret = __os_malloc(env, + logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) + return (ret); +#ifdef DIAGNOSTIC + if ((ret = + __os_malloc(env, logrec.size, &logrec.data)) != 0) { + __os_free(env, lr); + return (ret); + } +#else + logrec.data = lr->data; +#endif + } + if (npad > 0) + memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); + + bp = logrec.data; + + LOGCOPY_32(env, bp, &rectype); + bp += sizeof(rectype); + + LOGCOPY_32(env, bp, &txn_num); + bp += sizeof(txn_num); + + LOGCOPY_FROMLSN(env, bp, lsnp); + bp += sizeof(DB_LSN); + + LOGCOPY_32(env, bp, &opcode); + bp += sizeof(opcode); + + if (name == NULL) { + zero = 0; + LOGCOPY_32(env, bp, &zero); + bp += sizeof(u_int32_t); + } else { + LOGCOPY_32(env, bp, &name->size); + bp += sizeof(name->size); + memcpy(bp, name->data, name->size); + bp += name->size; + } + + if (uid == NULL) { + zero = 0; + LOGCOPY_32(env, bp, &zero); + bp += sizeof(u_int32_t); + } else { + LOGCOPY_32(env, bp, &uid->size); + bp += sizeof(uid->size); + memcpy(bp, uid->data, uid->size); + bp += uid->size; + } + + uinttmp = (u_int32_t)fileid; + LOGCOPY_32(env,bp, &uinttmp); + bp += sizeof(uinttmp); + + uinttmp = (u_int32_t)ftype; + LOGCOPY_32(env,bp, &uinttmp); + bp += sizeof(uinttmp); + + uinttmp = (u_int32_t)meta_pgno; + LOGCOPY_32(env,bp, &uinttmp); + bp += sizeof(uinttmp); + + LOGCOPY_32(env, bp, &id); + bp += sizeof(id); + + DB_ASSERT(env, + (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); + + if (is_durable || txnp == NULL) { + if ((ret = __log_put(env, rlsnp,(DBT *)&logrec, + flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { + *lsnp = *rlsnp; + if (rlsnp != ret_lsnp) + *ret_lsnp = 
*rlsnp; + } + } else { + ret = 0; +#ifdef DIAGNOSTIC + /* + * Set the debug bit if we are going to log non-durable + * transactions so they will be ignored by recovery. + */ + memcpy(lr->data, logrec.data, logrec.size); + rectype |= DB_debug_FLAG; + LOGCOPY_32(env, logrec.data, &rectype); + + if (!IS_REP_CLIENT(env)) + ret = __log_put(env, + rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); +#endif + STAILQ_INSERT_HEAD(&txnp->logs, lr, links); + F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); + LSN_NOT_LOGGED(*ret_lsnp); + } + +#ifdef LOG_DIAGNOSTIC + if (ret != 0) + (void)__dbreg_register_print(env, + (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); +#endif + +#ifdef DIAGNOSTIC + __os_free(env, logrec.data); +#else + if (is_durable || txnp == NULL) + __os_free(env, logrec.data); +#endif + return (ret); +} + +/* + * PUBLIC: int __dbreg_init_recover __P((ENV *, DB_DISTAB *)); + */ +int +__dbreg_init_recover(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __dbreg_register_recover, DB___dbreg_register)) != 0) + return (ret); + return (0); +} diff --git a/db-4.8.30/dbreg/dbreg_autop.c b/db-4.8.30/dbreg/dbreg_autop.c new file mode 100644 index 0000000..4fc69ce --- /dev/null +++ b/db-4.8.30/dbreg/dbreg_autop.c @@ -0,0 +1,78 @@ +/* Do not edit: automatically built by gen_rec.awk. 
*/ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_dispatch.h" +#include "dbinc/db_am.h" +#include "dbinc/log.h" +#include "dbinc/txn.h" + +/* + * PUBLIC: int __dbreg_register_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__dbreg_register_print(env, dbtp, lsnp, notused2, notused3) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *notused3; +{ + __dbreg_register_args *argp; + u_int32_t i; + int ch; + int ret; + + notused2 = DB_TXN_PRINT; + notused3 = NULL; + + if ((ret = __dbreg_register_read(env, dbtp->data, &argp)) != 0) + return (ret); + (void)printf( + "[%lu][%lu]__dbreg_register%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, (u_long)lsnp->offset, + (argp->type & DB_debug_FLAG) ? "_debug" : "", + (u_long)argp->type, + (u_long)argp->txnp->txnid, + (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); + (void)printf("\topcode: %lu\n", (u_long)argp->opcode); + (void)printf("\tname: "); + for (i = 0; i < argp->name.size; i++) { + ch = ((u_int8_t *)argp->name.data)[i]; + printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch); + } + (void)printf("\n"); + (void)printf("\tuid: "); + for (i = 0; i < argp->uid.size; i++) { + ch = ((u_int8_t *)argp->uid.data)[i]; + printf(isprint(ch) || ch == 0x0a ? 
"%c" : "%#x ", ch); + } + (void)printf("\n"); + (void)printf("\tfileid: %ld\n", (long)argp->fileid); + (void)printf("\tftype: 0x%lx\n", (u_long)argp->ftype); + (void)printf("\tmeta_pgno: %lu\n", (u_long)argp->meta_pgno); + (void)printf("\tid: 0x%lx\n", (u_long)argp->id); + (void)printf("\n"); + __os_free(env, argp); + return (0); +} + +/* + * PUBLIC: int __dbreg_init_print __P((ENV *, DB_DISTAB *)); + */ +int +__dbreg_init_print(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __dbreg_register_print, DB___dbreg_register)) != 0) + return (ret); + return (0); +} diff --git a/db-4.8.30/dbreg/dbreg_rec.c b/db-4.8.30/dbreg/dbreg_rec.c new file mode 100644 index 0000000..7cb5150 --- /dev/null +++ b/db-4.8.30/dbreg/dbreg_rec.c @@ -0,0 +1,397 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996-2009 Oracle. All rights reserved. + */ +/* + * Copyright (c) 1995, 1996 + * The President and Fellows of Harvard University. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/log.h" +#include "dbinc/txn.h" + +static int __dbreg_open_file __P((ENV *, + DB_TXN *, __dbreg_register_args *, void *)); + +/* + * PUBLIC: int __dbreg_register_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__dbreg_register_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __dbreg_register_args *argp; + DB_ENTRY *dbe; + DB_LOG *dblp; + DB *dbp; + u_int32_t status; + int do_close, do_open, do_rem, ret, t_ret; + + dblp = env->lg_handle; + dbp = NULL; + +#ifdef DEBUG_RECOVER + REC_PRINT(__dbreg_register_print); +#endif + do_open = do_close = 0; + if ((ret = __dbreg_register_read(env, dbtp->data, &argp)) != 0) + goto out; + + switch (argp->opcode) { + case DBREG_REOPEN: + case DBREG_PREOPEN: + case DBREG_OPEN: + /* + * In general, we redo the open on REDO and abort on UNDO. + * However, a reopen is a second instance of an open of + * in-memory files and we don't want to close them yet + * on abort, so just skip that here. 
+ */ + if ((DB_REDO(op) || + op == DB_TXN_OPENFILES || op == DB_TXN_POPENFILES)) + do_open = 1; + else if (argp->opcode != DBREG_REOPEN) + do_close = 1; + break; + case DBREG_CLOSE: + if (DB_UNDO(op)) + do_open = 1; + else + do_close = 1; + break; + case DBREG_RCLOSE: + /* + * DBREG_RCLOSE was generated by recover because a file was + * left open. The POPENFILES pass, which is run to open + * files to abort prepared transactions, may not include the + * open for this file so we open it here. Note that a normal + * CLOSE is not legal before the prepared transaction is + * committed or aborted. + */ + if (DB_UNDO(op) || op == DB_TXN_POPENFILES) + do_open = 1; + else + do_close = 1; + break; + case DBREG_CHKPNT: + if (DB_UNDO(op) || + op == DB_TXN_OPENFILES || op == DB_TXN_POPENFILES) + do_open = 1; + break; + default: + ret = __db_unknown_path(env, "__dbreg_register_recover"); + goto out; + } + + if (do_open) { + /* + * We must open the db even if the meta page is not + * yet written as we may be creating subdatabase. + */ + if (op == DB_TXN_OPENFILES && argp->opcode != DBREG_CHKPNT) + F_SET(dblp, DBLOG_FORCE_OPEN); + + /* + * During an abort or an open pass to recover prepared txns, + * we need to make sure that we use the same locker id on the + * open. We pass the txnid along to ensure this. + */ + ret = __dbreg_open_file(env, + op == DB_TXN_ABORT || op == DB_TXN_POPENFILES ? + argp->txnp : NULL, argp, info); + if (ret == DB_PAGE_NOTFOUND && argp->meta_pgno != PGNO_BASE_MD) + ret = ENOENT; + if (ret == ENOENT || ret == EINVAL) { + /* + * If this is an OPEN while rolling forward, it's + * possible that the file was recreated since last + * time we got here. In that case, we've got deleted + * set and probably shouldn't, so we need to check + * for that case and possibly retry. 
+ */ + if (DB_REDO(op) && argp->txnp != 0 && + dblp->dbentry[argp->fileid].deleted) { + dblp->dbentry[argp->fileid].deleted = 0; + ret = + __dbreg_open_file(env, NULL, argp, info); + if (ret == DB_PAGE_NOTFOUND && + argp->meta_pgno != PGNO_BASE_MD) + ret = ENOENT; + } + /* + * We treat ENOENT as OK since it's possible that + * the file was renamed or deleted. + * All other errors, we return. + */ + if (ret == ENOENT) + ret = 0; + } + F_CLR(dblp, DBLOG_FORCE_OPEN); + } + + if (do_close) { + /* + * If we are undoing an open, or redoing a close, + * then we need to close the file. If we are simply + * revoking then we just need to grab the DBP and revoke + * the log id. + * + * If the file is deleted, then we can just ignore this close. + * Otherwise, we should usually have a valid dbp we should + * close or whose reference count should be decremented. + * However, if we shut down without closing a file, we may, in + * fact, not have the file open, and that's OK. + */ + do_rem = 0; + MUTEX_LOCK(env, dblp->mtx_dbreg); + if (argp->fileid < dblp->dbentry_cnt) { + /* + * Typically, closes should match an open which means + * that if this is a close, there should be a valid + * entry in the dbentry table when we get here, + * however there are exceptions. 1. If this is an + * OPENFILES pass, then we may have started from + * a log file other than the first, and the + * corresponding open appears in an earlier file. + * 2. If we are undoing an open on an abort or + * recovery, it's possible that we failed after + * the log record, but before we actually entered + * a handle here. + * 3. If we aborted an open, then we wrote a non-txnal + * RCLOSE into the log. During the forward pass, the + * file won't be open, and that's OK. + */ + dbe = &dblp->dbentry[argp->fileid]; + if (dbe->dbp == NULL && !dbe->deleted) { + /* No valid entry here. Nothing to do. */ + MUTEX_UNLOCK(env, dblp->mtx_dbreg); + goto done; + } + + /* We have either an open entry or a deleted entry. 
*/ + if ((dbp = dbe->dbp) != NULL) { + /* + * If we're a replication client, it's + * possible to get here with a dbp that + * the user opened, but which we later + * assigned a fileid to. Be sure that + * we only close dbps that we opened in + * the recovery code or that were opened + * inside a currently aborting transaction + * but not by the recovery code. + */ + do_rem = F_ISSET(dbp, DB_AM_RECOVER) ? + op != DB_TXN_ABORT : op == DB_TXN_ABORT; + MUTEX_UNLOCK(env, dblp->mtx_dbreg); + } else if (dbe->deleted) { + MUTEX_UNLOCK(env, dblp->mtx_dbreg); + if ((ret = __dbreg_rem_dbentry( + dblp, argp->fileid)) != 0) + goto out; + } + } else + MUTEX_UNLOCK(env, dblp->mtx_dbreg); + + /* + * During recovery, all files are closed. On an abort, we only + * close the file if we opened it during the abort + * (DB_AM_RECOVER set), otherwise we simply do a __db_refresh. + * For the close case, if remove or rename has closed the file, + * don't request a sync, because a NULL mpf would be a problem. + * + * If we are undoing a create we'd better discard any buffers + * from the memory pool. We identify creates because the + * argp->id field contains the transaction containing the file + * create; if that id is invalid, we are not creating. + * + * On the backward pass, we need to "undo" opens even if the + * transaction in which they appeared committed, because we have + * already undone the corresponding close. In that case, the + * id will be valid, but we do not want to discard buffers. 
+ */ + if (do_rem && dbp != NULL) { + if (argp->id != TXN_INVALID) { + if ((ret = __db_txnlist_find(env, + info, argp->txnp->txnid, &status)) + != DB_NOTFOUND && ret != 0) + goto out; + if (ret == DB_NOTFOUND || status != TXN_COMMIT) + F_SET(dbp, DB_AM_DISCARD); + ret = 0; + } + + if (op == DB_TXN_ABORT) { + if ((t_ret = __db_refresh(dbp, + NULL, DB_NOSYNC, NULL, 0)) != 0 && ret == 0) + ret = t_ret; + } else { + if ((t_ret = __db_close( + dbp, NULL, DB_NOSYNC)) != 0 && ret == 0) + ret = t_ret; + } + } + } +done: if (ret == 0) + *lsnp = argp->prev_lsn; +out: if (argp != NULL) + __os_free(env, argp); + return (ret); +} + +/* + * __dbreg_open_file -- + * Called during log_register recovery. Make sure that we have an + * entry in the dbentry table for this ndx. Returns 0 on success, + * non-zero on error. + */ +static int +__dbreg_open_file(env, txn, argp, info) + ENV *env; + DB_TXN *txn; + __dbreg_register_args *argp; + void *info; +{ + DB *dbp; + DB_ENTRY *dbe; + DB_LOG *dblp; + u_int32_t id, status; + int ret; + + dblp = env->lg_handle; + + /* + * When we're opening, we have to check that the name we are opening + * is what we expect. If it's not, then we close the old file and + * open the new one. + */ + MUTEX_LOCK(env, dblp->mtx_dbreg); + if (argp->fileid != DB_LOGFILEID_INVALID && + argp->fileid < dblp->dbentry_cnt) + dbe = &dblp->dbentry[argp->fileid]; + else + dbe = NULL; + + if (dbe != NULL) { + if (dbe->deleted) { + MUTEX_UNLOCK(env, dblp->mtx_dbreg); + return (ENOENT); + } + + /* + * At the end of OPENFILES, we may have a file open. If this + * is a reopen, then we will always close and reopen. If the + * open was part of a committed transaction, so it doesn't + * get undone. However, if the fileid was previously used, + * we'll see a close that may need to get undone. There are + * three ways we can detect this. 
1) the meta-pgno in the + * current file does not match that of the open file, 2) the + * file uid of the current file does not match that of the + * previously opened file, 3) the current file is unnamed, in + * which case it should never be opened during recovery. + * It is also possible that the db open previously failed + * because the file was missing. Check the DB_AM_OPEN_CALLED + * bit and try to open it again. + */ + if ((dbp = dbe->dbp) != NULL) { + if (argp->opcode == DBREG_REOPEN || + !F_ISSET(dbp, DB_AM_OPEN_CALLED) || + dbp->meta_pgno != argp->meta_pgno || + argp->name.size == 0 || + memcmp(dbp->fileid, argp->uid.data, + DB_FILE_ID_LEN) != 0) { + MUTEX_UNLOCK(env, dblp->mtx_dbreg); + (void)__dbreg_revoke_id(dbp, 0, + DB_LOGFILEID_INVALID); + if (F_ISSET(dbp, DB_AM_RECOVER)) + (void)__db_close(dbp, NULL, DB_NOSYNC); + goto reopen; + } + + /* + * We should only get here if we already have the + * dbp from an openfiles pass, in which case, what's + * here had better be the same dbp. + */ + DB_ASSERT(env, dbe->dbp == dbp); + MUTEX_UNLOCK(env, dblp->mtx_dbreg); + + /* + * This is a successful open. We need to record that + * in the txnlist so that we know how to handle the + * subtransaction that created the file system object. + */ + if (argp->id != TXN_INVALID && + (ret = __db_txnlist_update(env, info, + argp->id, TXN_EXPECTED, NULL, &status, 1)) != 0) + return (ret); + return (0); + } + } + + MUTEX_UNLOCK(env, dblp->mtx_dbreg); + +reopen: + /* + * We never re-open temporary files. Temp files are only useful during + * aborts in which case the dbp was entered when the file was + * registered. During recovery, we treat temp files as properly deleted + * files, allowing the open to fail and not reporting any errors when + * recovery fails to get a valid dbp from __dbreg_id_to_db. 
+ */ + if (argp->name.size == 0) { + (void)__dbreg_add_dbentry(env, dblp, NULL, argp->fileid); + return (ENOENT); + } + + /* + * We are about to pass a recovery txn pointer into the main library. + * We need to make sure that any accessed fields are set appropriately. + */ + if (txn != NULL) { + id = txn->txnid; + memset(txn, 0, sizeof(DB_TXN)); + txn->txnid = id; + txn->mgrp = env->tx_handle; + } + + return (__dbreg_do_open(env, + txn, dblp, argp->uid.data, argp->name.data, argp->ftype, + argp->fileid, argp->meta_pgno, info, argp->id, argp->opcode)); +} diff --git a/db-4.8.30/dbreg/dbreg_stat.c b/db-4.8.30/dbreg/dbreg_stat.c new file mode 100644 index 0000000..57e7f9c --- /dev/null +++ b/db-4.8.30/dbreg/dbreg_stat.c @@ -0,0 +1,135 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997-2009 Oracle. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/log.h" +#include "dbinc/txn.h" + +#ifdef HAVE_STATISTICS +static int __dbreg_print_all __P((ENV *, u_int32_t)); + +/* + * __dbreg_stat_print -- + * Print the dbreg statistics. + * + * PUBLIC: int __dbreg_stat_print __P((ENV *, u_int32_t)); + */ +int +__dbreg_stat_print(env, flags) + ENV *env; + u_int32_t flags; +{ + int ret; + + if (LF_ISSET(DB_STAT_ALL) && + (ret = __dbreg_print_all(env, flags)) != 0) + return (ret); + + return (0); +} + +/* + * __dbreg_print_fname -- + * Display the contents of an FNAME structure. 
+ * + * PUBLIC: void __dbreg_print_fname __P((ENV *, FNAME *)); + */ +void +__dbreg_print_fname(env, fnp) + ENV *env; + FNAME *fnp; +{ + static const FN fn[] = { + { DB_FNAME_DURABLE, "DB_FNAME_DURABLE" }, + { DB_FNAME_NOTLOGGED, "DB_FNAME_NOTLOGGED" }, + { 0, NULL } + }; + + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "DB handle FNAME contents:"); + STAT_LONG("log ID", fnp->id); + STAT_ULONG("Meta pgno", fnp->meta_pgno); + __db_print_fileid(env, fnp->ufid, "\tFile ID"); + STAT_ULONG("create txn", fnp->create_txnid); + __db_prflags(env, NULL, fnp->flags, fn, NULL, "\tFlags"); +} + +/* + * __dbreg_print_all -- + * Display the ENV's list of files. + */ +static int +__dbreg_print_all(env, flags) + ENV *env; + u_int32_t flags; +{ + DB *dbp; + DB_LOG *dblp; + FNAME *fnp; + LOG *lp; + int32_t *stack; + int del, first; + u_int32_t i; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + __db_msg(env, "LOG FNAME list:"); + __mutex_print_debug_single( + env, "File name mutex", lp->mtx_filelist, flags); + + STAT_LONG("Fid max", lp->fid_max); + STAT_LONG("Log buffer size", lp->buffer_size); + + MUTEX_LOCK(env, lp->mtx_filelist); + first = 1; + SH_TAILQ_FOREACH(fnp, &lp->fq, q, __fname) { + if (first) { + first = 0; + __db_msg(env, + "ID\tName\t\tType\tPgno\tPid\tTxnid\tFlags\tDBP-info"); + } + dbp = fnp->id >= dblp->dbentry_cnt ? NULL : + dblp->dbentry[fnp->id].dbp; + del = fnp->id >= dblp->dbentry_cnt ? 0 : + dblp->dbentry[fnp->id].deleted; + __db_msg(env, + "%ld\t%-8s%s%-8s%s\t%lu\t%lu\t%lx\t%lx\t%s (%d %lx %lx)", + (long)fnp->id, + fnp->fname_off == INVALID_ROFF ? + "" : (char *)R_ADDR(&dblp->reginfo, fnp->fname_off), + fnp->dname_off == INVALID_ROFF ? "" : ":", + fnp->dname_off == INVALID_ROFF ? + "" : (char *)R_ADDR(&dblp->reginfo, fnp->dname_off), + __db_dbtype_to_string(fnp->s_type), + (u_long)fnp->meta_pgno, (u_long)fnp->pid, + (u_long)fnp->create_txnid, (u_long)fnp->flags, + dbp == NULL ? 
"No DBP" : "DBP", del, P_TO_ULONG(dbp), + (u_long)(dbp == NULL ? 0 : dbp->flags)); + } + MUTEX_UNLOCK(env, lp->mtx_filelist); + + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "LOG region list of free IDs."); + if (lp->free_fid_stack == INVALID_ROFF) + __db_msg(env, "Free id stack is empty."); + else { + STAT_ULONG("Free id array size", lp->free_fids_alloced); + STAT_ULONG("Number of ids on the free stack", lp->free_fids); + stack = R_ADDR(&dblp->reginfo, lp->free_fid_stack); + for (i = 0; i < lp->free_fids; i++) + STAT_LONG("fid", stack[i]); + } + + return (0); +} +#endif diff --git a/db-4.8.30/dbreg/dbreg_util.c b/db-4.8.30/dbreg/dbreg_util.c new file mode 100644 index 0000000..e541dd8 --- /dev/null +++ b/db-4.8.30/dbreg/dbreg_util.c @@ -0,0 +1,810 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997-2009 Oracle. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/fop.h" +#include "dbinc/log.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" + +static int __dbreg_check_master __P((ENV *, u_int8_t *, char *)); + +/* + * __dbreg_add_dbentry -- + * Adds a DB entry to the dbreg DB entry table. + * + * PUBLIC: int __dbreg_add_dbentry __P((ENV *, DB_LOG *, DB *, int32_t)); + */ +int +__dbreg_add_dbentry(env, dblp, dbp, ndx) + ENV *env; + DB_LOG *dblp; + DB *dbp; + int32_t ndx; +{ + int32_t i; + int ret; + + ret = 0; + + MUTEX_LOCK(env, dblp->mtx_dbreg); + + /* + * Check if we need to grow the table. Note, ndx is 0-based (the + * index into the DB entry table) an dbentry_cnt is 1-based, the + * number of available slots. + */ + if (dblp->dbentry_cnt <= ndx) { + if ((ret = __os_realloc(env, + (size_t)(ndx + DB_GROW_SIZE) * sizeof(DB_ENTRY), + &dblp->dbentry)) != 0) + goto err; + + /* Initialize the new entries. 
*/ + for (i = dblp->dbentry_cnt; i < ndx + DB_GROW_SIZE; i++) { + dblp->dbentry[i].dbp = NULL; + dblp->dbentry[i].deleted = 0; + } + dblp->dbentry_cnt = i; + } + + DB_ASSERT(env, dblp->dbentry[ndx].dbp == NULL); + dblp->dbentry[ndx].deleted = dbp == NULL; + dblp->dbentry[ndx].dbp = dbp; + +err: MUTEX_UNLOCK(env, dblp->mtx_dbreg); + return (ret); +} + +/* + * __dbreg_rem_dbentry + * Remove an entry from the DB entry table. + * + * PUBLIC: int __dbreg_rem_dbentry __P((DB_LOG *, int32_t)); + */ +int +__dbreg_rem_dbentry(dblp, ndx) + DB_LOG *dblp; + int32_t ndx; +{ + MUTEX_LOCK(dblp->env, dblp->mtx_dbreg); + if (dblp->dbentry_cnt > ndx) { + dblp->dbentry[ndx].dbp = NULL; + dblp->dbentry[ndx].deleted = 0; + } + MUTEX_UNLOCK(dblp->env, dblp->mtx_dbreg); + + return (0); +} + +/* + * __dbreg_log_files -- + * Put a DBREG_CHKPNT/CLOSE log record for each open database. + * + * PUBLIC: int __dbreg_log_files __P((ENV *, u_int32_t)); + */ +int +__dbreg_log_files(env, opcode) + ENV *env; + u_int32_t opcode; +{ + DBT *dbtp, fid_dbt, t; + DB_LOG *dblp; + DB_LSN r_unused; + FNAME *fnp; + LOG *lp; + int ret; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + ret = 0; + + MUTEX_LOCK(env, lp->mtx_filelist); + + SH_TAILQ_FOREACH(fnp, &lp->fq, q, __fname) { + /* This id was revoked by a switch in replication master. */ + if (fnp->id == DB_LOGFILEID_INVALID) + continue; + if (fnp->fname_off == INVALID_ROFF) + dbtp = NULL; + else { + memset(&t, 0, sizeof(t)); + t.data = R_ADDR(&dblp->reginfo, fnp->fname_off); + t.size = (u_int32_t)strlen(t.data) + 1; + dbtp = &t; + } + memset(&fid_dbt, 0, sizeof(fid_dbt)); + fid_dbt.data = fnp->ufid; + fid_dbt.size = DB_FILE_ID_LEN; + /* + * Output DBREG_CHKPNT records which will be processed during + * the OPENFILES pass of recovery. At the end of recovery we + * want to output the files that were open so a future recovery + * run will have the correct files open during a backward pass. 
+ * For this we output DBREG_RCLOSE records so the files will be + * closed on the forward pass. + */ + if ((ret = __dbreg_register_log(env, NULL, &r_unused, + F_ISSET(fnp, DB_FNAME_DURABLE) ? 0 : DB_LOG_NOT_DURABLE, + opcode, + dbtp, &fid_dbt, fnp->id, fnp->s_type, fnp->meta_pgno, + TXN_INVALID)) != 0) + break; + } + + MUTEX_UNLOCK(env, lp->mtx_filelist); + + return (ret); +} + +/* + * __dbreg_close_files -- + * Remove the id's of open files and actually close those + * files that were opened by the recovery daemon. We sync the + * file, unless its mpf pointer has been NULLed by a db_remove or + * db_rename. We may not have flushed the log_register record that + * closes the file. + * + * PUBLIC: int __dbreg_close_files __P((ENV *, int)); + */ +int +__dbreg_close_files(env, do_restored) + ENV *env; + int do_restored; +{ + DB *dbp; + DB_LOG *dblp; + int ret, t_ret; + int32_t i; + + /* If we haven't initialized logging, we have nothing to do. */ + if (!LOGGING_ON(env)) + return (0); + + dblp = env->lg_handle; + ret = 0; + + MUTEX_LOCK(env, dblp->mtx_dbreg); + for (i = 0; i < dblp->dbentry_cnt; i++) { + /* + * We only want to close dbps that recovery opened. Any + * dbps that weren't opened by recovery but show up here + * are about to be unconditionally removed from the table. + * Before doing so, we need to revoke their log fileids + * so that we don't end up leaving around FNAME entries + * for dbps that shouldn't have them. + */ + if ((dbp = dblp->dbentry[i].dbp) != NULL) { + /* + * It's unsafe to call DB->close or revoke_id + * while holding the thread lock, because + * we'll call __dbreg_rem_dbentry and grab it again. + * + * Just drop it. Since dbreg ids go monotonically + * upward, concurrent opens should be safe, and the + * user should have no business closing files while + * we're in this loop anyway--we're in the process of + * making all outstanding dbps invalid. + */ + /* + * If we only want to close those FNAMES marked + * as restored, check now. 
+ */ + if (do_restored && + !F_ISSET(dbp->log_filename, DB_FNAME_RESTORED)) + continue; + MUTEX_UNLOCK(env, dblp->mtx_dbreg); + if (F_ISSET(dbp, DB_AM_RECOVER)) + t_ret = __db_close(dbp, + NULL, dbp->mpf == NULL ? DB_NOSYNC : 0); + else + t_ret = __dbreg_revoke_id( + dbp, 0, DB_LOGFILEID_INVALID); + if (ret == 0) + ret = t_ret; + MUTEX_LOCK(env, dblp->mtx_dbreg); + } + + dblp->dbentry[i].deleted = 0; + dblp->dbentry[i].dbp = NULL; + } + MUTEX_UNLOCK(env, dblp->mtx_dbreg); + return (ret); +} + +/* + * __dbreg_close_file -- + * Close a database file opened by recovery. + * PUBLIC: int __dbreg_close_file __P((ENV *, FNAME *)); + */ +int +__dbreg_close_file(env, fnp) + ENV *env; + FNAME *fnp; +{ + DB *dbp; + DB_LOG *dblp; + + dblp = env->lg_handle; + + dbp = dblp->dbentry[fnp->id].dbp; + if (dbp == NULL) + return (0); + DB_ASSERT(env, dbp->log_filename == fnp); + DB_ASSERT(env, F_ISSET(dbp, DB_AM_RECOVER)); + return (__db_close(dbp, NULL, DB_NOSYNC)); +} + +/* + * __dbreg_mark_restored -- + * Mark files when we change replication roles and there are outstanding + * prepared txns that may use these files. These will be invalidated later + * when all outstanding prepared txns are resolved. + * + * PUBLIC: int __dbreg_mark_restored __P((ENV *)); + */ +int +__dbreg_mark_restored(env) + ENV *env; +{ + DB_LOG *dblp; + FNAME *fnp; + LOG *lp; + + /* If we haven't initialized logging, we have nothing to do. */ + if (!LOGGING_ON(env)) + return (0); + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + MUTEX_LOCK(env, lp->mtx_filelist); + SH_TAILQ_FOREACH(fnp, &lp->fq, q, __fname) + if (fnp->id != DB_LOGFILEID_INVALID) + F_SET(fnp, DB_FNAME_RESTORED); + + MUTEX_UNLOCK(env, lp->mtx_filelist); + return (0); +} + +/* + * __dbreg_invalidate_files -- + * Invalidate files when we change replication roles. Save the + * id so that another process will be able to clean up the information + * when it notices. 
+ * + * PUBLIC: int __dbreg_invalidate_files __P((ENV *, int)); + */ +int +__dbreg_invalidate_files(env, do_restored) + ENV *env; + int do_restored; +{ + DB_LOG *dblp; + FNAME *fnp; + LOG *lp; + int ret; + + /* If we haven't initialized logging, we have nothing to do. */ + if (!LOGGING_ON(env)) + return (0); + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + ret = 0; + MUTEX_LOCK(env, lp->mtx_filelist); + SH_TAILQ_FOREACH(fnp, &lp->fq, q, __fname) { + /* + * Normally, skip any file with DB_FNAME_RESTORED + * set. If do_restored is set, only invalidate + * those files with the flag set and skip all others. + */ + if (F_ISSET(fnp, DB_FNAME_RESTORED) && !do_restored) + continue; + if (!F_ISSET(fnp, DB_FNAME_RESTORED) && do_restored) + continue; + if (fnp->id != DB_LOGFILEID_INVALID) { + if ((ret = __dbreg_log_close(env, + fnp, NULL, DBREG_RCLOSE)) != 0) + goto err; + fnp->old_id = fnp->id; + fnp->id = DB_LOGFILEID_INVALID; + } + } +err: MUTEX_UNLOCK(env, lp->mtx_filelist); + return (ret); +} + +/* + * __dbreg_id_to_db -- + * Return the DB corresponding to the specified dbreg id. + * + * PUBLIC: int __dbreg_id_to_db __P((ENV *, DB_TXN *, DB **, int32_t, int)); + */ +int +__dbreg_id_to_db(env, txn, dbpp, ndx, tryopen) + ENV *env; + DB_TXN *txn; + DB **dbpp; + int32_t ndx; + int tryopen; +{ + DB_LOG *dblp; + FNAME *fname; + int ret; + char *name; + + dblp = env->lg_handle; + ret = 0; + + MUTEX_LOCK(env, dblp->mtx_dbreg); + + /* + * We take a final parameter that indicates whether we should attempt + * to open the file if no mapping is found. During recovery, the + * recovery routines all want to try to open the file (and this is + * called from __dbreg_id_to_db), however, if we have a multi-process + * environment where some processes may not have the files open, + * then we also get called from __dbreg_assign_id and it's OK if + * there is no mapping. + * + * Under failchk, a process different than the one issuing DB + * operations may abort a transaction. 
In this case, the "recovery" + * routines are run by a process that does not necessarily have the + * file open, so we we must open the file explicitly. + */ + if (ndx >= dblp->dbentry_cnt || + (!dblp->dbentry[ndx].deleted && dblp->dbentry[ndx].dbp == NULL)) { + if (!tryopen || F_ISSET(dblp, DBLOG_RECOVER)) { + ret = ENOENT; + goto err; + } + + /* + * __dbreg_id_to_fname acquires the mtx_filelist mutex, which + * we can't safely acquire while we hold the thread lock. We + * no longer need it anyway--the dbentry table didn't have what + * we needed. + */ + MUTEX_UNLOCK(env, dblp->mtx_dbreg); + + if (__dbreg_id_to_fname(dblp, ndx, 0, &fname) != 0) + /* + * With transactional opens, we may actually have + * closed this file in the transaction in which + * case this will fail too. Then it's up to the + * caller to reopen the file. + */ + return (ENOENT); + + /* + * Note that we're relying on fname not to change, even though + * we released the mutex that protects it (mtx_filelist) inside + * __dbreg_id_to_fname. This should be a safe assumption, the + * other process that has the file open shouldn't be closing it + * while we're trying to abort. + */ + name = fname->fname_off == INVALID_ROFF ? + NULL : R_ADDR(&dblp->reginfo, fname->fname_off); + + /* + * At this point, we are not holding the thread lock, so exit + * directly instead of going through the exit code at the + * bottom. If the __dbreg_do_open succeeded, then we don't need + * to do any of the remaining error checking at the end of this + * routine. + * If TXN_INVALID is passed then no txnlist is needed. + */ + if ((ret = __dbreg_do_open(env, txn, dblp, + fname->ufid, name, fname->s_type, ndx, fname->meta_pgno, + NULL, TXN_INVALID, F_ISSET(fname, DB_FNAME_INMEM) ? + DBREG_REOPEN : DBREG_OPEN)) != 0) + return (ret); + + *dbpp = dblp->dbentry[ndx].dbp; + return (*dbpp == NULL ? DB_DELETED : 0); + } + + /* + * Return DB_DELETED if the file has been deleted (it's not an error). 
+ */ + if (dblp->dbentry[ndx].deleted) { + ret = DB_DELETED; + goto err; + } + + /* It's an error if we don't have a corresponding writeable DB. */ + if ((*dbpp = dblp->dbentry[ndx].dbp) == NULL) + ret = ENOENT; + else + /* + * If we are in recovery, then set that the file has + * been written. It is possible to run recovery, + * find all the pages in their post update state + * in the OS buffer pool, put a checkpoint in the log + * and then crash the system without forcing the pages + * to disk. If this is an in-memory file, we may not have + * an mpf yet. + */ + if ((*dbpp)->mpf != NULL && (*dbpp)->mpf->mfp != NULL) + (*dbpp)->mpf->mfp->file_written = 1; + +err: MUTEX_UNLOCK(env, dblp->mtx_dbreg); + return (ret); +} + +/* + * __dbreg_id_to_fname -- + * Traverse the shared-memory region looking for the entry that + * matches the passed dbreg id. Returns 0 on success; -1 on error. + * + * PUBLIC: int __dbreg_id_to_fname __P((DB_LOG *, int32_t, int, FNAME **)); + */ +int +__dbreg_id_to_fname(dblp, id, have_lock, fnamep) + DB_LOG *dblp; + int32_t id; + int have_lock; + FNAME **fnamep; +{ + ENV *env; + FNAME *fnp; + LOG *lp; + int ret; + + env = dblp->env; + lp = dblp->reginfo.primary; + + ret = -1; + + if (!have_lock) + MUTEX_LOCK(env, lp->mtx_filelist); + SH_TAILQ_FOREACH(fnp, &lp->fq, q, __fname) + if (fnp->id == id) { + *fnamep = fnp; + ret = 0; + break; + } + if (!have_lock) + MUTEX_UNLOCK(env, lp->mtx_filelist); + + return (ret); +} +/* + * __dbreg_fid_to_fname -- + * Traverse the shared-memory region looking for the entry that + * matches the passed file unique id. Returns 0 on success; -1 on error. 
+ * + * PUBLIC: int __dbreg_fid_to_fname __P((DB_LOG *, u_int8_t *, int, FNAME **)); + */ +int +__dbreg_fid_to_fname(dblp, fid, have_lock, fnamep) + DB_LOG *dblp; + u_int8_t *fid; + int have_lock; + FNAME **fnamep; +{ + ENV *env; + FNAME *fnp; + LOG *lp; + int ret; + + env = dblp->env; + lp = dblp->reginfo.primary; + + ret = -1; + + if (!have_lock) + MUTEX_LOCK(env, lp->mtx_filelist); + SH_TAILQ_FOREACH(fnp, &lp->fq, q, __fname) + if (memcmp(fnp->ufid, fid, DB_FILE_ID_LEN) == 0) { + *fnamep = fnp; + ret = 0; + break; + } + if (!have_lock) + MUTEX_UNLOCK(env, lp->mtx_filelist); + + return (ret); +} + +/* + * __dbreg_get_name + * + * Interface to get name of registered files. This is mainly diagnostic + * and the name passed could be transient unless there is something + * ensuring that the file cannot be closed. + * + * PUBLIC: int __dbreg_get_name __P((ENV *, u_int8_t *, char **, char **)); + */ +int +__dbreg_get_name(env, fid, fnamep, dnamep) + ENV *env; + u_int8_t *fid; + char **fnamep, **dnamep; +{ + DB_LOG *dblp; + FNAME *fnp; + + dblp = env->lg_handle; + + if (dblp != NULL && __dbreg_fid_to_fname(dblp, fid, 0, &fnp) == 0) { + *fnamep = fnp->fname_off == INVALID_ROFF ? + NULL : R_ADDR(&dblp->reginfo, fnp->fname_off); + *dnamep = fnp->dname_off == INVALID_ROFF ? + NULL : R_ADDR(&dblp->reginfo, fnp->dname_off); + return (0); + } + + *fnamep = *dnamep = NULL; + return (-1); +} + +/* + * __dbreg_do_open -- + * Open files referenced in the log. This is the part of the open that + * is not protected by the thread mutex. 
+ * PUBLIC: int __dbreg_do_open __P((ENV *, + * PUBLIC: DB_TXN *, DB_LOG *, u_int8_t *, char *, DBTYPE, + * PUBLIC: int32_t, db_pgno_t, void *, u_int32_t, u_int32_t)); + */ +int +__dbreg_do_open(env, + txn, lp, uid, name, ftype, ndx, meta_pgno, info, id, opcode) + ENV *env; + DB_TXN *txn; + DB_LOG *lp; + u_int8_t *uid; + char *name; + DBTYPE ftype; + int32_t ndx; + db_pgno_t meta_pgno; + void *info; + u_int32_t id, opcode; +{ + DB *dbp; + u_int32_t cstat, ret_stat; + int ret, t_ret, try_inmem; + char *dname, *fname; + + cstat = TXN_EXPECTED; + fname = name; + dname = NULL; + try_inmem = 0; + +retry_inmem: + if ((ret = __db_create_internal(&dbp, lp->env, 0)) != 0) + return (ret); + + /* + * We can open files under a number of different scenarios. + * First, we can open a file during a normal txn_abort, if that file + * was opened and closed during the transaction (as is the master + * database of a sub-database). + * Second, we might be aborting a transaction in a process other than + * the one that did it (failchk). + * Third, we might be in recovery. + * In case 3, there is no locking, so there is no issue. + * In cases 1 and 2, we are guaranteed to already hold any locks + * that we need, since we're still in the same transaction, so by + * setting DB_AM_RECOVER, we guarantee that we don't log and that + * we don't try to acquire locks on behalf of a different locker id. 
+ */ + F_SET(dbp, DB_AM_RECOVER); + if (meta_pgno != PGNO_BASE_MD) { + memcpy(dbp->fileid, uid, DB_FILE_ID_LEN); + dbp->meta_pgno = meta_pgno; + } + if (opcode == DBREG_PREOPEN) { + dbp->type = ftype; + if ((ret = __dbreg_setup(dbp, name, NULL, id)) != 0) + goto err; + MAKE_INMEM(dbp); + goto skip_open; + } + + if (opcode == DBREG_REOPEN || try_inmem) { + MAKE_INMEM(dbp); + fname = NULL; + dname = name; + } + + if ((ret = __db_open(dbp, NULL, txn, fname, dname, ftype, + DB_DURABLE_UNKNOWN | DB_ODDFILESIZE, + DB_MODE_600, meta_pgno)) == 0) { +skip_open: + /* + * Verify that we are opening the same file that we were + * referring to when we wrote this log record. + */ + if ((meta_pgno != PGNO_BASE_MD && + __dbreg_check_master(env, uid, name) != 0) || + memcmp(uid, dbp->fileid, DB_FILE_ID_LEN) != 0) + cstat = TXN_UNEXPECTED; + else + cstat = TXN_EXPECTED; + + /* Assign the specific dbreg id to this dbp. */ + if ((ret = __dbreg_assign_id(dbp, ndx, 0)) != 0) + goto err; + + /* + * If we successfully opened this file, then we need to + * convey that information to the txnlist so that we + * know how to handle the subtransaction that created + * the file system object. + */ + if (id != TXN_INVALID) + ret = __db_txnlist_update(env, + info, id, cstat, NULL, &ret_stat, 1); + +err: if (cstat == TXN_UNEXPECTED) + goto not_right; + return (ret); + } else if (ret == ENOENT) { + /* + * If the open failed with ENOENT, retry it as a named in-mem + * database. Some record types do not distinguish between a + * named in-memory database and one on-disk. Therefore, an + * internal init via replication that is trying to open and + * access this as a named in-mem database will not find it + * on-disk, and we need to try to open it in-memory too. + * But don't do this for [P]REOPEN, since we're already + * handling those cases specially, above. 
+ */ + if (try_inmem == 0 && + opcode != DBREG_PREOPEN && opcode != DBREG_REOPEN) { + if ((ret = __db_close(dbp, NULL, DB_NOSYNC)) != 0) + return (ret); + try_inmem = 1; + goto retry_inmem; + } else if (try_inmem != 0) + CLR_INMEM(dbp); + + /* + * If it exists neither on disk nor in memory + * record that the open failed in the txnlist. + */ + if (id != TXN_INVALID && (ret = __db_txnlist_update(env, + info, id, TXN_UNEXPECTED, NULL, &ret_stat, 1)) != 0) + goto not_right; + + /* + * If this is file is missing then we may have crashed + * without writing the corresponding close, record + * the open so recovery will write a close record + * with its checkpoint. + */ + if ((opcode == DBREG_CHKPNT || opcode == DBREG_OPEN) && + dbp->log_filename == NULL && + (ret = __dbreg_setup(dbp, name, NULL, id)) != 0) + return (ret); + ret = __dbreg_assign_id(dbp, ndx, 1); + return (ret); + } +not_right: + if ((t_ret = __db_close(dbp, NULL, DB_NOSYNC)) != 0) + return (ret == 0 ? t_ret : ret); + + /* Add this file as deleted. */ + if ((t_ret = __dbreg_add_dbentry(env, lp, NULL, ndx)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +static int +__dbreg_check_master(env, uid, name) + ENV *env; + u_int8_t *uid; + char *name; +{ + DB *dbp; + int ret; + + ret = 0; + if ((ret = __db_create_internal(&dbp, env, 0)) != 0) + return (ret); + F_SET(dbp, DB_AM_RECOVER); + ret = __db_open(dbp, NULL, NULL, + name, NULL, DB_BTREE, 0, DB_MODE_600, PGNO_BASE_MD); + + if (ret == 0 && memcmp(uid, dbp->fileid, DB_FILE_ID_LEN) != 0) + ret = EINVAL; + + (void)__db_close(dbp, NULL, 0); + return (ret); +} + +/* + * __dbreg_lazy_id -- + * When a replication client gets upgraded to being a replication master, + * it may have database handles open that have not been assigned an ID, but + * which have become legal to use for logging. + * + * This function lazily allocates a new ID for such a function, in a + * new transaction created for the purpose. 
We need to do this in a new + * transaction because we definitely wish to commit the dbreg_register, but + * at this point we have no way of knowing whether the log record that incited + * us to call this will be part of a committed transaction. + * + * We first revoke any old id this handle may have had. That can happen + * if a master becomes a client and then becomes a master again and + * there are other processes with valid open handles to this env. + * + * PUBLIC: int __dbreg_lazy_id __P((DB *)); + */ +int +__dbreg_lazy_id(dbp) + DB *dbp; +{ + DB_LOG *dblp; + DB_TXN *txn; + ENV *env; + FNAME *fnp; + LOG *lp; + int32_t id; + int ret; + + env = dbp->env; + + DB_ASSERT(env, IS_REP_MASTER(env)); + + env = dbp->env; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + fnp = dbp->log_filename; + + /* The mtx_filelist protects the FNAME list and id management. */ + MUTEX_LOCK(env, lp->mtx_filelist); + if (fnp->id != DB_LOGFILEID_INVALID) { + MUTEX_UNLOCK(env, lp->mtx_filelist); + return (0); + } + id = DB_LOGFILEID_INVALID; + /* + * When we became master we moved the fnp->id to old_id in + * every FNAME structure that was open. If our id was changed, + * we need to revoke and give back that id. + */ + if (fnp->old_id != DB_LOGFILEID_INVALID && + (ret = __dbreg_revoke_id(dbp, 1, DB_LOGFILEID_INVALID)) != 0) + goto err; + if ((ret = __txn_begin(env, NULL, NULL, &txn, 0)) != 0) + goto err; + + if ((ret = __dbreg_get_id(dbp, txn, &id)) != 0) { + (void)__txn_abort(txn); + goto err; + } + + if ((ret = __txn_commit(txn, DB_TXN_NOSYNC)) != 0) + goto err; + + /* + * All DB related logging routines check the id value *without* + * holding the mtx_filelist to know whether we need to call + * dbreg_lazy_id to begin with. We must set the ID after a + * *successful* commit so that there is no possibility of a second + * modification call finding a valid ID in the dbp before the + * dbreg_register and commit records are in the log. 
+ * If there was an error, then we call __dbreg_revoke_id to + * remove the entry from the lists. + */ + fnp->id = id; +err: + if (ret != 0 && id != DB_LOGFILEID_INVALID) + (void)__dbreg_revoke_id(dbp, 1, id); + MUTEX_UNLOCK(env, lp->mtx_filelist); + return (ret); +} |