summary | refs | log | tree | commit | diff
path: root/db-4.8.30/os/os_map.c
diff options
context:
space:
mode:
Diffstat (limited to 'db-4.8.30/os/os_map.c')
-rw-r--r-- db-4.8.30/os/os_map.c 594
1 files changed, 594 insertions, 0 deletions
diff --git a/db-4.8.30/os/os_map.c b/db-4.8.30/os/os_map.c
new file mode 100644
index 0000000..b5de0ed
--- /dev/null
+++ b/db-4.8.30/os/os_map.c
@@ -0,0 +1,594 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996-2009 Oracle. All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+
+#ifdef HAVE_SYSTEM_INCLUDE_FILES
+#ifdef HAVE_MMAP
+#include <sys/mman.h>
+#endif
+
+#ifdef HAVE_SHMGET
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#endif
+#endif
+
+#ifdef HAVE_MMAP /* __os_map is only defined when mmap(2) is available. */
+static int __os_map __P((ENV *, char *, DB_FH *, size_t, int, int, void **));
+#endif
+#ifdef HAVE_SHMGET /* Permission mapping used for shmget(2) segments. */
+static int __shm_mode __P((ENV *));
+#else /* Without shmget, only the "not supported" error helper exists. */
+static int __no_system_mem __P((ENV *));
+#endif
+
+/*
+ * __os_attach --
+ * Create/join a shared memory region.
+ *
+ * Three mechanisms are tried in order: the application's replacement
+ * map function (j_region_map) if one is installed; System V shared
+ * memory (shmget/shmat) if the environment has ENV_SYSTEM_MEM set; and
+ * otherwise a file opened by name and mapped through __os_map.
+ *
+ * env: environment handle; env->dbenv must be non-NULL.
+ * infop: region information. infop->name and infop->id are read,
+ * infop->addr receives the mapped address, and the REGION_CREATE
+ * flag is rewritten when a replacement map function is used.
+ * rp: region description. rp->size is the size to create/map;
+ * rp->segid is written when a System V segment is created and
+ * read when joining an existing one.
+ *
+ * Returns 0 on success, otherwise an error value.
+ *
+ * PUBLIC: int __os_attach __P((ENV *, REGINFO *, REGION *));
+ */
+int
+__os_attach(env, infop, rp)
+ ENV *env;
+ REGINFO *infop;
+ REGION *rp;
+{
+ DB_ENV *dbenv;
+ int create_ok, ret;
+
+ /*
+ * We pass a DB_ENV handle to the user's replacement map function,
+ * so there must be a valid handle.
+ */
+ DB_ASSERT(env, env != NULL && env->dbenv != NULL);
+ dbenv = env->dbenv;
+
+ if (DB_GLOBAL(j_region_map) != NULL) {
+ /*
+ * We have to find out if the region is being created. Ask
+ * the underlying map function, and use the REGINFO structure
+ * to pass that information back to our caller.
+ *
+ * create_ok is in/out: it starts as our REGION_CREATE setting
+ * and the flag is then set or cleared from whatever value the
+ * map function leaves in it, regardless of the return value.
+ */
+ create_ok = F_ISSET(infop, REGION_CREATE) ? 1 : 0;
+ ret = DB_GLOBAL(j_region_map)
+ (dbenv, infop->name, rp->size, &create_ok, &infop->addr);
+ if (create_ok)
+ F_SET(infop, REGION_CREATE);
+ else
+ F_CLR(infop, REGION_CREATE);
+ return (ret);
+ }
+
+ if (F_ISSET(env, ENV_SYSTEM_MEM)) {
+ /*
+ * If the region is in system memory on UNIX, we use shmget(2).
+ *
+ * !!!
+ * There exist spinlocks that don't work in shmget memory, e.g.,
+ * the HP/UX msemaphore interface. If we don't have locks that
+ * will work in shmget memory, we better be private and not be
+ * threaded. If we reach this point, we know we're public, so
+ * it's an error.
+ */
+#if defined(HAVE_MUTEX_HPPA_MSEM_INIT)
+ __db_errx(env,
+ "architecture does not support locks inside system shared memory");
+ return (EINVAL);
+#endif
+#if defined(HAVE_SHMGET)
+ {
+ key_t segid;
+ int id, mode;
+
+ /*
+ * We could potentially create based on REGION_CREATE_OK, but
+ * that's dangerous -- we might get crammed in sideways if
+ * some of the expected regions exist but others do not. Also,
+ * if the requested size differs from an existing region's
+ * actual size, then all sorts of nasty things can happen.
+ * Basing create solely on REGION_CREATE is much safer -- a
+ * recovery will get us straightened out.
+ */
+ if (F_ISSET(infop, REGION_CREATE)) {
+ /*
+ * The application must give us a base System V IPC key
+ * value. Adjust that value based on the region's ID,
+ * and correct so the user's original value appears in
+ * the ipcs output.
+ */
+ if (dbenv->shm_key == INVALID_REGION_SEGID) {
+ __db_errx(env,
+ "no base system shared memory ID specified");
+ return (EINVAL);
+ }
+
+ /*
+ * !!!
+ * The BDB API takes a "long" as the base segment ID,
+ * then adds an unsigned 32-bit value and stores it
+ * in a key_t. Wrong, admittedly, but not worth an
+ * API change to fix.
+ */
+ segid = (key_t)
+ ((u_long)dbenv->shm_key + (infop->id - 1));
+
+ /*
+ * If we map to an existing region, assume the application
+ * crashed and we're restarting. Delete the old region
+ * and re-try. If that fails, return an error, the
+ * application will have to select a different segment
+ * ID or clean up some other way.
+ */
+ if ((id = shmget(segid, 0, 0)) != -1) {
+ (void)shmctl(id, IPC_RMID, NULL);
+ if ((id = shmget(segid, 0, 0)) != -1) {
+ __db_errx(env,
+ "shmget: key: %ld: shared system memory region already exists",
+ (long)segid);
+ return (EAGAIN);
+ }
+ }
+
+ /*
+ * Map the DbEnv::open method file mode permissions to
+ * shmget call permissions.
+ */
+ mode = IPC_CREAT | __shm_mode(env);
+ if ((id = shmget(segid, rp->size, mode)) == -1) {
+ ret = __os_get_syserr();
+ __db_syserr(env, ret,
+ "shmget: key: %ld: unable to create shared system memory region",
+ (long)segid);
+ return (__os_posix_err(ret));
+ }
+ rp->segid = id;
+ } else
+ id = rp->segid; /* Joining: use the segment ID stored in the region. */
+
+ if ((infop->addr = shmat(id, NULL, 0)) == (void *)-1) {
+ infop->addr = NULL;
+ ret = __os_get_syserr();
+ __db_syserr(env, ret,
+ "shmat: id %d: unable to attach to shared system memory region", id);
+ return (__os_posix_err(ret));
+ }
+
+ /*
+ * Optionally lock the memory down.
+ *
+ * NOTE(review): if the lock fails (or SHM_LOCK is unavailable) we
+ * return an error with the segment still attached and infop->addr
+ * still set; no shmdt is done here -- confirm the caller cleans up.
+ */
+ if (F_ISSET(env, ENV_LOCKDOWN)) {
+#ifdef HAVE_SHMCTL_SHM_LOCK
+ ret = shmctl(
+ id, SHM_LOCK, NULL) == 0 ? 0 : __os_get_syserr();
+#else
+ ret = DB_OPNOTSUP;
+#endif
+ if (ret != 0) {
+ __db_syserr(env, ret,
+ "shmctl/SHM_LOCK: id %d: unable to lock down shared memory region", id);
+ return (__os_posix_err(ret));
+ }
+ }
+
+ return (0);
+ }
+#else
+ return (__no_system_mem(env));
+#endif
+ }
+
+#ifdef HAVE_MMAP
+ {
+ DB_FH *fhp;
+
+ fhp = NULL;
+
+ /*
+ * Try to open/create the shared region file. We DO NOT need to ensure
+ * that multiple threads/processes attempting to simultaneously create
+ * the region are properly ordered, our caller has already taken care
+ * of that.
+ *
+ * If the open fails the error is reported here; the steps below are
+ * all guarded by ret == 0, so we fall through to the final return.
+ */
+ if ((ret = __os_open(env, infop->name, 0,
+ DB_OSO_REGION |
+ (F_ISSET(infop, REGION_CREATE_OK) ? DB_OSO_CREATE : 0),
+ env->db_mode, &fhp)) != 0)
+ __db_err(env, ret, "%s", infop->name);
+
+ /*
+ * If we created the file, grow it to its full size before mapping
+ * it in. We really want to avoid touching the buffer cache after
+ * mmap(2) is called, doing anything else confuses the hell out of
+ * systems without merged VM/buffer cache systems, or, more to the
+ * point, *badly* merged VM/buffer cache systems.
+ */
+ if (ret == 0 && F_ISSET(infop, REGION_CREATE)) {
+ if (F_ISSET(dbenv, DB_ENV_REGION_INIT))
+ ret = __db_file_write(env, fhp,
+ rp->size / MEGABYTE, rp->size % MEGABYTE, 0x00);
+ else
+ ret = __db_file_extend(env, fhp, rp->size);
+ }
+
+ /* Map the file in (is_region = 1, is_rdonly = 0). */
+ if (ret == 0)
+ ret = __os_map(env,
+ infop->name, fhp, rp->size, 1, 0, &infop->addr);
+
+ if (fhp != NULL)
+ (void)__os_closehandle(env, fhp); /* Closed whether or not the map succeeded. */
+
+ return (ret);
+ }
+#else
+ COMPQUIET(infop, NULL);
+ COMPQUIET(rp, NULL);
+ __db_errx(env,
+ "architecture lacks mmap(2), shared environments not possible");
+ return (DB_OPNOTSUP);
+#endif
+}
+
+/*
+ * __os_detach --
+ * Detach from a shared memory region.
+ *
+ * Mirrors __os_attach: the application's replacement unmap function
+ * (j_region_unmap) is used if installed; otherwise System V shared
+ * memory is detached with shmdt if ENV_SYSTEM_MEM is set; otherwise
+ * the region is unmapped with munmap.
+ *
+ * env: environment handle; env->dbenv must be non-NULL.
+ * infop: region information. infop->addr is the address to detach,
+ * infop->rp supplies the region size and segment ID, and
+ * infop->name is the file unlinked when destroying a mapped
+ * region.
+ * destroy: if non-zero, also remove the System V segment or unlink
+ * the backing file after detaching. It is not passed to a
+ * replacement unmap function.
+ *
+ * Returns 0 on success, otherwise an error value. A failure of
+ * shmctl(IPC_RMID) with EINVAL is not treated as an error.
+ *
+ * PUBLIC: int __os_detach __P((ENV *, REGINFO *, int));
+ */
+int
+__os_detach(env, infop, destroy)
+ ENV *env;
+ REGINFO *infop;
+ int destroy;
+{
+ DB_ENV *dbenv;
+ REGION *rp;
+ int ret;
+
+ /*
+ * We pass a DB_ENV handle to the user's replacement unmap function,
+ * so there must be a valid handle.
+ */
+ DB_ASSERT(env, env != NULL && env->dbenv != NULL);
+ dbenv = env->dbenv;
+
+ rp = infop->rp;
+
+ /* If the user replaced the unmap call, call through their interface. */
+ if (DB_GLOBAL(j_region_unmap) != NULL)
+ return (DB_GLOBAL(j_region_unmap)(dbenv, infop->addr));
+
+ if (F_ISSET(env, ENV_SYSTEM_MEM)) {
+#ifdef HAVE_SHMGET
+ int segid;
+
+ /*
+ * We may be about to remove the memory referenced by rp,
+ * save the segment ID, and (optionally) wipe the original.
+ * rp must not be dereferenced after the shmdt call below.
+ */
+ segid = rp->segid;
+ if (destroy)
+ rp->segid = INVALID_REGION_SEGID;
+
+ if (shmdt(infop->addr) != 0) {
+ ret = __os_get_syserr();
+ __db_syserr(env, ret, "shmdt");
+ return (__os_posix_err(ret));
+ }
+
+ if (destroy && shmctl(segid, IPC_RMID,
+ NULL) != 0 && (ret = __os_get_syserr()) != EINVAL) {
+ __db_syserr(env, ret,
+ "shmctl: id %d: unable to delete system shared memory region",
+ segid);
+ return (__os_posix_err(ret));
+ }
+
+ return (0);
+#else
+ return (__no_system_mem(env));
+#endif
+ }
+
+#ifdef HAVE_MMAP
+#ifdef HAVE_MUNLOCK
+ if (F_ISSET(env, ENV_LOCKDOWN))
+ (void)munlock(infop->addr, rp->size); /* Best effort: result ignored. */
+#endif
+ if (munmap(infop->addr, rp->size) != 0) {
+ ret = __os_get_syserr();
+ __db_syserr(env, ret, "munmap");
+ return (__os_posix_err(ret));
+ }
+
+ if (destroy && (ret = __os_unlink(env, infop->name, 1)) != 0)
+ return (ret);
+
+ return (0);
+#else
+ COMPQUIET(destroy, 0);
+ COMPQUIET(ret, 0);
+ return (EINVAL);
+#endif
+}
+
+/*
+ * __os_mapfile --
+ *	Map a file into memory, either through the application's
+ *	replacement map function (j_file_map) or through __os_map.
+ *
+ * The replacement function is given the DB_ENV handle, the path, the
+ * length and the read-only flag; the file handle is only used on the
+ * __os_map path.  On builds without mmap, or on QNX, this always
+ * returns DB_OPNOTSUP.
+ *
+ * PUBLIC: int __os_mapfile __P((ENV *, char *, DB_FH *, size_t, int, void **));
+ */
+int
+__os_mapfile(env, path, fhp, len, is_rdonly, addrp)
+	ENV *env;
+	char *path;
+	DB_FH *fhp;
+	int is_rdonly;
+	size_t len;
+	void **addrp;
+{
+#if !defined(HAVE_MMAP) || defined(HAVE_QNX)
+	COMPQUIET(env, NULL);
+	COMPQUIET(path, NULL);
+	COMPQUIET(fhp, NULL);
+	COMPQUIET(is_rdonly, 0);
+	COMPQUIET(len, 0);
+	COMPQUIET(addrp, NULL);
+	return (DB_OPNOTSUP);
+#else
+	/* No replacement installed: map it ourselves (not a region). */
+	if (DB_GLOBAL(j_file_map) == NULL)
+		return (__os_map(env, path, fhp, len, 0, is_rdonly, addrp));
+
+	/*
+	 * The user's replacement map function is handed a DB_ENV handle,
+	 * so that handle has to be valid.
+	 */
+	DB_ASSERT(env, env != NULL && env->dbenv != NULL);
+
+	return (DB_GLOBAL(j_file_map)(
+	    env->dbenv, path, len, is_rdonly, addrp));
+#endif
+}
+
+/*
+ * __os_unmapfile --
+ * Unmap the shared memory file.
+ *
+ * env: environment handle; env->dbenv must be non-NULL.
+ * addr: start address of the mapping to remove.
+ * len: length of the mapping; used by munlock/munmap only, it is not
+ * passed to a replacement unmap function.
+ *
+ * Returns the replacement function's result if one is installed,
+ * otherwise the munmap result passed through __os_posix_err, or
+ * EINVAL on builds without mmap.
+ *
+ * PUBLIC: int __os_unmapfile __P((ENV *, void *, size_t));
+ */
+int
+__os_unmapfile(env, addr, len)
+ ENV *env;
+ void *addr;
+ size_t len;
+{
+ DB_ENV *dbenv;
+ int ret;
+
+ /*
+ * We pass a DB_ENV handle to the user's replacement unmap function,
+ * so there must be a valid handle.
+ */
+ DB_ASSERT(env, env != NULL && env->dbenv != NULL);
+ dbenv = env->dbenv;
+
+ if (FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS | DB_VERB_FILEOPS_ALL))
+ __db_msg(env, "fileops: munmap");
+
+ /* If the user replaced the unmap call, call through their interface. */
+ if (DB_GLOBAL(j_file_unmap) != NULL)
+ return (DB_GLOBAL(j_file_unmap)(dbenv, addr));
+
+#ifdef HAVE_MMAP
+#ifdef HAVE_MUNLOCK
+ if (F_ISSET(env, ENV_LOCKDOWN))
+ RETRY_CHK((munlock(addr, len)), ret);
+ /*
+ * !!!
+ * The return value is ignored: ret is reassigned by the munmap
+ * call below.
+ */
+#else
+ COMPQUIET(env, NULL);
+#endif
+ RETRY_CHK((munmap(addr, len)), ret);
+ ret = __os_posix_err(ret);
+#else
+ COMPQUIET(env, NULL);
+ ret = EINVAL;
+#endif
+ return (ret);
+}
+
+#ifdef HAVE_MMAP
+/*
+ * __os_map --
+ *	Call the mmap(2) function.
+ *
+ * env:       environment handle; env->dbenv must be non-NULL.
+ * path:      file name, used only in the verbose "fileops" message.
+ * fhp:       open file handle whose descriptor is mapped.
+ * len:       number of bytes to map, starting at file offset 0.
+ * is_region: non-zero when mapping a shared region; it only affects
+ *            the MAP_HASSEMAPHORE flag, where that flag exists.
+ * is_rdonly: non-zero for a private, read-only mapping; zero for a
+ *            shared, read/write mapping.
+ * addrp:     receives the mapped address on success; left untouched
+ *            on failure.
+ *
+ * Returns 0 on success, otherwise an error value.  On failure nothing
+ * is left mapped: if the optional lock-down step fails the mapping is
+ * released before returning, because the caller has no address with
+ * which to release it.
+ */
+static int
+__os_map(env, path, fhp, len, is_region, is_rdonly, addrp)
+	ENV *env;
+	char *path;
+	DB_FH *fhp;
+	int is_region, is_rdonly;
+	size_t len;
+	void **addrp;
+{
+	DB_ENV *dbenv;
+	int flags, prot, ret;
+	void *p;
+
+	/*
+	 * We pass a DB_ENV handle to the user's replacement map function,
+	 * so there must be a valid handle.
+	 */
+	DB_ASSERT(env, env != NULL && env->dbenv != NULL);
+	dbenv = env->dbenv;
+
+	if (FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS | DB_VERB_FILEOPS_ALL))
+		__db_msg(env, "fileops: mmap %s", path);
+
+	DB_ASSERT(env, F_ISSET(fhp, DB_FH_OPENED) && fhp->fd != -1);
+
+	/*
+	 * If it's read-only, it's private, and if it's not, it's shared.
+	 * Don't bother with an additional parameter.
+	 */
+	flags = is_rdonly ? MAP_PRIVATE : MAP_SHARED;
+
+#ifdef MAP_FILE
+	/*
+	 * Historically, MAP_FILE was required for mapping regular files,
+	 * even though it was the default.  Some systems have it, some
+	 * don't, some that have it set it to 0.
+	 */
+	flags |= MAP_FILE;
+#endif
+
+	/*
+	 * I know of no systems that implement the flag to tell the system
+	 * that the region contains semaphores, but it's not an unreasonable
+	 * thing to do, and has been part of the design since forever.  I
+	 * don't think anyone will object, but don't set it for read-only
+	 * files, it doesn't make sense.
+	 */
+#ifdef MAP_HASSEMAPHORE
+	if (is_region && !is_rdonly)
+		flags |= MAP_HASSEMAPHORE;
+#else
+	COMPQUIET(is_region, 0);
+#endif
+
+	/*
+	 * FreeBSD:
+	 * Causes data dirtied via this VM map to be flushed to physical media
+	 * only when necessary (usually by the pager) rather than gratuitously.
+	 * Typically this prevents the update daemons from flushing pages
+	 * dirtied through such maps and thus allows efficient sharing of
+	 * memory across unassociated processes using a file-backed shared
+	 * memory map.
+	 */
+#ifdef MAP_NOSYNC
+	flags |= MAP_NOSYNC;
+#endif
+
+	prot = PROT_READ | (is_rdonly ? 0 : PROT_WRITE);
+
+	/*
+	 * XXX
+	 * Work around a bug in the VMS V7.1 mmap() implementation.  To map
+	 * a file into memory on VMS it needs to be opened in a certain way,
+	 * originally.  To get the file opened in that certain way, the VMS
+	 * mmap() closes the file and re-opens it.  When it does this, it
+	 * doesn't flush any caches out to disk before closing.  The problem
+	 * this causes us is that when the memory cache doesn't get written
+	 * out, the file isn't big enough to match the memory chunk and the
+	 * mmap() call fails.  This call to fsync() fixes the problem.  DEC
+	 * thinks this isn't a bug because of language in XPG5 discussing user
+	 * responsibility for on-disk and in-memory synchronization.
+	 */
+#ifdef VMS
+	if (__os_fsync(env, fhp) == -1)
+		return (__os_posix_err(__os_get_syserr()));
+#endif
+
+	/* MAP_FAILED was not defined in early mmap implementations. */
+#ifndef MAP_FAILED
+#define	MAP_FAILED	-1
+#endif
+	if ((p = mmap(NULL,
+	    len, prot, flags, fhp->fd, (off_t)0)) == (void *)MAP_FAILED) {
+		ret = __os_get_syserr();
+		__db_syserr(env, ret, "mmap");
+		return (__os_posix_err(ret));
+	}
+
+	/*
+	 * If the environment asked for lock-down, make sure the memory
+	 * isn't paged.  For example, Solaris will page large mpools because
+	 * it thinks that I/O buffer memory is more important than we are.
+	 * The mlock system call may or may not succeed (mlock is restricted
+	 * to the super-user on some systems).  Currently, the only other use
+	 * of mmap in DB is to map read-only databases -- we don't want them
+	 * paged, either, so the call depends only on ENV_LOCKDOWN and not
+	 * on is_region.
+	 */
+	if (F_ISSET(env, ENV_LOCKDOWN)) {
+#ifdef HAVE_MLOCK
+		ret = mlock(p, len) == 0 ? 0 : __os_get_syserr();
+#else
+		ret = DB_OPNOTSUP;
+#endif
+		if (ret != 0) {
+			__db_syserr(env, ret, "mlock");
+			/*
+			 * The address is never handed back on failure, so
+			 * release the mapping here rather than leak it.  The
+			 * error is already captured in ret; a failure of the
+			 * cleanup itself is deliberately ignored.
+			 */
+			(void)munmap(p, len);
+			return (__os_posix_err(ret));
+		}
+	}
+
+	*addrp = p;
+	return (0);
+}
+#endif
+
+#ifdef HAVE_SHMGET
+#ifndef SHM_R
+#define SHM_R 0400
+#endif
+#ifndef SHM_W
+#define SHM_W 0200
+#endif
+
+/*
+ * __shm_mode --
+ *	Translate the file mode given to DbEnv::open (env->db_mode) into
+ *	the permission bits passed to shmget.
+ *
+ * A db_mode of 0 yields owner and group read/write.  Otherwise each of
+ * the owner/group/other read and write bits set in db_mode turns on
+ * the corresponding SHM_R/SHM_W bit (shifted by 3 for group and by 6
+ * for other); no other db_mode bits are looked at.
+ */
+static int
+__shm_mode(env)
+	ENV *env;
+{
+	int shm_perms;
+
+	/* Nothing specified: default to r/w owner, r/w group. */
+	if (env->db_mode == 0)
+		return (SHM_R | SHM_W | SHM_R >> 3 | SHM_W >> 3);
+
+	/* Owner. */
+	shm_perms = (env->db_mode & S_IRUSR) ? SHM_R : 0;
+	shm_perms |= (env->db_mode & S_IWUSR) ? SHM_W : 0;
+
+	/* Group. */
+	shm_perms |= (env->db_mode & S_IRGRP) ? SHM_R >> 3 : 0;
+	shm_perms |= (env->db_mode & S_IWGRP) ? SHM_W >> 3 : 0;
+
+	/* Other. */
+	shm_perms |= (env->db_mode & S_IROTH) ? SHM_R >> 6 : 0;
+	shm_perms |= (env->db_mode & S_IWOTH) ? SHM_W >> 6 : 0;
+
+	return (shm_perms);
+}
+#else
+/*
+ * __no_system_mem --
+ *	Report that environments in system memory are unavailable on
+ *	this build and hand back DB_OPNOTSUP for the caller to return.
+ */
+static int
+__no_system_mem(env)
+	ENV *env;
+{
+	int ret;
+
+	ret = DB_OPNOTSUP;
+	__db_errx(env,
+	    "architecture doesn't support environments in system memory");
+	return (ret);
+}
+#endif /* HAVE_SHMGET */