-/* all contexts, to ensure no double-opens (fcntl locks don't nest!) */
-static TDB_CONTEXT *tdbs = NULL;
-
-static int tdb_munmap(TDB_CONTEXT *tdb)
-{
- if (tdb->flags & TDB_INTERNAL)
- return 0;
-
-#ifdef HAVE_MMAP
- if (tdb->map_ptr) {
- int ret = munmap(tdb->map_ptr, tdb->map_size);
- if (ret != 0)
- return ret;
- }
-#endif
- tdb->map_ptr = NULL;
- return 0;
-}
-
-static void tdb_mmap(TDB_CONTEXT *tdb)
-{
- if (tdb->flags & TDB_INTERNAL)
- return;
-
-#ifdef HAVE_MMAP
- if (!(tdb->flags & TDB_NOMMAP)) {
- tdb->map_ptr = mmap(NULL, tdb->map_size,
- PROT_READ|(tdb->read_only? 0:PROT_WRITE),
- MAP_SHARED|MAP_FILE, tdb->fd, 0);
-
- /*
- * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
- */
-
- if (tdb->map_ptr == MAP_FAILED) {
- tdb->map_ptr = NULL;
- TDB_LOG((tdb, 2, "tdb_mmap failed for size %d (%s)\n",
- tdb->map_size, strerror(errno)));
- }
- } else {
- tdb->map_ptr = NULL;
- }
-#else
- tdb->map_ptr = NULL;
-#endif
-}
-
-/* Endian conversion: we only ever deal with 4 byte quantities */
-static void *convert(void *buf, u32 size)
-{
- u32 i, *p = buf;
- for (i = 0; i < size / 4; i++)
- p[i] = TDB_BYTEREV(p[i]);
- return buf;
-}
-#define DOCONV() (tdb->flags & TDB_CONVERT)
-#define CONVERT(x) (DOCONV() ? convert(&x, sizeof(x)) : &x)
-
-/* the body of the database is made of one list_struct for the free space
- plus a separate data list for each hash value */
-struct list_struct {
- tdb_off next; /* offset of the next record in the list */
- tdb_len rec_len; /* total byte length of record */
- tdb_len key_len; /* byte length of key */
- tdb_len data_len; /* byte length of data */
- u32 full_hash; /* the full 32 bit hash of the key */
- u32 magic; /* try to catch errors */
- /* the following union is implied:
- union {
- char record[rec_len];
- struct {
- char key[key_len];
- char data[data_len];
- }
- u32 totalsize; (tailer)
- }
- */
-};
-
-/* a byte range locking function - return 0 on success
- this functions locks/unlocks 1 byte at the specified offset.
-
- On error, errno is also set so that errors are passed back properly
- through tdb_open(). */
-static int tdb_brlock(TDB_CONTEXT *tdb, tdb_off offset,
- int rw_type, int lck_type, int probe)
-{
- struct flock fl;
- int ret;
-
- if (tdb->flags & TDB_NOLOCK)
- return 0;
- if ((rw_type == F_WRLCK) && (tdb->read_only)) {
- errno = EACCES;
- return -1;
- }
-
- fl.l_type = rw_type;
- fl.l_whence = SEEK_SET;
- fl.l_start = offset;
- fl.l_len = 1;
- fl.l_pid = 0;
-
- do {
- ret = fcntl(tdb->fd,lck_type,&fl);
- } while (ret == -1 && errno == EINTR);
-
- if (ret == -1) {
- if (!probe && lck_type != F_SETLK) {
- /* Ensure error code is set for log fun to examine. */
- tdb->ecode = TDB_ERR_LOCK;
- TDB_LOG((tdb, 5,"tdb_brlock failed (fd=%d) at offset %d rw_type=%d lck_type=%d\n",
- tdb->fd, offset, rw_type, lck_type));
- }
- /* Generic lock error. errno set by fcntl.
- * EAGAIN is an expected return from non-blocking
- * locks. */
- if (errno != EAGAIN) {
- TDB_LOG((tdb, 5, "tdb_brlock failed (fd=%d) at offset %d rw_type=%d lck_type=%d: %s\n",
- tdb->fd, offset, rw_type, lck_type,
- strerror(errno)));
- }
- return TDB_ERRCODE(TDB_ERR_LOCK, -1);
- }
- return 0;
-}
-
-/* lock a list in the database. list -1 is the alloc list */
-static int tdb_lock(TDB_CONTEXT *tdb, int list, int ltype)
-{
- if (list < -1 || list >= (int)tdb->header.hash_size) {
- TDB_LOG((tdb, 0,"tdb_lock: invalid list %d for ltype=%d\n",
- list, ltype));
- return -1;
- }
- if (tdb->flags & TDB_NOLOCK)
- return 0;
-
- /* Since fcntl locks don't nest, we do a lock for the first one,
- and simply bump the count for future ones */
- if (tdb->locked[list+1].count == 0) {
- if (!tdb->read_only && tdb->header.rwlocks) {
- if (tdb_spinlock(tdb, list, ltype)) {
- TDB_LOG((tdb, 0, "tdb_lock spinlock failed on list %d ltype=%d\n",
- list, ltype));
- return -1;
- }
- } else if (tdb_brlock(tdb,FREELIST_TOP+4*list,ltype,F_SETLKW, 0)) {
- TDB_LOG((tdb, 0,"tdb_lock failed on list %d ltype=%d (%s)\n",
- list, ltype, strerror(errno)));
- return -1;
- }
- tdb->locked[list+1].ltype = ltype;
- }
- tdb->locked[list+1].count++;
- return 0;
-}
-
-/* unlock the database: returns void because it's too late for errors. */
- /* changed to return int it may be interesting to know there
- has been an error --simo */
-static int tdb_unlock(TDB_CONTEXT *tdb, int list, int ltype)
-{
- int ret = -1;
-
- if (tdb->flags & TDB_NOLOCK)
- return 0;
-
- /* Sanity checks */
- if (list < -1 || list >= (int)tdb->header.hash_size) {
- TDB_LOG((tdb, 0, "tdb_unlock: list %d invalid (%d)\n", list, tdb->header.hash_size));
- return ret;
- }
-
- if (tdb->locked[list+1].count==0) {
- TDB_LOG((tdb, 0, "tdb_unlock: count is 0\n"));
- return ret;
- }
-
- if (tdb->locked[list+1].count == 1) {
- /* Down to last nested lock: unlock underneath */
- if (!tdb->read_only && tdb->header.rwlocks) {
- ret = tdb_spinunlock(tdb, list, ltype);
- } else {
- ret = tdb_brlock(tdb, FREELIST_TOP+4*list, F_UNLCK, F_SETLKW, 0);
- }
- } else {
- ret = 0;
- }
- tdb->locked[list+1].count--;
-
- if (ret)
- TDB_LOG((tdb, 0,"tdb_unlock: An error occurred unlocking!\n"));
- return ret;
-}
-
-/* This is based on the hash algorithm from gdbm */
-static u32 default_tdb_hash(TDB_DATA *key)
-{
- u32 value; /* Used to compute the hash value. */
- u32 i; /* Used to cycle through random values. */
-
- /* Set the initial value from the key size. */
- for (value = 0x238F13AF * key->dsize, i=0; i < key->dsize; i++)
- value = (value + (key->dptr[i] << (i*5 % 24)));
-
- return (1103515243 * value + 12345);
-}
-
-/* check for an out of bounds access - if it is out of bounds then
- see if the database has been expanded by someone else and expand
- if necessary
- note that "len" is the minimum length needed for the db
-*/
-static int tdb_oob(TDB_CONTEXT *tdb, tdb_off len, int probe)
-{
- struct stat st;
- if (len <= tdb->map_size)
- return 0;
- if (tdb->flags & TDB_INTERNAL) {
- if (!probe) {
- /* Ensure ecode is set for log fn. */
- tdb->ecode = TDB_ERR_IO;
- TDB_LOG((tdb, 0,"tdb_oob len %d beyond internal malloc size %d\n",
- (int)len, (int)tdb->map_size));
- }
- return TDB_ERRCODE(TDB_ERR_IO, -1);
- }
-
- if (fstat(tdb->fd, &st) == -1)
- return TDB_ERRCODE(TDB_ERR_IO, -1);
-
- if (st.st_size < (size_t)len) {
- if (!probe) {
- /* Ensure ecode is set for log fn. */
- tdb->ecode = TDB_ERR_IO;
- TDB_LOG((tdb, 0,"tdb_oob len %d beyond eof at %d\n",
- (int)len, (int)st.st_size));
- }
- return TDB_ERRCODE(TDB_ERR_IO, -1);
- }
-
- /* Unmap, update size, remap */
- if (tdb_munmap(tdb) == -1)
- return TDB_ERRCODE(TDB_ERR_IO, -1);
- tdb->map_size = st.st_size;
- tdb_mmap(tdb);
- return 0;
-}
-
-/* write a lump of data at a specified offset */
-static int tdb_write(TDB_CONTEXT *tdb, tdb_off off, void *buf, tdb_len len)
-{
- if (tdb_oob(tdb, off + len, 0) != 0)
- return -1;
-
- if (tdb->map_ptr)
- memcpy(off + (char *)tdb->map_ptr, buf, len);
-#ifdef HAVE_PWRITE
- else if (pwrite(tdb->fd, buf, len, off) != (ssize_t)len) {
-#else
- else if (lseek(tdb->fd, off, SEEK_SET) != off
- || write(tdb->fd, buf, len) != (ssize_t)len) {
-#endif
- /* Ensure ecode is set for log fn. */
- tdb->ecode = TDB_ERR_IO;
- TDB_LOG((tdb, 0,"tdb_write failed at %d len=%d (%s)\n",
- off, len, strerror(errno)));
- return TDB_ERRCODE(TDB_ERR_IO, -1);
- }
- return 0;
-}
-
-/* read a lump of data at a specified offset, maybe convert */
-static int tdb_read(TDB_CONTEXT *tdb,tdb_off off,void *buf,tdb_len len,int cv)
-{
- if (tdb_oob(tdb, off + len, 0) != 0)
- return -1;
-
- if (tdb->map_ptr)
- memcpy(buf, off + (char *)tdb->map_ptr, len);
-#ifdef HAVE_PREAD
- else if (pread(tdb->fd, buf, len, off) != (ssize_t)len) {
-#else
- else if (lseek(tdb->fd, off, SEEK_SET) != off
- || read(tdb->fd, buf, len) != (ssize_t)len) {
-#endif
- /* Ensure ecode is set for log fn. */
- tdb->ecode = TDB_ERR_IO;
- TDB_LOG((tdb, 0,"tdb_read failed at %d len=%d (%s)\n",
- off, len, strerror(errno)));
- return TDB_ERRCODE(TDB_ERR_IO, -1);
- }
- if (cv)
- convert(buf, len);
- return 0;
-}
-
-/* read a lump of data, allocating the space for it */
-static char *tdb_alloc_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_len len)
-{
- char *buf;
-
- if (!(buf = malloc(len))) {
- /* Ensure ecode is set for log fn. */
- tdb->ecode = TDB_ERR_OOM;
- TDB_LOG((tdb, 0,"tdb_alloc_read malloc failed len=%d (%s)\n",
- len, strerror(errno)));
- return TDB_ERRCODE(TDB_ERR_OOM, buf);
- }
- if (tdb_read(tdb, offset, buf, len, 0) == -1) {
- SAFE_FREE(buf);
- return NULL;
- }
- return buf;
-}
-
-/* read/write a tdb_off */
-static int ofs_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d)
-{
- return tdb_read(tdb, offset, (char*)d, sizeof(*d), DOCONV());
-}
-static int ofs_write(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d)
-{
- tdb_off off = *d;
- return tdb_write(tdb, offset, CONVERT(off), sizeof(*d));
-}
-
-/* read/write a record */
-static int rec_read(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec)
-{
- if (tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1)
- return -1;
- if (TDB_BAD_MAGIC(rec)) {
- /* Ensure ecode is set for log fn. */
- tdb->ecode = TDB_ERR_CORRUPT;
- TDB_LOG((tdb, 0,"rec_read bad magic 0x%x at offset=%d\n", rec->magic, offset));
- return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
- }
- return tdb_oob(tdb, rec->next+sizeof(*rec), 0);
-}
-static int rec_write(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec)
-{
- struct list_struct r = *rec;
- return tdb_write(tdb, offset, CONVERT(r), sizeof(r));
-}
-
-/* read a freelist record and check for simple errors */
-static int rec_free_read(TDB_CONTEXT *tdb, tdb_off off, struct list_struct *rec)
-{
- if (tdb_read(tdb, off, rec, sizeof(*rec),DOCONV()) == -1)
- return -1;
-
- if (rec->magic == TDB_MAGIC) {
- /* this happens when a app is showdown while deleting a record - we should
- not completely fail when this happens */
- TDB_LOG((tdb, 0,"rec_free_read non-free magic 0x%x at offset=%d - fixing\n",
- rec->magic, off));
- rec->magic = TDB_FREE_MAGIC;
- if (tdb_write(tdb, off, rec, sizeof(*rec)) == -1)
- return -1;
- }
-
- if (rec->magic != TDB_FREE_MAGIC) {
- /* Ensure ecode is set for log fn. */
- tdb->ecode = TDB_ERR_CORRUPT;
- TDB_LOG((tdb, 0,"rec_free_read bad magic 0x%x at offset=%d\n",
- rec->magic, off));
- return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
- }
- if (tdb_oob(tdb, rec->next+sizeof(*rec), 0) != 0)
- return -1;
- return 0;
-}
-
-/* update a record tailer (must hold allocation lock) */
-static int update_tailer(TDB_CONTEXT *tdb, tdb_off offset,
- const struct list_struct *rec)
-{
- tdb_off totalsize;
-
- /* Offset of tailer from record header */
- totalsize = sizeof(*rec) + rec->rec_len;
- return ofs_write(tdb, offset + totalsize - sizeof(tdb_off),
- &totalsize);
-}
-
-static tdb_off tdb_dump_record(TDB_CONTEXT *tdb, tdb_off offset)
-{
- struct list_struct rec;
- tdb_off tailer_ofs, tailer;
-
- if (tdb_read(tdb, offset, (char *)&rec, sizeof(rec), DOCONV()) == -1) {
- printf("ERROR: failed to read record at %u\n", offset);
- return 0;
- }
-
- printf(" rec: offset=0x%08x next=0x%08x rec_len=%d key_len=%d data_len=%d full_hash=0x%x magic=0x%x\n",
- offset, rec.next, rec.rec_len, rec.key_len, rec.data_len, rec.full_hash, rec.magic);
-
- tailer_ofs = offset + sizeof(rec) + rec.rec_len - sizeof(tdb_off);
- if (ofs_read(tdb, tailer_ofs, &tailer) == -1) {
- printf("ERROR: failed to read tailer at %u\n", tailer_ofs);
- return rec.next;
- }
-
- if (tailer != rec.rec_len + sizeof(rec)) {
- printf("ERROR: tailer does not match record! tailer=%u totalsize=%u\n",
- (unsigned int)tailer, (unsigned int)(rec.rec_len + sizeof(rec)));
- }
- return rec.next;
-}
-
-static int tdb_dump_chain(TDB_CONTEXT *tdb, int i)
-{
- tdb_off rec_ptr, top;
-
- top = TDB_HASH_TOP(i);
-
- if (tdb_lock(tdb, i, F_WRLCK) != 0)
- return -1;
-
- if (ofs_read(tdb, top, &rec_ptr) == -1)
- return tdb_unlock(tdb, i, F_WRLCK);
-
- if (rec_ptr)
- printf("hash=%d\n", i);
-
- while (rec_ptr) {
- rec_ptr = tdb_dump_record(tdb, rec_ptr);
- }
-
- return tdb_unlock(tdb, i, F_WRLCK);
-}
-
-void tdb_dump_all(TDB_CONTEXT *tdb)
-{
- int i;
- for (i=0;i<tdb->header.hash_size;i++) {
- tdb_dump_chain(tdb, i);
- }
- printf("freelist:\n");
- tdb_dump_chain(tdb, -1);
-}
-
-int tdb_printfreelist(TDB_CONTEXT *tdb)
-{
- int ret;
- long total_free = 0;
- tdb_off offset, rec_ptr;
- struct list_struct rec;
-
- if ((ret = tdb_lock(tdb, -1, F_WRLCK)) != 0)
- return ret;
-
- offset = FREELIST_TOP;
-
- /* read in the freelist top */
- if (ofs_read(tdb, offset, &rec_ptr) == -1) {
- tdb_unlock(tdb, -1, F_WRLCK);
- return 0;
- }
-
- printf("freelist top=[0x%08x]\n", rec_ptr );
- while (rec_ptr) {
- if (tdb_read(tdb, rec_ptr, (char *)&rec, sizeof(rec), DOCONV()) == -1) {
- tdb_unlock(tdb, -1, F_WRLCK);
- return -1;
- }
-
- if (rec.magic != TDB_FREE_MAGIC) {
- printf("bad magic 0x%08x in free list\n", rec.magic);
- tdb_unlock(tdb, -1, F_WRLCK);
- return -1;
- }
-
- printf("entry offset=[0x%08x], rec.rec_len = [0x%08x (%d)] (end = 0x%08x)\n",
- rec_ptr, rec.rec_len, rec.rec_len, rec_ptr + rec.rec_len);
- total_free += rec.rec_len;
-
- /* move to the next record */
- rec_ptr = rec.next;
- }
- printf("total rec_len = [0x%08x (%d)]\n", (int)total_free,
- (int)total_free);
-
- return tdb_unlock(tdb, -1, F_WRLCK);
-}
-
-/* Remove an element from the freelist. Must have alloc lock. */
-static int remove_from_freelist(TDB_CONTEXT *tdb, tdb_off off, tdb_off next)
-{
- tdb_off last_ptr, i;
-
- /* read in the freelist top */
- last_ptr = FREELIST_TOP;
- while (ofs_read(tdb, last_ptr, &i) != -1 && i != 0) {
- if (i == off) {
- /* We've found it! */
- return ofs_write(tdb, last_ptr, &next);
- }
- /* Follow chain (next offset is at start of record) */
- last_ptr = i;
- }
- TDB_LOG((tdb, 0,"remove_from_freelist: not on list at off=%d\n", off));
- return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
-}
-
-/* Add an element into the freelist. Merge adjacent records if
- neccessary. */
-static int tdb_free(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec)
-{
- tdb_off right, left;
-
- /* Allocation and tailer lock */
- if (tdb_lock(tdb, -1, F_WRLCK) != 0)
- return -1;
-
- /* set an initial tailer, so if we fail we don't leave a bogus record */
- if (update_tailer(tdb, offset, rec) != 0) {
- TDB_LOG((tdb, 0, "tdb_free: upfate_tailer failed!\n"));
- goto fail;
- }
-
- /* Look right first (I'm an Australian, dammit) */
- right = offset + sizeof(*rec) + rec->rec_len;
- if (right + sizeof(*rec) <= tdb->map_size) {
- struct list_struct r;
-
- if (tdb_read(tdb, right, &r, sizeof(r), DOCONV()) == -1) {
- TDB_LOG((tdb, 0, "tdb_free: right read failed at %u\n", right));
- goto left;
- }
-
- /* If it's free, expand to include it. */
- if (r.magic == TDB_FREE_MAGIC) {
- if (remove_from_freelist(tdb, right, r.next) == -1) {
- TDB_LOG((tdb, 0, "tdb_free: right free failed at %u\n", right));
- goto left;
- }
- rec->rec_len += sizeof(r) + r.rec_len;
- }
- }
-
-left:
- /* Look left */
- left = offset - sizeof(tdb_off);
- if (left > TDB_DATA_START(tdb->header.hash_size)) {
- struct list_struct l;
- tdb_off leftsize;
-
- /* Read in tailer and jump back to header */
- if (ofs_read(tdb, left, &leftsize) == -1) {
- TDB_LOG((tdb, 0, "tdb_free: left offset read failed at %u\n", left));
- goto update;
- }
- left = offset - leftsize;
-
- /* Now read in record */
- if (tdb_read(tdb, left, &l, sizeof(l), DOCONV()) == -1) {
- TDB_LOG((tdb, 0, "tdb_free: left read failed at %u (%u)\n", left, leftsize));
- goto update;
- }
-
- /* If it's free, expand to include it. */
- if (l.magic == TDB_FREE_MAGIC) {
- if (remove_from_freelist(tdb, left, l.next) == -1) {
- TDB_LOG((tdb, 0, "tdb_free: left free failed at %u\n", left));
- goto update;
- } else {
- offset = left;
- rec->rec_len += leftsize;
- }
- }
- }
-
-update:
- if (update_tailer(tdb, offset, rec) == -1) {
- TDB_LOG((tdb, 0, "tdb_free: update_tailer failed at %u\n", offset));
- goto fail;
- }
-
- /* Now, prepend to free list */
- rec->magic = TDB_FREE_MAGIC;
-
- if (ofs_read(tdb, FREELIST_TOP, &rec->next) == -1 ||
- rec_write(tdb, offset, rec) == -1 ||
- ofs_write(tdb, FREELIST_TOP, &offset) == -1) {
- TDB_LOG((tdb, 0, "tdb_free record write failed at offset=%d\n", offset));
- goto fail;
- }
-
- /* And we're done. */
- tdb_unlock(tdb, -1, F_WRLCK);
- return 0;
-
- fail:
- tdb_unlock(tdb, -1, F_WRLCK);
- return -1;
-}
-
-
-/* expand a file. we prefer to use ftruncate, as that is what posix
- says to use for mmap expansion */
-static int expand_file(TDB_CONTEXT *tdb, tdb_off size, tdb_off addition)
-{
- char buf[1024];
-#if HAVE_FTRUNCATE_EXTEND
- if (ftruncate(tdb->fd, size+addition) != 0) {
- TDB_LOG((tdb, 0, "expand_file ftruncate to %d failed (%s)\n",
- size+addition, strerror(errno)));
- return -1;
- }
-#else
- char b = 0;
-
-#ifdef HAVE_PWRITE
- if (pwrite(tdb->fd, &b, 1, (size+addition) - 1) != 1) {
-#else
- if (lseek(tdb->fd, (size+addition) - 1, SEEK_SET) != (size+addition) - 1 ||
- write(tdb->fd, &b, 1) != 1) {
-#endif
- TDB_LOG((tdb, 0, "expand_file to %d failed (%s)\n",
- size+addition, strerror(errno)));
- return -1;
- }
-#endif
-
- /* now fill the file with something. This ensures that the file isn't sparse, which would be
- very bad if we ran out of disk. This must be done with write, not via mmap */
- memset(buf, 0x42, sizeof(buf));
- while (addition) {
- int n = addition>sizeof(buf)?sizeof(buf):addition;
-#ifdef HAVE_PWRITE
- int ret = pwrite(tdb->fd, buf, n, size);
-#else
- int ret;
- if (lseek(tdb->fd, size, SEEK_SET) != size)
- return -1;
- ret = write(tdb->fd, buf, n);
-#endif
- if (ret != n) {
- TDB_LOG((tdb, 0, "expand_file write of %d failed (%s)\n",
- n, strerror(errno)));
- return -1;
- }
- addition -= n;
- size += n;
- }
- return 0;
-}
-
-
-/* expand the database at least size bytes by expanding the underlying
- file and doing the mmap again if necessary */
-static int tdb_expand(TDB_CONTEXT *tdb, tdb_off size)
-{
- struct list_struct rec;
- tdb_off offset;
-
- if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
- TDB_LOG((tdb, 0, "lock failed in tdb_expand\n"));
- return -1;
- }
-
- /* must know about any previous expansions by another process */
- tdb_oob(tdb, tdb->map_size + 1, 1);
-
- /* always make room for at least 10 more records, and round
- the database up to a multiple of TDB_PAGE_SIZE */
- size = TDB_ALIGN(tdb->map_size + size*10, TDB_PAGE_SIZE) - tdb->map_size;
-
- if (!(tdb->flags & TDB_INTERNAL))
- tdb_munmap(tdb);
-
- /*
- * We must ensure the file is unmapped before doing this
- * to ensure consistency with systems like OpenBSD where
- * writes and mmaps are not consistent.
- */
-
- /* expand the file itself */
- if (!(tdb->flags & TDB_INTERNAL)) {
- if (expand_file(tdb, tdb->map_size, size) != 0)
- goto fail;
- }
-
- tdb->map_size += size;
-
- if (tdb->flags & TDB_INTERNAL) {
- char *new_map_ptr = realloc(tdb->map_ptr, tdb->map_size);
- if (!new_map_ptr) {
- tdb->map_size -= size;
- goto fail;
- }
- tdb->map_ptr = new_map_ptr;
- } else {
- /*
- * We must ensure the file is remapped before adding the space
- * to ensure consistency with systems like OpenBSD where
- * writes and mmaps are not consistent.
- */
-
- /* We're ok if the mmap fails as we'll fallback to read/write */
- tdb_mmap(tdb);
- }
-
- /* form a new freelist record */
- memset(&rec,'\0',sizeof(rec));
- rec.rec_len = size - sizeof(rec);
-
- /* link it into the free list */
- offset = tdb->map_size - size;
- if (tdb_free(tdb, offset, &rec) == -1)
- goto fail;
-
- tdb_unlock(tdb, -1, F_WRLCK);
- return 0;
- fail:
- tdb_unlock(tdb, -1, F_WRLCK);
- return -1;
-}
-
-
-/*
- the core of tdb_allocate - called when we have decided which
- free list entry to use
- */
-static tdb_off tdb_allocate_ofs(TDB_CONTEXT *tdb, tdb_len length, tdb_off rec_ptr,
- struct list_struct *rec, tdb_off last_ptr)
-{
- struct list_struct newrec;
- tdb_off newrec_ptr;
-
- memset(&newrec, '\0', sizeof(newrec));
-
- /* found it - now possibly split it up */
- if (rec->rec_len > length + MIN_REC_SIZE) {
- /* Length of left piece */
- length = TDB_ALIGN(length, TDB_ALIGNMENT);
-
- /* Right piece to go on free list */
- newrec.rec_len = rec->rec_len - (sizeof(*rec) + length);
- newrec_ptr = rec_ptr + sizeof(*rec) + length;
-
- /* And left record is shortened */
- rec->rec_len = length;
- } else {
- newrec_ptr = 0;
- }
-
- /* Remove allocated record from the free list */
- if (ofs_write(tdb, last_ptr, &rec->next) == -1) {
- return 0;
- }
-
- /* Update header: do this before we drop alloc
- lock, otherwise tdb_free() might try to
- merge with us, thinking we're free.
- (Thanks Jeremy Allison). */
- rec->magic = TDB_MAGIC;
- if (rec_write(tdb, rec_ptr, rec) == -1) {
- return 0;
- }
-
- /* Did we create new block? */
- if (newrec_ptr) {
- /* Update allocated record tailer (we
- shortened it). */
- if (update_tailer(tdb, rec_ptr, rec) == -1) {
- return 0;
- }
-
- /* Free new record */
- if (tdb_free(tdb, newrec_ptr, &newrec) == -1) {
- return 0;
- }
- }
-
- /* all done - return the new record offset */
- return rec_ptr;
-}
-
-/* allocate some space from the free list. The offset returned points
- to a unconnected list_struct within the database with room for at
- least length bytes of total data
-
- 0 is returned if the space could not be allocated
- */
-static tdb_off tdb_allocate(TDB_CONTEXT *tdb, tdb_len length,
- struct list_struct *rec)
-{
- tdb_off rec_ptr, last_ptr, newrec_ptr;
- struct {
- tdb_off rec_ptr, last_ptr;
- tdb_len rec_len;
- } bestfit;
-
- if (tdb_lock(tdb, -1, F_WRLCK) == -1)
- return 0;
-
- /* Extra bytes required for tailer */
- length += sizeof(tdb_off);
-
- again:
- last_ptr = FREELIST_TOP;
-
- /* read in the freelist top */
- if (ofs_read(tdb, FREELIST_TOP, &rec_ptr) == -1)
- goto fail;
-
- bestfit.rec_ptr = 0;
-
- /*
- this is a best fit allocation strategy. Originally we used
- a first fit strategy, but it suffered from massive fragmentation
- issues when faced with a slowly increasing record size.
- */
- while (rec_ptr) {
- if (rec_free_read(tdb, rec_ptr, rec) == -1) {
- goto fail;
- }
-
- if (rec->rec_len >= length) {
- if (bestfit.rec_ptr == 0 ||
- rec->rec_len < bestfit.rec_len) {
- bestfit.rec_len = rec->rec_len;
- bestfit.rec_ptr = rec_ptr;
- bestfit.last_ptr = last_ptr;
- /* consider a fit to be good enough if we aren't wasting more than half the space */
- if (bestfit.rec_len < 2*length) {
- break;
- }
- }
- }
-
- /* move to the next record */
- last_ptr = rec_ptr;
- rec_ptr = rec->next;
- }
-
- if (bestfit.rec_ptr != 0) {
- if (rec_free_read(tdb, bestfit.rec_ptr, rec) == -1) {
- goto fail;
- }
-
- newrec_ptr = tdb_allocate_ofs(tdb, length, bestfit.rec_ptr, rec, bestfit.last_ptr);
- tdb_unlock(tdb, -1, F_WRLCK);
- return newrec_ptr;
- }
-
- /* we didn't find enough space. See if we can expand the
- database and if we can then try again */
- if (tdb_expand(tdb, length + sizeof(*rec)) == 0)
- goto again;
- fail:
- tdb_unlock(tdb, -1, F_WRLCK);
- return 0;
-}
-
-/* initialise a new database with a specified hash size */
-static int tdb_new_database(TDB_CONTEXT *tdb, int hash_size)
-{
- struct tdb_header *newdb;
- int size, ret = -1;
-
- /* We make it up in memory, then write it out if not internal */
- size = sizeof(struct tdb_header) + (hash_size+1)*sizeof(tdb_off);
- if (!(newdb = calloc(size, 1)))
- return TDB_ERRCODE(TDB_ERR_OOM, -1);
-
- /* Fill in the header */
- newdb->version = TDB_VERSION;
- newdb->hash_size = hash_size;
-#ifdef USE_SPINLOCKS
- newdb->rwlocks = size;
-#endif
- if (tdb->flags & TDB_INTERNAL) {
- tdb->map_size = size;
- tdb->map_ptr = (char *)newdb;
- memcpy(&tdb->header, newdb, sizeof(tdb->header));
- /* Convert the `ondisk' version if asked. */
- CONVERT(*newdb);
- return 0;
- }
- if (lseek(tdb->fd, 0, SEEK_SET) == -1)
- goto fail;
-
- if (ftruncate(tdb->fd, 0) == -1)
- goto fail;
-
- /* This creates an endian-converted header, as if read from disk */
- CONVERT(*newdb);
- memcpy(&tdb->header, newdb, sizeof(tdb->header));
- /* Don't endian-convert the magic food! */
- memcpy(newdb->magic_food, TDB_MAGIC_FOOD, strlen(TDB_MAGIC_FOOD)+1);
- if (write(tdb->fd, newdb, size) != size)
- ret = -1;
- else
- ret = tdb_create_rwlocks(tdb->fd, hash_size);
-
- fail:
- SAFE_FREE(newdb);
- return ret;
-}
-