2 Unix SMB/Netbios implementation.
4 Samba database functions
5 Copyright (C) Andrew Tridgell 1999-2000
6 Copyright (C) Luke Kenneth Casson Leighton 2000
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2 of the License, or
13 (at your option) any later version.
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
/* On-disk format constants and helper macros.  TDB_HASH_TOP, TDB_LOG and
   BUCKET expand to expressions that use a variable named tdb, so they
   only work where such a variable is in scope. */
/* Magic string; tdb_new_database() copies it into the header magic_food. */
44 #define TDB_MAGIC_FOOD "TDB file\n"
/* Value tdb_new_database() stores in the header version field. */
45 #define TDB_VERSION (0x26011967 + 6)
/* Record magic numbers.  TDB_MAGIC marks a record in use (set by
   tdb_allocate() and tdb_store()), TDB_FREE_MAGIC (its bitwise complement)
   marks a free-list record, and TDB_DEAD_MAGIC marks a record that
   do_delete() could not take the record write lock for. */
46 #define TDB_MAGIC (0x26011999U)
47 #define TDB_FREE_MAGIC (~TDB_MAGIC)
48 #define TDB_DEAD_MAGIC (0xFEE1DEAD)
/* Alignment tdb_allocate() applies to the length when it splits a free record. */
49 #define TDB_ALIGNMENT 4
/* Slack threshold: tdb_allocate() splits a free record only when its
   rec_len exceeds the requested length by more than this. */
50 #define MIN_REC_SIZE (2*sizeof(struct list_struct) + TDB_ALIGNMENT)
/* Hash size assigned in tdb_open_ex(); the tdb_open() comment says a
   hash size of zero selects the default. */
51 #define DEFAULT_HASH_SIZE 131
/* tdb_expand() rounds the new map size up to a multiple of this. */
52 #define TDB_PAGE_SIZE 0x2000
/* File offset of the free-list head: it sits directly after the header. */
53 #define FREELIST_TOP (sizeof(struct tdb_header))
/* Round x up to a multiple of a.  The mask arithmetic is only correct
   when a is a power of two. */
54 #define TDB_ALIGN(x,a) (((x) + (a)-1) & ~((a)-1))
/* Reverse the byte order of an unsigned 32-bit value.  x is evaluated
   four times, so it must not have side effects. */
55 #define TDB_BYTEREV(x) (((((x)&0xff)<<24)|((x)&0xFF00)<<8)|(((x)>>8)&0xFF00)|((x)>>24))
/* True if record r carries the dead-record magic. */
56 #define TDB_DEAD(r) ((r)->magic == TDB_DEAD_MAGIC)
/* True if r is neither an in-use record nor a dead record. */
57 #define TDB_BAD_MAGIC(r) ((r)->magic != TDB_MAGIC && !TDB_DEAD(r))
/* File offset of the chain head for a hash value: one tdb_off slot per
   bucket, laid out after the free-list head slot. */
58 #define TDB_HASH_TOP(hash) (FREELIST_TOP + (BUCKET(hash)+1)*sizeof(tdb_off))
/* Call tdb->log_fn with the parenthesised argument list x when a log
   function is set; the whole expression always evaluates to 0. */
59 #define TDB_LOG(x) (tdb->log_fn?((tdb->log_fn x),0) : 0)
/* NOTE(review): in this listing MAP_FAILED is defined unconditionally;
   presumably the full file guards it with a preprocessor test for
   systems whose headers lack it -- confirm against the complete source. */
70 #define MAP_FAILED ((void *)-1)
/* Reduce a hash value to a bucket index below header.hash_size. */
73 #define BUCKET(hash) ((hash) % tdb->header.hash_size)
76 /* all contexts, to ensure no double-opens (fcntl locks don't nest!) */
/* Starts out as NULL.  NOTE(review): the code that adds contexts to or
   removes them from this pointer is not visible in this part of the file. */
77 static TDB_CONTEXT *tdbs = NULL;
/* Drop the memory mapping that tdb_mmap() stored in tdb->map_ptr.
   NOTE(review): the statement guarded by the TDB_INTERNAL test is not
   visible in this listing. */
79 static void tdb_munmap(TDB_CONTEXT *tdb)
81 if (tdb->flags & TDB_INTERNAL)
/* Unmap map_size bytes starting at map_ptr. */
86 munmap(tdb->map_ptr, tdb->map_size);
/* Map the database file into memory and store the address in
   tdb->map_ptr.  No mapping is attempted when TDB_NOMMAP is set.
   NOTE(review): the statement guarded by the TDB_INTERNAL test, and what
   map_ptr is set to after a failed mmap(), are not visible in this listing. */
91 static void tdb_mmap(TDB_CONTEXT *tdb)
93 if (tdb->flags & TDB_INTERNAL)
97 if (!(tdb->flags & TDB_NOMMAP)) {
/* Shared mapping of map_size bytes from file offset 0; writable unless
   the context is read-only. */
98 tdb->map_ptr = mmap(NULL, tdb->map_size,
99 PROT_READ|(tdb->read_only? 0:PROT_WRITE),
100 MAP_SHARED|MAP_FILE, tdb->fd, 0);
103 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
106 if (tdb->map_ptr == MAP_FAILED) {
/* Report the failure, with the errno text, through the log callback. */
108 TDB_LOG((tdb, 2, "tdb_mmap failed for size %d (%s)\n",
109 tdb->map_size, strerror(errno)));
119 /* Endian conversion: we only ever deal with 4 byte quantities */
/* Applies TDB_BYTEREV to size/4 consecutive 32-bit words; any trailing
   size%4 bytes are left alone by the loop.
   NOTE(review): the declaration of p and the return statement are not
   visible in this listing.  CONVERT() uses the result as the buffer
   pointer, so presumably p aliases buf and buf is returned -- confirm. */
120 static void *convert(void *buf, u32 size)
123 for (i = 0; i < size / 4; i++)
124 p[i] = TDB_BYTEREV(p[i]);
/* True when the TDB_CONVERT flag is set on the tdb in scope. */
127 #define DOCONV() (tdb->flags & TDB_CONVERT)
/* Yield a pointer to x, first passing x through convert() when DOCONV()
   is true.  x must be an lvalue and a variable named tdb must be in scope. */
128 #define CONVERT(x) (DOCONV() ? convert(&x, sizeof(x)) : &x)
130 /* the body of the database is made of one list_struct for the free space
131 plus a separate data list for each hash value */
/* NOTE(review): the line that opens the structure is not visible in this
   listing.  The members below are the ones accessed through
   struct list_struct elsewhere in this file.  The magic member holds
   TDB_MAGIC, TDB_FREE_MAGIC or TDB_DEAD_MAGIC. */
133 tdb_off next; /* offset of the next record in the list */
134 tdb_len rec_len; /* total byte length of record */
135 tdb_len key_len; /* byte length of key */
136 tdb_len data_len; /* byte length of data */
137 u32 full_hash; /* the full 32 bit hash of the key */
138 u32 magic; /* try to catch errors */
/* NOTE(review): the descriptive comment that starts on the next line is
   not closed within this listing. */
139 /* the following union is implied:
141 char record[rec_len];
146 u32 totalsize; (tailer)
151 /* a byte range locking function - return 0 on success
152 this function locks/unlocks 1 byte at the specified offset.
154 On error, errno is also set so that errors are passed back properly
155 through tdb_open(). */
/* Parameters, as used by the callers in this file:
   offset   - file offset of the byte to lock
   rw_type  - F_RDLCK, F_WRLCK or F_UNLCK
   lck_type - fcntl command (F_SETLK or F_SETLKW), handed straight to fcntl()
   probe    - 1 from write_lock_record() and 0 elsewhere; how it is used
              is not visible in this listing
   When fcntl() reports failure the result is TDB_ERRCODE(TDB_ERR_LOCK, -1). */
156 static int tdb_brlock(TDB_CONTEXT *tdb, tdb_off offset,
157 int rw_type, int lck_type, int probe)
/* Early tests for no-lock and read-only contexts.  NOTE(review): the
   statements they guard are not visible in this listing. */
161 if (tdb->flags & TDB_NOLOCK)
163 if (tdb->read_only) {
/* The lock offset is interpreted relative to the start of the file. */
169 fl.l_whence = SEEK_SET;
174 if (fcntl(tdb->fd,lck_type,&fl)) {
176 TDB_LOG((tdb, 5,"tdb_brlock failed at offset %d rw_type=%d lck_type=%d\n",
177 offset, rw_type, lck_type));
179 /* errno set by fcntl */
180 return TDB_ERRCODE(TDB_ERR_LOCK, -1);
185 /* lock a list in the database. list -1 is the alloc list */
/* list must lie in the range -1 .. hash_size-1; ltype is the lock type
   (callers in this file pass F_RDLCK or F_WRLCK).  Locks are counted per
   list in tdb->locked[list+1], so the array slot for the alloc list is 0.
   Callers in this file treat a result of -1 as failure.
   NOTE(review): the statements guarded by the range check and by the
   TDB_NOLOCK test are not visible in this listing. */
186 static int tdb_lock(TDB_CONTEXT *tdb, int list, int ltype)
188 if (list < -1 || list >= (int)tdb->header.hash_size) {
189 TDB_LOG((tdb, 0,"tdb_lock: invalid list %d for ltype=%d\n",
193 if (tdb->flags & TDB_NOLOCK)
196 /* Since fcntl locks don't nest, we do a lock for the first one,
197 and simply bump the count for future ones */
198 if (tdb->locked[list+1].count == 0) {
/* Writable databases whose header has rwlocks set go through
   tdb_spinlock(); every other case takes a waiting (F_SETLKW) fcntl lock
   on the byte at FREELIST_TOP+4*list. */
199 if (!tdb->read_only && tdb->header.rwlocks) {
200 if (tdb_spinlock(tdb, list, ltype)) {
201 TDB_LOG((tdb, 0, "tdb_lock spinlock on list ltype=%d\n",
205 } else if (tdb_brlock(tdb,FREELIST_TOP+4*list,ltype,F_SETLKW, 0)) {
206 TDB_LOG((tdb, 0,"tdb_lock failed on list %d ltype=%d (%s)\n",
207 list, ltype, strerror(errno)));
/* Remember which lock type was taken for this list. */
210 tdb->locked[list+1].ltype = ltype;
/* One more holder of this list lock. */
212 tdb->locked[list+1].count++;
216 /* unlock the database: returns void because it's too late for errors. */
/* Counterpart of tdb_lock() for the same list (-1 is the alloc list).
   The underlying lock is only released when the per-list count in
   tdb->locked[list+1] is 1; the count is then decremented.
   NOTE(review): the statements guarded by the TDB_NOLOCK test, the range
   check and the count==0 test are not visible in this listing. */
217 static void tdb_unlock(TDB_CONTEXT *tdb, int list, int ltype)
219 if (tdb->flags & TDB_NOLOCK)
223 if (list < -1 || list >= (int)tdb->header.hash_size)
225 if (tdb->locked[list+1].count==0)
228 if (tdb->locked[list+1].count == 1) {
229 /* Down to last nested lock: unlock underneath */
/* Same choice between spinlock and fcntl lock as in tdb_lock(). */
230 if (!tdb->read_only && tdb->header.rwlocks)
231 tdb_spinunlock(tdb, list, ltype);
233 tdb_brlock(tdb, FREELIST_TOP+4*list, F_UNLCK, F_SETLKW, 0);
235 tdb->locked[list+1].count--;
238 /* This is based on the hash algorithm from gdbm */
/* Returns a 32-bit hash of the key->dsize bytes at key->dptr.
   tdb_store() writes this value into each record as full_hash, and
   BUCKET() reduces it to a chain index. */
239 static u32 tdb_hash(TDB_DATA *key)
241 u32 value; /* Used to compute the hash value. */
242 u32 i; /* Used to cycle through random values. */
244 /* Set the initial value from the key size. */
/* Each key byte is shifted left by (i*5 % 24) bits and added in.
   NOTE(review): if the dptr elements have a signed char type, bytes of
   0x80 and above are negative when shifted -- confirm the intended
   behaviour for such keys before touching this expression. */
245 for (value = 0x238F13AF * key->dsize, i=0; i < key->dsize; i++)
246 value = (value + (key->dptr[i] << (i*5 % 24)));
/* Final multiply-and-add scramble of the accumulated value. */
248 return (1103515243 * value + 12345);
251 /* check for an out of bounds access - if it is out of bounds then
252 see if the database has been expanded by someone else and remap it if so.
254 note that "len" is the minimum length needed for the db */
/* Check that the database is at least len bytes long.
   len is first compared with the current map_size.  If it does not fit,
   an internal database fails at once, otherwise the real file size is
   read with fstat(); a file that is still too short yields
   TDB_ERRCODE(TDB_ERR_IO, -1).  If the file has grown, map_size is
   updated to the new file size.
   probe is passed as 1 by tdb_expand() and 0 by the other callers.
   NOTE(review): the statement guarded by the first test, the use of
   probe, and the unmap/remap calls are not visible in this listing. */
256 static int tdb_oob(TDB_CONTEXT *tdb, tdb_off len, int probe)
259 if (len <= tdb->map_size)
261 if (tdb->flags & TDB_INTERNAL) {
263 TDB_LOG((tdb, 0,"tdb_oob len %d beyond internal malloc size %d\n",
264 (int)len, (int)tdb->map_size));
266 return TDB_ERRCODE(TDB_ERR_IO, -1);
/* Ask the file system for the current file size. */
269 if (fstat(tdb->fd, &st) == -1)
270 return TDB_ERRCODE(TDB_ERR_IO, -1);
272 if (st.st_size < (size_t)len) {
274 TDB_LOG((tdb, 0,"tdb_oob len %d beyond eof at %d\n",
275 (int)len, (int)st.st_size));
277 return TDB_ERRCODE(TDB_ERR_IO, -1);
280 /* Unmap, update size, remap */
282 tdb->map_size = st.st_size;
287 /* write a lump of data at a specified offset */
/* Copies len bytes from buf to database offset off after tdb_oob() has
   confirmed that off+len lies inside the database.  The data goes either
   through the memory mapping (memcpy) or through the file descriptor.
   On a failed file write the result is TDB_ERRCODE(TDB_ERR_IO, -1).
   NOTE(review): the condition selecting the memcpy path is not visible,
   and the two else-if alternatives (pwrite versus lseek plus write)
   presumably sit in a preprocessor conditional that is not visible here. */
288 static int tdb_write(TDB_CONTEXT *tdb, tdb_off off, void *buf, tdb_len len)
290 if (tdb_oob(tdb, off + len, 0) != 0)
294 memcpy(off + (char *)tdb->map_ptr, buf, len);
296 else if (pwrite(tdb->fd, buf, len, off) != (ssize_t)len) {
298 else if (lseek(tdb->fd, off, SEEK_SET) != off
299 || write(tdb->fd, buf, len) != (ssize_t)len) {
/* A short or failed write is logged with the errno text. */
301 TDB_LOG((tdb, 0,"tdb_write failed at %d len=%d (%s)\n",
302 off, len, strerror(errno)));
303 return TDB_ERRCODE(TDB_ERR_IO, -1);
308 /* read a lump of data at a specified offset, maybe convert */
/* Copies len bytes from database offset off into buf after tdb_oob() has
   confirmed that off+len lies inside the database.  The data comes either
   from the memory mapping (memcpy) or from the file descriptor.
   On a failed file read the result is TDB_ERRCODE(TDB_ERR_IO, -1).
   cv is passed as DOCONV() or 0 by the callers in this file.
   NOTE(review): the use of cv is not visible in this listing (presumably
   the buffer is passed through convert() when cv is non-zero), and the
   two else-if alternatives presumably sit in a preprocessor conditional. */
309 static int tdb_read(TDB_CONTEXT *tdb,tdb_off off,void *buf,tdb_len len,int cv)
311 if (tdb_oob(tdb, off + len, 0) != 0)
315 memcpy(buf, off + (char *)tdb->map_ptr, len);
317 else if (pread(tdb->fd, buf, len, off) != (ssize_t)len) {
319 else if (lseek(tdb->fd, off, SEEK_SET) != off
320 || read(tdb->fd, buf, len) != (ssize_t)len) {
/* A short or failed read is logged with the errno text. */
322 TDB_LOG((tdb, 0,"tdb_read failed at %d len=%d (%s)\n",
323 off, len, strerror(errno)));
324 return TDB_ERRCODE(TDB_ERR_IO, -1);
331 /* read a lump of data, allocating the space for it */
/* Allocates len bytes with malloc() and fills them from database offset
   offset using tdb_read() without conversion.  When malloc() fails the
   result is TDB_ERRCODE(TDB_ERR_OOM, buf), with buf being NULL at that
   point.  Callers in this file test the result against NULL.
   NOTE(review): the statements that handle a failed tdb_read() and the
   final return are not visible in this listing. */
332 static char *tdb_alloc_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_len len)
336 if (!(buf = malloc(len))) {
337 TDB_LOG((tdb, 0,"tdb_alloc_read malloc failed len=%d (%s)\n",
338 len, strerror(errno)));
339 return TDB_ERRCODE(TDB_ERR_OOM, buf);
341 if (tdb_read(tdb, offset, buf, len, 0) == -1) {
348 /* read/write a tdb_off */
/* Reads one tdb_off from database offset offset into *d, with conversion
   requested when DOCONV() is true.  Returns the tdb_read() result. */
349 static int ofs_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d)
351 return tdb_read(tdb, offset, (char*)d, sizeof(*d), DOCONV());
/* Writes one tdb_off to database offset offset and returns the
   tdb_write() result.
   NOTE(review): off is a local declared on a line that is not visible in
   this listing; presumably it is a copy of *d so that CONVERT() does not
   byte-swap the value owned by the caller -- confirm. */
353 static int ofs_write(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d)
356 return tdb_write(tdb, offset, CONVERT(off), sizeof(*d));
359 /* read/write a record */
/* Reads the record header at offset into *rec and validates it.
   A header whose magic is neither TDB_MAGIC nor TDB_DEAD_MAGIC yields
   TDB_ERRCODE(TDB_ERR_CORRUPT, -1).  Otherwise the result is that of a
   bounds check on rec->next plus one header size.
   NOTE(review): the statement guarded by the failed-read test is not
   visible in this listing. */
360 static int rec_read(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec)
362 if (tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1)
364 if (TDB_BAD_MAGIC(rec)) {
365 TDB_LOG((tdb, 0,"rec_read bad magic 0x%x at offset=%d\n", rec->magic, offset));
366 return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
/* Make sure the header of the next record in the chain is in bounds. */
368 return tdb_oob(tdb, rec->next+sizeof(*rec), 0);
/* Writes the record header *rec to database offset offset and returns the
   tdb_write() result. */
370 static int rec_write(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec)
/* Work on a local copy so that CONVERT() never modifies *rec itself. */
372 struct list_struct r = *rec;
373 return tdb_write(tdb, offset, CONVERT(r), sizeof(r));
376 /* read a freelist record and check for simple errors */
/* Reads the header at off into *rec.  A magic other than TDB_FREE_MAGIC
   yields TDB_ERRCODE(TDB_ERR_CORRUPT, -1).  The offset in rec->next plus
   one header size is then bounds-checked with tdb_oob().
   NOTE(review): the statements guarded by the failed-read and failed
   bounds-check tests, the second line of the log call and the final
   return are not visible in this listing. */
377 static int rec_free_read(TDB_CONTEXT *tdb, tdb_off off, struct list_struct *rec)
379 if (tdb_read(tdb, off, rec, sizeof(*rec),DOCONV()) == -1)
381 if (rec->magic != TDB_FREE_MAGIC) {
382 TDB_LOG((tdb, 0,"rec_free_read bad magic 0x%x at offset=%d\n",
384 return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
386 if (tdb_oob(tdb, rec->next+sizeof(*rec), 0) != 0)
391 /* update a record tailer (must hold allocation lock) */
/* The tailer is the last tdb_off-sized slot of the record that starts at
   offset; tdb_free() reads it from the slot just before a record to find
   the header of the left neighbour.  Returns the ofs_write() result.
   NOTE(review): the second argument line of the ofs_write() call is not
   visible in this listing; tdb_dump_record() expects the tailer to equal
   rec_len plus the header size. */
392 static int update_tailer(TDB_CONTEXT *tdb, tdb_off offset,
393 const struct list_struct *rec)
397 /* Offset of tailer from record header */
398 totalsize = sizeof(*rec) + rec->rec_len;
399 return ofs_write(tdb, offset + totalsize - sizeof(tdb_off),
/* Debug helper: prints the header fields of the record at offset and
   checks its tailer against rec_len plus the header size.
   tdb_dump_chain() stores the result back into its record pointer.
   NOTE(review): the return statements are not visible in this listing. */
403 static tdb_off tdb_dump_record(TDB_CONTEXT *tdb, tdb_off offset)
405 struct list_struct rec;
406 tdb_off tailer_ofs, tailer;
408 if (tdb_read(tdb, offset, (char *)&rec, sizeof(rec), DOCONV()) == -1) {
409 printf("ERROR: failed to read record at %u\n", offset);
413 printf(" rec: offset=%u next=%d rec_len=%d key_len=%d data_len=%d full_hash=0x%x magic=0x%x\n",
414 offset, rec.next, rec.rec_len, rec.key_len, rec.data_len, rec.full_hash, rec.magic);
/* The tailer occupies the last tdb_off-sized slot of the record. */
416 tailer_ofs = offset + sizeof(rec) + rec.rec_len - sizeof(tdb_off);
417 if (ofs_read(tdb, tailer_ofs, &tailer) == -1) {
418 printf("ERROR: failed to read tailer at %u\n", tailer_ofs);
/* NOTE(review): the totalsize argument below includes a sizeof term, so
   its type is likely size_t rather than unsigned int; the %u conversion
   may not match on every platform -- confirm. */
422 if (tailer != rec.rec_len + sizeof(rec)) {
423 printf("ERROR: tailer does not match record! tailer=%u totalsize=%u\n", tailer, rec.rec_len + sizeof(rec));
/* Debug helper: prints the records of chain i while holding the write
   lock for list i.  The result of tdb_lock() is not checked.
   NOTE(review): the loop around the tdb_dump_record() call is not
   visible in this listing. */
428 static void tdb_dump_chain(TDB_CONTEXT *tdb, int i)
430 tdb_off rec_ptr, top;
/* File offset of the chain head slot. */
432 top = TDB_HASH_TOP(i);
434 tdb_lock(tdb, i, F_WRLCK);
/* If the chain head cannot be read, release the lock again. */
436 if (ofs_read(tdb, top, &rec_ptr) == -1) {
437 tdb_unlock(tdb, i, F_WRLCK);
442 printf("hash=%d\n", i);
/* tdb_dump_record() yields the value used as the next record pointer. */
445 rec_ptr = tdb_dump_record(tdb, rec_ptr);
447 tdb_unlock(tdb, i, F_WRLCK);
/* Debug helper: dumps every hash chain and then calls tdb_dump_chain()
   once more with -1 under the heading freelist. */
450 void tdb_dump_all(TDB_CONTEXT *tdb)
453 for (i=0;i<tdb->header.hash_size;i++) {
454 tdb_dump_chain(tdb, i);
456 printf("freelist:\n");
/* NOTE(review): tdb_dump_chain() locates the chain head with
   TDB_HASH_TOP(), which applies BUCKET() (a modulo by hash_size) to its
   argument; confirm that -1 really selects the free-list head there. */
457 tdb_dump_chain(tdb, -1);
/* Debug helper: prints the free-list records and the sum of their
   rec_len values while holding the alloc-list write lock.  The result of
   tdb_lock() is not checked.
   NOTE(review): the loop over the list, the error-path statements and
   the second argument line of the last printf() are not visible in this
   listing. */
460 void tdb_printfreelist(TDB_CONTEXT *tdb)
463 tdb_off offset, rec_ptr, last_ptr;
464 struct list_struct rec;
466 tdb_lock(tdb, -1, F_WRLCK);
469 offset = FREELIST_TOP;
471 /* read in the freelist top */
472 if (ofs_read(tdb, offset, &rec_ptr) == -1) {
476 printf("freelist top=[0x%08x]\n", rec_ptr );
478 if (tdb_read(tdb, rec_ptr, (char *)&rec, sizeof(rec), DOCONV()) == -1) {
/* Every record on this list must carry the free magic. */
482 if (rec.magic != TDB_FREE_MAGIC) {
483 printf("bad magic 0x%08x in free list\n", rec.magic);
/* The value printed as the entry offset is the next pointer of the record. */
487 printf("entry offset=[0x%08x], rec.rec_len = [0x%08x (%d)]\n", rec.next, rec.rec_len, rec.rec_len );
488 total_free += rec.rec_len;
490 /* move to the next record */
493 printf("total rec_len = [0x%08x (%d)]\n", (int)total_free,
496 tdb_unlock(tdb, -1, F_WRLCK);
499 /* Remove an element from the freelist. Must have alloc lock. */
/* Walks the free list starting at FREELIST_TOP.  When the matching link
   is found, next is written over the link at last_ptr and the
   ofs_write() result is returned.  When the walk ends without a match
   the result is TDB_ERRCODE(TDB_ERR_CORRUPT, -1).
   tdb_free() passes the offset of the free neighbour as off and its next
   pointer as next.
   NOTE(review): the comparison that detects the match and the statement
   that advances last_ptr are not visible in this listing. */
500 static int remove_from_freelist(TDB_CONTEXT *tdb, tdb_off off, tdb_off next)
504 /* read in the freelist top */
505 last_ptr = FREELIST_TOP;
/* Stop on a read error or at the end of the list (a link of 0). */
506 while (ofs_read(tdb, last_ptr, &i) != -1 && i != 0) {
508 /* We've found it! */
509 return ofs_write(tdb, last_ptr, &next);
511 /* Follow chain (next offset is at start of record) */
514 TDB_LOG((tdb, 0,"remove_from_freelist: not on list at off=%d\n", off));
515 return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
518 /* Add an element into the freelist. Merge adjacent records if they are free. */
/* Puts the record at offset (header *rec) on the free list, first
   absorbing a free neighbour on the right and on the left.  The alloc
   list is write-locked for the duration.  Callers in this file treat -1
   as failure.  *rec is modified: rec_len may grow, and magic and next
   are overwritten.
   NOTE(review): the return statements, the jump targets of the error
   paths and the statements that move offset to the left neighbour are
   not visible in this listing. */
520 static int tdb_free(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec)
524 /* Allocation and tailer lock */
525 if (tdb_lock(tdb, -1, F_WRLCK) != 0)
528 /* set an initial tailer, so if we fail we don't leave a bogus record */
529 update_tailer(tdb, offset, rec);
531 /* Look right first (I'm an Australian, dammit) */
/* right is the offset just past this record; only look there if a whole
   header fits inside the current map size. */
532 right = offset + sizeof(*rec) + rec->rec_len;
533 if (right + sizeof(*rec) <= tdb->map_size) {
534 struct list_struct r;
536 if (tdb_read(tdb, right, &r, sizeof(r), DOCONV()) == -1) {
537 TDB_LOG((tdb, 0, "tdb_free: right read failed at %u\n", right));
541 /* If it's free, expand to include it. */
542 if (r.magic == TDB_FREE_MAGIC) {
543 if (remove_from_freelist(tdb, right, r.next) == -1) {
544 TDB_LOG((tdb, 0, "tdb_free: right free failed at %u\n", right));
/* Absorb the header and body of the right neighbour. */
547 rec->rec_len += sizeof(r) + r.rec_len;
/* The slot just before this record is the tailer of the left neighbour;
   it is only consulted when it lies beyond the last hash chain head. */
553 left = offset - sizeof(tdb_off);
554 if (left > TDB_HASH_TOP(tdb->header.hash_size-1)) {
555 struct list_struct l;
558 /* Read in tailer and jump back to header */
559 if (ofs_read(tdb, left, &leftsize) == -1) {
560 TDB_LOG((tdb, 0, "tdb_free: left offset read failed at %u\n", left));
563 left = offset - leftsize;
565 /* Now read in record */
566 if (tdb_read(tdb, left, &l, sizeof(l), DOCONV()) == -1) {
567 TDB_LOG((tdb, 0, "tdb_free: left read failed at %u (%u)\n", left, leftsize));
571 /* If it's free, expand to include it. */
572 if (l.magic == TDB_FREE_MAGIC) {
573 if (remove_from_freelist(tdb, left, l.next) == -1) {
574 TDB_LOG((tdb, 0, "tdb_free: left free failed at %u\n", left));
/* leftsize is the tailer value of the left neighbour. */
578 rec->rec_len += leftsize;
/* Rewrite the tailer now that rec_len is final. */
584 if (update_tailer(tdb, offset, rec) == -1) {
585 TDB_LOG((tdb, 0, "tdb_free: update_tailer failed at %u\n", offset));
589 /* Now, prepend to free list */
590 rec->magic = TDB_FREE_MAGIC;
/* The old list head becomes our next pointer, the header is written,
   and only then is the list head pointed at this record. */
592 if (ofs_read(tdb, FREELIST_TOP, &rec->next) == -1 ||
593 rec_write(tdb, offset, rec) == -1 ||
594 ofs_write(tdb, FREELIST_TOP, &offset) == -1) {
595 TDB_LOG((tdb, 0, "tdb_free record write failed at offset=%d\n", offset));
599 /* And we're done. */
600 tdb_unlock(tdb, -1, F_WRLCK);
/* Second unlock: presumably the failure exit path -- confirm. */
604 tdb_unlock(tdb, -1, F_WRLCK);
609 /* expand a file. we prefer to use ftruncate, as that is what posix
610 says to use for mmap expansion */
/* Grows the file from size to size+addition bytes and then overwrites
   the added range with 0x42 bytes in buffer-sized pieces.  tdb_expand()
   treats a non-zero result as failure.
   NOTE(review): only the opening #if is visible; the other preprocessor
   lines that choose between ftruncate, pwrite and lseek plus write, the
   loop around the fill writes and the return statements are not visible
   in this listing. */
611 static int expand_file(TDB_CONTEXT *tdb, tdb_off size, tdb_off addition)
614 #if HAVE_FTRUNCATE_EXTEND
615 if (ftruncate(tdb->fd, size+addition) != 0) {
616 TDB_LOG((tdb, 0, "expand_file ftruncate to %d failed (%s)\n",
617 size+addition, strerror(errno)));
/* Alternative: extend the file by writing one byte at the new last offset. */
624 if (pwrite(tdb->fd, &b, 1, (size+addition) - 1) != 1) {
626 if (lseek(tdb->fd, (size+addition) - 1, SEEK_SET) != (size+addition) - 1 ||
627 write(tdb->fd, &b, 1) != 1) {
629 TDB_LOG((tdb, 0, "expand_file to %d failed (%s)\n",
630 size+addition, strerror(errno)));
635 /* now fill the file with something. This ensures that the file isn't sparse, which would be
636 very bad if we ran out of disk. This must be done with write, not via mmap */
637 memset(buf, 0x42, sizeof(buf));
/* Write at most one buffer per step. */
639 int n = addition>sizeof(buf)?sizeof(buf):addition;
641 int ret = pwrite(tdb->fd, buf, n, size);
644 if (lseek(tdb->fd, size, SEEK_SET) != size)
646 ret = write(tdb->fd, buf, n);
649 TDB_LOG((tdb, 0, "expand_file write of %d failed (%s)\n",
650 n, strerror(errno)));
660 /* expand the database at least size bytes by expanding the underlying
661 file and doing the mmap again if necessary */
/* Runs under the alloc-list write lock.  The added space is turned into
   one free record and handed to tdb_free().  tdb_allocate() retries its
   search when the result is 0.
   NOTE(review): the unmap and remap calls, the return statements and the
   jump targets of the error paths are not visible in this listing. */
662 static int tdb_expand(TDB_CONTEXT *tdb, tdb_off size)
664 struct list_struct rec;
667 if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
668 TDB_LOG((tdb, 0, "lock failed in tdb_expand\n"));
672 /* must know about any previous expansions by another process */
673 tdb_oob(tdb, tdb->map_size + 1, 1);
675 /* always make room for at least 10 more records, and round
676 the database up to a multiple of TDB_PAGE_SIZE */
/* From here on size is the number of bytes to add. */
677 size = TDB_ALIGN(tdb->map_size + size*10, TDB_PAGE_SIZE) - tdb->map_size;
679 if (!(tdb->flags & TDB_INTERNAL))
683 * We must ensure the file is unmapped before doing this
684 * to ensure consistency with systems like OpenBSD where
685 * writes and mmaps are not consistent.
688 /* expand the file itself */
689 if (!(tdb->flags & TDB_INTERNAL)) {
690 if (expand_file(tdb, tdb->map_size, size) != 0)
694 tdb->map_size += size;
/* NOTE(review): the realloc() result is stored straight into map_ptr and
   no check of it is visible here; if realloc() fails the old buffer
   pointer is lost -- confirm against the full source. */
696 if (tdb->flags & TDB_INTERNAL)
697 tdb->map_ptr = realloc(tdb->map_ptr, tdb->map_size);
700 * We must ensure the file is remapped before adding the space
701 * to ensure consistency with systems like OpenBSD where
702 * writes and mmaps are not consistent.
705 /* We're ok if the mmap fails as we'll fallback to read/write */
709 /* form a new freelist record */
710 memset(&rec,'\0',sizeof(rec));
/* The new record body is the added space minus its own header. */
711 rec.rec_len = size - sizeof(rec);
713 /* link it into the free list */
/* The new record starts where the old end of the database was. */
714 offset = tdb->map_size - size;
715 if (tdb_free(tdb, offset, &rec) == -1)
718 tdb_unlock(tdb, -1, F_WRLCK);
/* Second unlock: presumably the failure exit path -- confirm. */
721 tdb_unlock(tdb, -1, F_WRLCK);
725 /* allocate some space from the free list. The offset returned points
726 to an unconnected list_struct within the database with room for at
727 least length bytes of total data.
729 0 is returned if the space could not be allocated */
/* Searches the free list, under the alloc-list write lock, for a record
   with rec_len of at least length plus one tailer slot.  A sufficiently
   larger record is split and its right-hand remainder is given back
   through tdb_free().  The chosen header is written with TDB_MAGIC and
   is also left in *rec.  If nothing fits, tdb_expand() is called and a
   result of 0 from it leads to another attempt.  tdb_store() treats a
   result of 0 as failure.
   NOTE(review): the loop statements, the return statements, the jump
   targets and the code that advances last_ptr and rec_ptr are not
   visible in this listing. */
731 static tdb_off tdb_allocate(TDB_CONTEXT *tdb, tdb_len length,
732 struct list_struct *rec)
734 tdb_off rec_ptr, last_ptr, newrec_ptr;
735 struct list_struct newrec;
737 if (tdb_lock(tdb, -1, F_WRLCK) == -1)
740 /* Extra bytes required for tailer */
741 length += sizeof(tdb_off);
/* last_ptr is the offset of the link that points at rec_ptr. */
744 last_ptr = FREELIST_TOP;
746 /* read in the freelist top */
747 if (ofs_read(tdb, FREELIST_TOP, &rec_ptr) == -1)
750 /* keep looking until we find a freelist record big enough */
752 if (rec_free_read(tdb, rec_ptr, rec) == -1)
755 if (rec->rec_len >= length) {
756 /* found it - now possibly split it up */
757 if (rec->rec_len > length + MIN_REC_SIZE) {
758 /* Length of left piece */
759 length = TDB_ALIGN(length, TDB_ALIGNMENT);
761 /* Right piece to go on free list */
762 newrec.rec_len = rec->rec_len
763 - (sizeof(*rec) + length);
764 newrec_ptr = rec_ptr + sizeof(*rec) + length;
766 /* And left record is shortened */
767 rec->rec_len = length;
771 /* Remove allocated record from the free list */
772 if (ofs_write(tdb, last_ptr, &rec->next) == -1)
775 /* Update header: do this before we drop alloc
776 lock, otherwise tdb_free() might try to
777 merge with us, thinking we're free.
778 (Thanks Jeremy Allison). */
779 rec->magic = TDB_MAGIC;
780 if (rec_write(tdb, rec_ptr, rec) == -1)
783 /* Did we create new block? */
785 /* Update allocated record tailer (we
787 if (update_tailer(tdb, rec_ptr, rec) == -1)
790 /* Free new record */
791 if (tdb_free(tdb, newrec_ptr, &newrec) == -1)
795 /* all done - return the new record offset */
796 tdb_unlock(tdb, -1, F_WRLCK);
799 /* move to the next record */
803 /* we didn't find enough space. See if we can expand the
804 database and if we can then try again */
/* Ask for room for the body plus one record header. */
805 if (tdb_expand(tdb, length + sizeof(*rec)) == 0)
808 tdb_unlock(tdb, -1, F_WRLCK);
812 /* initialise a new database with a specified hash size */
/* Builds a zero-filled image of the header followed by hash_size+1
   tdb_off slots (one for the free list and one per hash chain).  For an
   internal database the image itself becomes the map; otherwise the file
   is truncated to zero length and the image is written at offset 0.
   A failed allocation yields TDB_ERRCODE(TDB_ERR_OOM, -1).
   NOTE(review): the failure branches, the conversion call, the cleanup of
   newdb and the return statements are not visible in this listing; the
   rwlocks assignment and the tdb_create_rwlocks() call presumably sit in
   a preprocessor conditional -- confirm. */
813 static int tdb_new_database(TDB_CONTEXT *tdb, int hash_size)
815 struct tdb_header *newdb;
818 /* We make it up in memory, then write it out if not internal */
819 size = sizeof(struct tdb_header) + (hash_size+1)*sizeof(tdb_off);
820 if (!(newdb = calloc(size, 1)))
821 return TDB_ERRCODE(TDB_ERR_OOM, -1);
823 /* Fill in the header */
824 newdb->version = TDB_VERSION;
825 newdb->hash_size = hash_size;
/* rwlocks is given the size of the image just built. */
827 newdb->rwlocks = size;
829 if (tdb->flags & TDB_INTERNAL) {
/* The in-memory image is used directly as the database. */
830 tdb->map_size = size;
831 tdb->map_ptr = (char *)newdb;
832 memcpy(&tdb->header, newdb, sizeof(tdb->header));
833 /* Convert the `ondisk' version if asked. */
/* File-backed case: rewind and empty the file before writing the image. */
837 if (lseek(tdb->fd, 0, SEEK_SET) == -1)
840 if (ftruncate(tdb->fd, 0) == -1)
843 /* This creates an endian-converted header, as if read from disk */
845 memcpy(&tdb->header, newdb, sizeof(tdb->header));
846 /* Don't endian-convert the magic food! */
847 memcpy(newdb->magic_food, TDB_MAGIC_FOOD, strlen(TDB_MAGIC_FOOD)+1);
/* A short write counts as failure. */
848 if (write(tdb->fd, newdb, size) != size)
851 ret = tdb_create_rwlocks(tdb->fd, hash_size);
858 /* Returns 0 on fail. On success, return offset of record, and fills in *r. */
/* Walks the hash chain selected by hash, reading each record header into
   *r.  A record is a candidate when it is not dead and both its full
   hash and its key length match; the stored key is then read and compared
   byte for byte.  When the chain ends without a match the result is
   TDB_ERRCODE(TDB_ERR_NOEXIST, 0).  No locking is done here;
   tdb_find_lock() locks the chain around the call.
   NOTE(review): the loop statements, the release of the key buffer and
   the success return are not visible in this listing. */
860 static tdb_off tdb_find(TDB_CONTEXT *tdb, TDB_DATA key, u32 hash,
861 struct list_struct *r)
865 /* read in the hash top */
866 if (ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
869 /* keep looking until we find the right record */
871 if (rec_read(tdb, rec_ptr, r) == -1)
/* Cheap header comparison first, to avoid reading most keys. */
874 if (!TDB_DEAD(r) && hash==r->full_hash && key.dsize==r->key_len) {
876 /* a very likely hit - read the key */
877 k = tdb_alloc_read(tdb, rec_ptr + sizeof(*r),
882 if (memcmp(key.dptr, k, key.dsize) == 0) {
890 return TDB_ERRCODE(TDB_ERR_NOEXIST, 0);
893 /* If they do lockkeys, check that this hash is one they locked */
/* tdb->lockedkeys[0] holds the number of entries and the hashes follow
   from index 1.  When no entry matches the result is
   TDB_ERRCODE(TDB_ERR_NOLOCK, 0).  Callers in this file treat a zero
   result as failure.
   NOTE(review): the statements returned for a NULL lockedkeys pointer and
   for a matching entry are not visible in this listing. */
894 static int tdb_keylocked(TDB_CONTEXT *tdb, u32 hash)
897 if (!tdb->lockedkeys)
899 for (i = 0; i < tdb->lockedkeys[0]; i++)
900 if (tdb->lockedkeys[i+1] == hash)
902 return TDB_ERRCODE(TDB_ERR_NOLOCK, 0);
905 /* As tdb_find, but if you succeed, keep the lock */
/* Hashes key, checks the hash with tdb_keylocked(), locks the bucket
   chain with locktype and looks the record up with tdb_find().  When
   tdb_find() yields 0 the chain is unlocked again.  Callers in this file
   treat a result of 0 as not found and unlock the chain themselves after
   a successful lookup.
   NOTE(review): the return statements are not visible in this listing. */
906 static tdb_off tdb_find_lock(TDB_CONTEXT *tdb, TDB_DATA key, int locktype,
907 struct list_struct *rec)
911 hash = tdb_hash(&key);
912 if (!tdb_keylocked(tdb, hash))
914 if (tdb_lock(tdb, BUCKET(hash), locktype) == -1)
/* Not found: drop the chain lock taken above. */
916 if (!(rec_ptr = tdb_find(tdb, key, hash, rec)))
917 tdb_unlock(tdb, BUCKET(hash), locktype);
/* Public accessor returning an enum TDB_ERROR for the context.
   NOTE(review): the body is not visible in this listing; presumably it
   returns tdb->ecode, the field tdb_errorstr() reads -- confirm. */
921 enum TDB_ERROR tdb_error(TDB_CONTEXT *tdb)
/* Table pairing each error code with its message text; tdb_errorstr()
   searches it linearly. */
926 static struct tdb_errname {
927 enum TDB_ERROR ecode; const char *estring;
928 } emap[] = { {TDB_SUCCESS, "Success"},
929 {TDB_ERR_CORRUPT, "Corrupt database"},
930 {TDB_ERR_IO, "IO Error"},
931 {TDB_ERR_LOCK, "Locking error"},
932 {TDB_ERR_OOM, "Out of memory"},
933 {TDB_ERR_EXISTS, "Record exists"},
934 {TDB_ERR_NOLOCK, "Lock exists on other keys"},
935 {TDB_ERR_NOEXIST, "Record does not exist"} };
937 /* Error string for the last tdb error */
/* Returns the emap text whose code equals tdb->ecode, or a fixed
   fallback string when the code is not in the table.  The returned
   strings are literals and must not be freed or modified. */
938 const char *tdb_errorstr(TDB_CONTEXT *tdb)
/* Linear search over every table entry. */
941 for (i = 0; i < sizeof(emap) / sizeof(struct tdb_errname); i++)
942 if (tdb->ecode == emap[i].ecode)
943 return emap[i].estring;
944 return "Invalid error code";
947 /* update an entry in place - this only works if the new data size
948 is <= the old data size and the key exists. */
/* Overwrites the data of an existing record without reallocating it.
   The record is looked up with the chain write-locked.  If its rec_len
   cannot hold key, new data and tailer, ecode is reset to TDB_SUCCESS
   and the update is not performed here.  Otherwise the data bytes are
   written after the stored key and, when the data length changed, the
   header is rewritten.  tdb_store() treats a result of 0 as success.
   NOTE(review): the return statements and the jump targets are not
   visible in this listing. */
951 static int tdb_update(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf)
953 struct list_struct rec;
958 if (!(rec_ptr = tdb_find_lock(tdb, key, F_WRLCK, &rec)))
961 /* must be long enough key, data and tailer */
962 if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb_off)) {
963 tdb->ecode = TDB_SUCCESS; /* Not really an error */
/* The data area starts right after the header and the key. */
967 if (tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
968 dbuf.dptr, dbuf.dsize) == -1)
971 if (dbuf.dsize != rec.data_len) {
973 rec.data_len = dbuf.dsize;
974 ret = rec_write(tdb, rec_ptr, &rec);
/* Release the chain lock that tdb_find_lock() left held. */
978 tdb_unlock(tdb, BUCKET(rec.full_hash), F_WRLCK);
982 /* find an entry in the database given a key */
/* Looks the key up with the chain read-locked and returns a TDB_DATA
   whose dptr comes from tdb_alloc_read() (a malloc()ed buffer) and whose
   dsize is the stored data length.
   NOTE(review): dsize is assigned directly after the tdb_alloc_read()
   call, so callers should check dptr for NULL.  The not-found return
   value, the second argument line of the read call and the final return
   are not visible in this listing. */
983 TDB_DATA tdb_fetch(TDB_CONTEXT *tdb, TDB_DATA key)
986 struct list_struct rec;
989 /* find which hash bucket it is in */
990 if (!(rec_ptr = tdb_find_lock(tdb,key,F_RDLCK,&rec)))
/* The data starts right after the header and the key. */
993 ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
995 ret.dsize = rec.data_len;
/* Release the chain lock that tdb_find_lock() left held. */
996 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
1000 /* check if an entry in the database exists
1002 note that 1 is returned if the key is found and 0 is returned if not found
1003 this doesn't match the conventions in the rest of this module, but is
1004 compatible with gdbm */
/* Looks the key up with the chain read-locked and drops the lock again
   when the record was found.  tdb_store() treats a non-zero result as
   meaning that the key is present.
   NOTE(review): the return statements are not visible in this listing. */
1006 int tdb_exists(TDB_CONTEXT *tdb, TDB_DATA key)
1008 struct list_struct rec;
1010 if (tdb_find_lock(tdb, key, F_RDLCK, &rec) == 0)
/* Found: release the chain lock that tdb_find_lock() left held. */
1012 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
1016 /* record lock stops delete underneath */
/* Takes a waiting read lock on the byte at offset off.  An offset of 0
   means no record and yields 0 without locking. */
1017 static int lock_record(TDB_CONTEXT *tdb, tdb_off off)
1019 return off ? tdb_brlock(tdb, off, F_RDLCK, F_SETLKW, 0) : 0;
1022 /* Write locks override our own fcntl readlocks, so check it here.
1023 Note this is meant to be F_SETLK, *not* F_SETLKW, as it's not
1024 an error to fail to get the lock here. */
/* Tries, without waiting, to take a write lock on the byte at offset off.
   The traverse locks of this context are walked first, starting with
   tdb->travlocks.  do_delete() treats a result of -1 as meaning that the
   record is in use by a traverse.
   NOTE(review): the body of the loop is not visible in this listing;
   presumably it fails early when one of our own traverse locks already
   sits on off -- confirm. */
1027 static int write_lock_record(TDB_CONTEXT *tdb, tdb_off off)
1029 struct tdb_traverse_lock *i;
1030 for (i = &tdb->travlocks; i; i = i->next)
/* Non-waiting attempt, with the probe argument set. */
1033 return tdb_brlock(tdb, off, F_WRLCK, F_SETLK, 1);
1037 /* Note this is meant to be F_SETLK, *not* F_SETLKW, as it's not
1038 an error to fail to get the lock here. */
/* Releases the lock on the byte at offset off with a non-waiting F_UNLCK
   request and returns the tdb_brlock() result. */
1041 static int write_unlock_record(TDB_CONTEXT *tdb, tdb_off off)
1043 return tdb_brlock(tdb, off, F_UNLCK, F_SETLK, 0);
1045 /* fcntl locks don't stack: avoid unlocking someone else's */
/* Releases the record lock at off only when count is exactly 1;
   otherwise nothing is unlocked and the result is 0.
   NOTE(review): the declaration of count, the early checks and the body
   of the loop over the traverse locks are not visible in this listing;
   presumably count is the number of traverse locks sitting on off. */
1046 static int unlock_record(TDB_CONTEXT *tdb, tdb_off off)
1048 struct tdb_traverse_lock *i;
1053 for (i = &tdb->travlocks; i; i = i->next)
1056 return (count == 1 ? tdb_brlock(tdb, off, F_UNLCK, F_SETLKW, 0) : 0);
1059 /* actually delete an entry in the database given the offset */
/* rec must hold the header of the record at rec_ptr.  Read-only contexts
   fail with -1.  If the record write lock cannot be taken the record is
   only marked dead and rewritten; otherwise it is unlinked from its hash
   chain and its space is handed to tdb_free().
   NOTE(review): the return statements and the test that decides whether
   the chain head or the previous record is rewritten are not visible in
   this listing. */
1060 static int do_delete(TDB_CONTEXT *tdb, tdb_off rec_ptr, struct list_struct*rec)
1062 tdb_off last_ptr, i;
1063 struct list_struct lastrec;
1065 if (tdb->read_only) return -1;
1067 if (write_lock_record(tdb, rec_ptr) == -1) {
1068 /* Someone traversing here: mark it as dead */
1069 rec->magic = TDB_DEAD_MAGIC;
1070 return rec_write(tdb, rec_ptr, rec);
/* The lock was only a probe, so give it back at once. */
1072 write_unlock_record(tdb, rec_ptr);
1074 /* find previous record in hash chain */
1075 if (ofs_read(tdb, TDB_HASH_TOP(rec->full_hash), &i) == -1)
/* After the loop last_ptr is the record before rec_ptr, or 0 when
   rec_ptr is the first record of the chain. */
1077 for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
1078 if (rec_read(tdb, i, &lastrec) == -1)
1081 /* unlink it: next ptr is at start of record. */
1083 last_ptr = TDB_HASH_TOP(rec->full_hash);
1084 if (ofs_write(tdb, last_ptr, &rec->next) == -1)
1087 /* recover the space */
1088 if (tdb_free(tdb, rec_ptr, rec) == -1)
1093 /* Uses traverse lock: 0 = finish, -1 = error, other = record offset */
/* Advances tlock to the next record that is not dead, starting from the
   position stored in tlock->hash and tlock->off.  Each chain is
   write-locked while it is examined and unlocked again before moving to
   the next chain.  A record that is found gets a record lock via
   lock_record(); dead records met on the way are passed to do_delete().
   Traversal is refused with TDB_ERRCODE(TDB_ERR_NOLOCK, -1) while
   tdb->lockedkeys is set.
   NOTE(review): the return statements, the jump targets and several
   branch conditions are not visible in this listing. */
1094 static int tdb_next_lock(TDB_CONTEXT *tdb, struct tdb_traverse_lock *tlock,
1095 struct list_struct *rec)
/* A non-zero offset means a previous record is held and its successor is wanted. */
1097 int want_next = (tlock->off != 0);
1099 /* No traversal allows if you've called tdb_lockkeys() */
1100 if (tdb->lockedkeys)
1101 return TDB_ERRCODE(TDB_ERR_NOLOCK, -1);
1103 /* Lock each chain from the start one. */
1104 for (; tlock->hash < tdb->header.hash_size; tlock->hash++) {
1105 if (tdb_lock(tdb, tlock->hash, F_WRLCK) == -1)
1108 /* No previous record? Start at top of chain. */
1110 if (ofs_read(tdb, TDB_HASH_TOP(tlock->hash),
1114 /* Otherwise unlock the previous record. */
1115 unlock_record(tdb, tlock->off);
1119 /* We have offset of old record: grab next */
1120 if (rec_read(tdb, tlock->off, rec) == -1)
1122 tlock->off = rec->next;
1125 /* Iterate through chain */
1126 while( tlock->off) {
1128 if (rec_read(tdb, tlock->off, rec) == -1)
1130 if (!TDB_DEAD(rec)) {
1131 /* Woohoo: we found one! */
1132 lock_record(tdb, tlock->off);
1135 /* Try to clean dead ones from old traverses */
/* Step past the dead record before deleting it. */
1136 current = tlock->off;
1137 tlock->off = rec->next;
1138 do_delete(tdb, current, rec);
/* End of this chain: release it before the next one is locked. */
1140 tdb_unlock(tdb, tlock->hash, F_WRLCK);
1143 /* We finished iteration without finding anything */
1144 return TDB_ERRCODE(TDB_SUCCESS, 0);
/* Second unlock: presumably the failure exit path -- confirm. */
1148 tdb_unlock(tdb, tlock->hash, F_WRLCK);
1152 /* traverse the entire database - calling fn(tdb, key, data) on each element.
1153 return -1 on error or the record count traversed
1154 if fn is NULL then it is not called
1155 a non-zero return value from fn() indicates that the traversal should stop */
/* Visits every live record through tdb_next_lock(), reading key and data
   into one buffer and passing them to fn together with state.  A local
   traverse lock is linked in behind tdb->travlocks for the duration and
   unlinked again on every visible exit path.  The chain lock is dropped
   before fn is called, while the record lock stays in place.
   NOTE(review): the counting of records, the release of the key buffer,
   the test for a failed read and the return statements are not visible
   in this listing. */
1157 int tdb_traverse(TDB_CONTEXT *tdb, tdb_traverse_func fn, void *state)
1160 struct list_struct rec;
1161 struct tdb_traverse_lock tl = { NULL, 0, 0 };
1164 /* This was in the initialization, above, but the IRIX compiler
1165 * did not like it. crh
1167 tl.next = tdb->travlocks.next;
1169 /* fcntl locks don't stack: beware traverse inside traverse */
1170 tdb->travlocks.next = &tl;
1172 /* tdb_next_lock places locks on the record returned, and its chain */
1173 while ((ret = tdb_next_lock(tdb, &tl, &rec)) > 0) {
1175 /* now read the full record */
/* Key and data are stored back to back, so one read fetches both. */
1176 key.dptr = tdb_alloc_read(tdb, tl.off + sizeof(rec),
1177 rec.key_len + rec.data_len);
/* The three statements below release both locks and unlink the local
   traverse lock; presumably this is the failed-read path -- confirm. */
1179 tdb_unlock(tdb, tl.hash, F_WRLCK);
1180 unlock_record(tdb, tl.off);
1181 tdb->travlocks.next = tl.next;
/* The data buffer is a view into the same allocation, after the key. */
1184 key.dsize = rec.key_len;
1185 dbuf.dptr = key.dptr + rec.key_len;
1186 dbuf.dsize = rec.data_len;
1188 /* Drop chain lock, call out */
1189 tdb_unlock(tdb, tl.hash, F_WRLCK);
1190 if (fn && fn(tdb, key, dbuf, state)) {
1191 /* They want us to terminate traversal */
1192 unlock_record(tdb, tl.off);
1193 tdb->travlocks.next = tl.next;
/* Normal end of the traversal: unlink the local traverse lock. */
1199 tdb->travlocks.next = tl.next;
1206 /* find the first entry in the database and return its key */
/* Resets the context traverse lock (tdb->travlocks) to the start of the
   database and advances it with tdb_next_lock().  The key of the record
   found is read into a buffer from tdb_alloc_read(); the chain lock is
   then dropped while the traverse position stays in tdb->travlocks.
   NOTE(review): the value returned when nothing is found and the final
   return are not visible in this listing. */
1207 TDB_DATA tdb_firstkey(TDB_CONTEXT *tdb)
1210 struct list_struct rec;
1212 /* release any old lock */
1213 unlock_record(tdb, tdb->travlocks.off);
1214 tdb->travlocks.off = tdb->travlocks.hash = 0;
1216 if (tdb_next_lock(tdb, &tdb->travlocks, &rec) <= 0)
1218 /* now read the key */
1219 key.dsize = rec.key_len;
1220 key.dptr =tdb_alloc_read(tdb,tdb->travlocks.off+sizeof(rec),key.dsize);
/* Drop the chain lock that tdb_next_lock() left held. */
1221 tdb_unlock(tdb, BUCKET(tdb->travlocks.hash), F_WRLCK);
1225 /* find the next entry in the database, returning its key */
/* If the context traverse lock still points at a record, its stored key
   is compared with oldkey; on a mismatch or read failure the position is
   dropped.  Without a position, oldkey is looked up and its record is
   locked.  tdb_next_lock() then moves to the following record, whose key
   is read into a buffer from tdb_alloc_read().  key starts as tdb_null.
   NOTE(review): the return statements, the release of the comparison
   buffer and the second argument lines of two read calls are not visible
   in this listing. */
1226 TDB_DATA tdb_nextkey(TDB_CONTEXT *tdb, TDB_DATA oldkey)
1229 TDB_DATA key = tdb_null;
1230 struct list_struct rec;
1233 /* Is locked key the old key? If so, traverse will be reliable. */
1234 if (tdb->travlocks.off) {
1235 if (tdb_lock(tdb,tdb->travlocks.hash,F_WRLCK))
1237 if (rec_read(tdb, tdb->travlocks.off, &rec) == -1
1238 || !(k = tdb_alloc_read(tdb,tdb->travlocks.off+sizeof(rec),
1240 || memcmp(k, oldkey.dptr, oldkey.dsize) != 0) {
1241 /* No, it wasn't: unlock it and start from scratch */
1242 unlock_record(tdb, tdb->travlocks.off);
1243 tdb_unlock(tdb, tdb->travlocks.hash, F_WRLCK);
1244 tdb->travlocks.off = 0;
1251 if (!tdb->travlocks.off) {
1252 /* No previous element: do normal find, and lock record */
1253 tdb->travlocks.off = tdb_find_lock(tdb, oldkey, F_WRLCK, &rec);
1254 if (!tdb->travlocks.off)
1256 tdb->travlocks.hash = BUCKET(rec.full_hash);
1257 lock_record(tdb, tdb->travlocks.off);
/* Remember the chain of the old record; it is unlocked at the end. */
1259 oldhash = tdb->travlocks.hash;
1261 /* Grab next record: locks chain and returned record,
1262 unlocks old record */
1263 if (tdb_next_lock(tdb, &tdb->travlocks, &rec) > 0) {
1264 key.dsize = rec.key_len;
1265 key.dptr = tdb_alloc_read(tdb, tdb->travlocks.off+sizeof(rec),
1267 /* Unlock the chain of this new record */
1268 tdb_unlock(tdb, tdb->travlocks.hash, F_WRLCK);
1270 /* Unlock the chain of old record */
1271 tdb_unlock(tdb, BUCKET(oldhash), F_WRLCK);
1275 /* delete an entry in the database given a key */
/* Looks the key up with the chain write-locked, passes the record to
   do_delete() and releases the chain lock.
   NOTE(review): the return statements are not visible in this listing;
   presumably the do_delete() result is returned on the found path. */
1276 int tdb_delete(TDB_CONTEXT *tdb, TDB_DATA key)
1279 struct list_struct rec;
1282 if (!(rec_ptr = tdb_find_lock(tdb, key, F_WRLCK, &rec)))
1284 ret = do_delete(tdb, rec_ptr, &rec);
1285 tdb_unlock(tdb, BUCKET(rec.full_hash), F_WRLCK);
1289 /* store an element in the database, replacing any existing element with the same key.
1292 return 0 on success, -1 on failure */
/* Stores dbuf under key while holding the write lock of the key chain.
   flag selects the mode:
     TDB_INSERT - an existing key sets ecode to TDB_ERR_EXISTS
     TDB_MODIFY - a missing key (ecode TDB_ERR_NOEXIST after the update
                  attempt) takes a separate branch
     otherwise  - replace
   Except for TDB_INSERT an in-place tdb_update() is tried first.  If
   that does not succeed, any old record is deleted, key and data are
   copied into one buffer, space is taken from tdb_allocate() and the new
   record is linked in at the head of the chain.
   NOTE(review): the return statements, the jump targets, the release of
   the copy buffer and the else structure around the insert check are not
   visible in this listing. */
1294 int tdb_store(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
1296 struct list_struct rec;
1302 /* find which hash bucket it is in */
1303 hash = tdb_hash(&key);
1304 if (!tdb_keylocked(tdb, hash))
1306 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
1309 /* check for it existing, on insert. */
1310 if (flag == TDB_INSERT) {
1311 if (tdb_exists(tdb, key)) {
1312 tdb->ecode = TDB_ERR_EXISTS;
1316 /* first try in-place update, on modify or replace. */
1317 if (tdb_update(tdb, key, dbuf) == 0)
1319 if (flag == TDB_MODIFY && tdb->ecode == TDB_ERR_NOEXIST)
1322 /* reset the error code potentially set by the tdb_update() */
1323 tdb->ecode = TDB_SUCCESS;
1325 /* delete any existing record - if it doesn't exist we don't
1326 care. Doing this first reduces fragmentation, and avoids
1327 coalescing with `allocated' block before it's updated. */
1328 if (flag != TDB_INSERT)
1329 tdb_delete(tdb, key);
1331 /* Copy key+value *before* allocating free space in case malloc
1332 fails and we are left with a dead spot in the tdb. */
1334 if (!(p = (char *)malloc(key.dsize + dbuf.dsize))) {
1335 tdb->ecode = TDB_ERR_OOM;
/* Key bytes first, data bytes directly after them. */
1339 memcpy(p, key.dptr, key.dsize);
1340 memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize);
1342 /* now we're into insert / modify / replace of a record which
1343 * we know could not be optimised by an in-place store (for
1344 * various reasons). */
1345 if (!(rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize, &rec)))
1348 /* Read hash top into next ptr */
1349 if (ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
/* Fill in the remaining header fields of the new record. */
1352 rec.key_len = key.dsize;
1353 rec.data_len = dbuf.dsize;
1354 rec.full_hash = hash;
1355 rec.magic = TDB_MAGIC;
1357 /* write out and point the top of the hash chain at it */
/* Header, then body, then the chain head: the record only becomes
   reachable once it has been written completely. */
1358 if (rec_write(tdb, rec_ptr, &rec) == -1
1359 || tdb_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1
1360 || ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) {
1362 /* Need to tdb_unallocate() here */
/* Release the chain lock taken at the top. */
1368 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
/* tdb_open: convenience form of tdb_open_ex() that passes NULL as the
   logging function; all other arguments are forwarded unchanged. */
1372 /* open the database, creating it if necessary
1374 The open_flags and mode are passed straight to the open call on the
1375 database file. A flags value of O_WRONLY is invalid. The hash size
1376 is advisory, use zero for a default value.
1378 Return is NULL on error, in which case errno is also set. Don't
1379 try to call tdb_error or tdb_errname, just do strerror(errno). */
1380 TDB_CONTEXT *tdb_open(char *name, int hash_size, int tdb_flags,
1381 int open_flags, mode_t mode)
1383 return tdb_open_ex(name, hash_size, tdb_flags, open_flags, mode, NULL);
/* tdb_open_ex: open (or create) a database, with an optional log callback.
   A TDB_CONTEXT is first built in the local variable `tdb`; a heap
   TDB_CONTEXT is allocated for the result near the end.  GLOBAL_LOCK is
   held while the file is validated or initialised, and ACTIVE_LOCK is left
   held as a read lock to mark the file as in use.
   NOTE(review): many control-flow lines (conditions, goto/return, labels)
   are not visible in this listing; comments describe visible lines only. */
1386 TDB_CONTEXT *tdb_open_ex(char *name, int hash_size, int tdb_flags,
1387 int open_flags, mode_t mode,
1388 tdb_log_func log_fn)
1390 TDB_CONTEXT tdb, *ret, *i;
1392 int rev = 0, locked;
/* start from an all-zero context and record the caller's settings */
1394 memset(&tdb, 0, sizeof(tdb));
1398 tdb.lockedkeys = NULL;
1399 tdb.flags = tdb_flags;
1400 tdb.open_flags = open_flags;
1401 tdb.log_fn = log_fn;
/* write-only access mode is tested for explicitly */
1403 if ((open_flags & O_ACCMODE) == O_WRONLY) {
1409 hash_size = DEFAULT_HASH_SIZE;
1410 if ((open_flags & O_ACCMODE) == O_RDONLY) {
1412 /* read only databases don't do locking or clear if first */
1413 tdb.flags |= TDB_NOLOCK;
1414 tdb.flags &= ~TDB_CLEAR_IF_FIRST;
1417 /* internal databases don't mmap or lock, and start off cleared */
1418 if (tdb.flags & TDB_INTERNAL) {
1419 tdb.flags |= (TDB_NOLOCK | TDB_NOMMAP);
1420 tdb.flags &= ~TDB_CLEAR_IF_FIRST;
1421 tdb_new_database(&tdb, hash_size);
1425 if ((tdb.fd = open(name, open_flags, mode)) == -1)
1426 goto fail; /* errno set by open(2) */
1428 /* ensure there is only one process initialising at once */
1429 if (tdb_brlock(&tdb, GLOBAL_LOCK, F_WRLCK, F_SETLKW, 0) == -1)
1430 goto fail; /* errno set by tdb_brlock */
1432 /* we need to zero database if we are the only one with it open */
/* `locked` records whether the non-blocking write lock on ACTIVE_LOCK succeeded.
   NOTE(review): this tests the caller's tdb_flags, not tdb.flags, so the
   clearing of TDB_CLEAR_IF_FIRST above for read-only opens does not affect
   this condition - confirm that is intended. */
1433 if ((locked = (tdb_brlock(&tdb, ACTIVE_LOCK, F_WRLCK, F_SETLK, 0) == 0))
1434 && (tdb_flags & TDB_CLEAR_IF_FIRST)) {
1435 open_flags |= O_CREAT;
1436 if (ftruncate(tdb.fd, 0) == -1)
1437 goto fail; /* errno set by ftruncate */
/* read and validate the header: magic string, then version in native or
   byte-reversed form (`rev` is set when the reversed version matches).
   NOTE(review): strcmp() is applied to header bytes read from the file;
   presumably magic_food is large enough to hold a terminator - verify. */
1440 if (read(tdb.fd, &tdb.header, sizeof(tdb.header)) != sizeof(tdb.header)
1441 || strcmp(tdb.header.magic_food, TDB_MAGIC_FOOD) != 0
1442 || (tdb.header.version != TDB_VERSION
1443 && !(rev = (tdb.header.version==TDB_BYTEREV(TDB_VERSION))))) {
1444 /* it's not a valid database - possibly initialise it */
/* a fresh database is only written when O_CREAT is set in open_flags */
1445 if (!(open_flags & O_CREAT) || tdb_new_database(&tdb, hash_size) == -1) {
1446 errno = EIO; /* ie bad format or something */
1449 rev = (tdb.flags & TDB_CONVERT);
1452 tdb.flags &= ~TDB_CONVERT;
1454 tdb.flags |= TDB_CONVERT;
1455 convert(&tdb.header, sizeof(tdb.header));
1457 if (fstat(tdb.fd, &st) == -1)
1460 /* Is it already in the open list? If so, fail. */
/* identity is the (device, inode) pair from fstat() */
1461 for (i = tdbs; i; i = i->next) {
1462 if (i->device == st.st_dev && i->inode == st.st_ino) {
1469 /* map the database and fill in the return structure */
1470 tdb.name = (char *)strdup(name);
1475 tdb.map_size = st.st_size;
1476 tdb.device = st.st_dev;
1477 tdb.inode = st.st_ino;
/* one lock-state slot per hash chain plus one extra slot */
1478 tdb.locked = calloc(tdb.header.hash_size+1, sizeof(tdb.locked[0]));
1486 tdb_clear_spinlocks(&tdb);
1487 if (tdb_brlock(&tdb, ACTIVE_LOCK, F_UNLCK, F_SETLK, 0) == -1)
1490 /* leave this lock in place to indicate it's in use */
1491 if (tdb_brlock(&tdb, ACTIVE_LOCK, F_RDLCK, F_SETLKW, 0) == -1)
/* heap copy of the context that is handed back to the caller */
1495 if (!(ret = malloc(sizeof(tdb)))) {
/* initialisation finished: release the global lock */
1500 if (tdb_brlock(&tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0) == -1)
/* errno is saved at this point, before the lines that follow it */
1507 { int save_errno = errno;
1510 if (tdb.flags & TDB_INTERNAL)
/* tdb_close: shut down a context.
   Visible steps: test for TDB_INTERNAL, close the file descriptor (result
   kept in `ret`), free the lockedkeys array if present, walk the global
   `tdbs` list to unlink this context, and zero the context structure.
   NOTE(review): the remaining lines of this function are not visible in
   this listing. */
1526 /* close a database */
1527 int tdb_close(TDB_CONTEXT *tdb)
1533 if (tdb->flags & TDB_INTERNAL)
1541 ret = close(tdb->fd);
1544 if (tdb->lockedkeys)
1545 free(tdb->lockedkeys);
1547 /* Remove from contexts list */
/* `i` points at the link that refers to the current list element */
1548 for (i = &tdbs; *i; i = &(*i)->next) {
/* wipe the structure so stale fields cannot be reused */
1555 memset(tdb, 0, sizeof(*tdb));
/* tdb_lockall: take a write lock on every hash chain, in index order.
   Fails with TDB_ERR_NOLOCK when tdb_lockkeys() is in effect
   (tdb->lockedkeys set) or when a chain cannot be locked; in the latter
   case the chains locked so far are unlocked again before returning.
   NOTE(review): the condition guarding the TDB_ERR_LOCK return and the
   success return are not visible in this listing. */
1561 /* lock/unlock entire database */
1562 int tdb_lockall(TDB_CONTEXT *tdb)
1566 /* There are no locks on read-only dbs */
1568 return TDB_ERRCODE(TDB_ERR_LOCK, -1);
1569 if (tdb->lockedkeys)
1570 return TDB_ERRCODE(TDB_ERR_NOLOCK, -1);
1571 for (i = 0; i < tdb->header.hash_size; i++)
1572 if (tdb_lock(tdb, i, F_WRLCK))
1575 /* If error, release locks we have... */
/* i stopping short of hash_size means chain i could not be locked */
1576 if (i < tdb->header.hash_size) {
1579 for ( j = 0; j < i; j++)
1580 tdb_unlock(tdb, j, F_WRLCK);
1581 return TDB_ERRCODE(TDB_ERR_NOLOCK, -1);
/* tdb_unlockall: release the write lock on every hash chain, index 0 up to
   header.hash_size - 1. */
1586 void tdb_unlockall(TDB_CONTEXT *tdb)
1589 for (i=0; i < tdb->header.hash_size; i++)
1590 tdb_unlock(tdb, i, F_WRLCK);
1593 int tdb_lockkeys(TDB_CONTEXT *tdb, u32 number, TDB_DATA keys[])
1597 /* Can't lock more keys if already locked */
1598 if (tdb->lockedkeys)
1599 return TDB_ERRCODE(TDB_ERR_NOLOCK, -1);
1600 if (!(tdb->lockedkeys = malloc(sizeof(u32) * (number+1))))
1601 return TDB_ERRCODE(TDB_ERR_OOM, -1);
1602 /* First number in array is # keys */
1603 tdb->lockedkeys[0] = number;
1605 /* Insertion sort by bucket */
1606 for (i = 0; i < number; i++) {
1607 hash = tdb_hash(&keys[i]);
1608 for (j = 0; j < i && BUCKET(tdb->lockedkeys[j+1]) < BUCKET(hash); j++);
1609 memmove(&tdb->lockedkeys[j+2], &tdb->lockedkeys[j+1], sizeof(u32) * (i-j));
1610 tdb->lockedkeys[j+1] = hash;
1612 /* Finally, lock in order */
1613 for (i = 0; i < number; i++)
1614 if (tdb_lock(tdb, i, F_WRLCK))
1617 /* If error, release locks we have... */
1619 for ( j = 0; j < i; j++)
1620 tdb_unlock(tdb, j, F_WRLCK);
1621 free(tdb->lockedkeys);
1622 tdb->lockedkeys = NULL;
1623 return TDB_ERRCODE(TDB_ERR_NOLOCK, -1);
1628 /* Unlock the keys previously locked by tdb_lockkeys() */
1629 void tdb_unlockkeys(TDB_CONTEXT *tdb)
1632 for (i = 0; i < tdb->lockedkeys[0]; i++)
1633 tdb_unlock(tdb, tdb->lockedkeys[i+1], F_WRLCK);
1634 free(tdb->lockedkeys);
1635 tdb->lockedkeys = NULL;
/* tdb_chainlock: write-lock the hash chain that `key` hashes to
   (BUCKET(tdb_hash(&key))) and return the result of tdb_lock(). */
1638 /* lock/unlock one hash chain. This is meant to be used to reduce
1639 contention - it cannot guarantee how many records will be locked */
1640 int tdb_chainlock(TDB_CONTEXT *tdb, TDB_DATA key)
1642 return tdb_lock(tdb, BUCKET(tdb_hash(&key)), F_WRLCK);
/* tdb_chainunlock: release the write lock on the hash chain that `key`
   hashes to; the counterpart of tdb_chainlock(). */
1644 void tdb_chainunlock(TDB_CONTEXT *tdb, TDB_DATA key)
1646 tdb_unlock(tdb, BUCKET(tdb_hash(&key)), F_WRLCK);
/* `fn` takes the context, an int and a printf-style format with arguments.
   NOTE(review): the body is not visible in this listing; presumably it
   stores `fn` in the log_fn field that TDB_LOG() calls - confirm. */
1650 /* register a logging function */
1651 void tdb_logging_function(TDB_CONTEXT *tdb, void (*fn)(TDB_CONTEXT *, int , const char *, ...))
/* tdb_reopen: give this context its own file descriptor again.
   Visible steps: open tdb->name with the saved open_flags minus O_CREAT and
   O_TRUNC, check that the file still has the recorded device/inode, and
   take ACTIVE_LOCK as a blocking read lock.  Each failure is reported
   through TDB_LOG().
   NOTE(review): the lines that run before the open and the return/cleanup
   lines are not visible in this listing. */
1657 /* reopen a tdb - this is used after a fork to ensure that we have an independent
1658 seek pointer from our parent and to re-establish locks */
1659 int tdb_reopen(TDB_CONTEXT *tdb)
/* never create or truncate here: the file must already exist */
1665 tdb->fd = open(tdb->name, tdb->open_flags & ~(O_CREAT|O_TRUNC), 0);
1666 if (tdb->fd == -1) {
1667 TDB_LOG((tdb, 0, "tdb_reopen: open failed (%s)\n", strerror(errno)));
/* NOTE(review): the return value of fstat() is not checked; if it fails,
   `st` is compared below without having been filled in. */
1670 fstat(tdb->fd, &st);
1671 if (st.st_ino != tdb->inode || st.st_dev != tdb->device) {
1672 TDB_LOG((tdb, 0, "tdb_reopen: file dev/inode has changed!\n"));
/* re-take the read lock on ACTIVE_LOCK on the new descriptor */
1676 if (tdb_brlock(tdb, ACTIVE_LOCK, F_RDLCK, F_SETLKW, 0) == -1) {
1677 TDB_LOG((tdb, 0, "tdb_reopen: failed to obtain active lock\n"));
/* tdb_reopen_all: call tdb_reopen() on every context in the global `tdbs`
   list, returning -1 as soon as one of them fails.
   NOTE(review): the success return is not visible in this listing. */
1688 /* reopen all tdb's */
1689 int tdb_reopen_all(void)
1693 for (tdb=tdbs; tdb; tdb = tdb->next) {
1694 if (tdb_reopen(tdb) != 0) return -1;