/*
   Unix SMB/CIFS implementation.

   trivial database library

   Copyright (C) Andrew Tridgell              1999-2005
   Copyright (C) Paul `Rusty' Russell		   2000
   Copyright (C) Jeremy Allison			   2000-2003

     ** NOTE! The following LGPL license applies to the tdb
     ** library. This does NOT imply that all of Samba is released
     ** under the LGPL

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "tdb_private.h"

#include <errno.h>
30 _PUBLIC_ TDB_DATA tdb_null;
33 non-blocking increment of the tdb sequence number if the tdb has been opened using
36 _PUBLIC_ void tdb_increment_seqnum_nonblock(struct tdb_context *tdb)
40 if (!(tdb->flags & TDB_SEQNUM)) {
44 /* we ignore errors from this, as we have no sane way of
47 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
49 tdb_ofs_write(tdb, TDB_SEQNUM_OFS, &seqnum);
53 increment the tdb sequence number if the tdb has been opened using
56 static void tdb_increment_seqnum(struct tdb_context *tdb)
58 if (!(tdb->flags & TDB_SEQNUM)) {
62 if (tdb->transaction != NULL) {
63 tdb_increment_seqnum_nonblock(tdb);
67 if (tdb_nest_lock(tdb, TDB_SEQNUM_OFS, F_WRLCK,
68 TDB_LOCK_WAIT|TDB_LOCK_PROBE) != 0) {
72 tdb_increment_seqnum_nonblock(tdb);
74 tdb_nest_unlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, false);
77 static int tdb_key_compare(TDB_DATA key, TDB_DATA data, void *private_data)
79 return memcmp(data.dptr, key.dptr, data.dsize);
82 /* Returns 0 on fail. On success, return offset of record, and fills
84 static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
89 /* read in the hash top */
90 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
93 /* keep looking until we find the right record */
95 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
98 if (!TDB_DEAD(r) && hash==r->full_hash
99 && key.dsize==r->key_len
100 && tdb_parse_data(tdb, key, rec_ptr + sizeof(*r),
101 r->key_len, tdb_key_compare,
105 /* detect tight infinite loop */
106 if (rec_ptr == r->next) {
107 tdb->ecode = TDB_ERR_CORRUPT;
108 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_find: loop detected.\n"));
113 tdb->ecode = TDB_ERR_NOEXIST;
117 /* As tdb_find, but if you succeed, keep the lock */
118 tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype,
119 struct tdb_record *rec)
123 if (tdb_lock(tdb, BUCKET(hash), locktype) == -1)
125 if (!(rec_ptr = tdb_find(tdb, key, hash, rec)))
126 tdb_unlock(tdb, BUCKET(hash), locktype);
130 static TDB_DATA _tdb_fetch(struct tdb_context *tdb, TDB_DATA key);
132 struct tdb_update_hash_state {
133 const TDB_DATA *dbufs;
138 static int tdb_update_hash_cmp(TDB_DATA key, TDB_DATA data, void *private_data)
140 struct tdb_update_hash_state *state = private_data;
141 unsigned char *dptr = data.dptr;
144 if (state->dbufs_len != data.dsize) {
148 for (i=0; i<state->num_dbufs; i++) {
149 TDB_DATA dbuf = state->dbufs[i];
151 ret = memcmp(dptr, dbuf.dptr, dbuf.dsize);
161 /* update an entry in place - this only works if the new data size
162 is <= the old data size and the key exists.
163 on failure return -1.
165 static int tdb_update_hash(struct tdb_context *tdb, TDB_DATA key,
167 const TDB_DATA *dbufs, int num_dbufs,
170 struct tdb_record rec;
171 tdb_off_t rec_ptr, ofs;
175 if (!(rec_ptr = tdb_find(tdb, key, hash, &rec)))
178 /* it could be an exact duplicate of what is there - this is
179 * surprisingly common (eg. with a ldb re-index). */
180 if (rec.data_len == dbufs_len) {
181 struct tdb_update_hash_state state = {
182 .dbufs = dbufs, .num_dbufs = num_dbufs,
183 .dbufs_len = dbufs_len
187 ret = tdb_parse_record(tdb, key, tdb_update_hash_cmp, &state);
193 /* must be long enough key, data and tailer */
194 if (rec.rec_len < key.dsize + dbufs_len + sizeof(tdb_off_t)) {
195 tdb->ecode = TDB_SUCCESS; /* Not really an error */
199 ofs = rec_ptr + sizeof(rec) + rec.key_len;
201 for (i=0; i<num_dbufs; i++) {
202 TDB_DATA dbuf = dbufs[i];
205 ret = tdb->methods->tdb_write(tdb, ofs, dbuf.dptr, dbuf.dsize);
212 if (dbufs_len != rec.data_len) {
214 rec.data_len = dbufs_len;
215 return tdb_rec_write(tdb, rec_ptr, &rec);
221 /* find an entry in the database given a key */
222 /* If an entry doesn't exist tdb_err will be set to
223 * TDB_ERR_NOEXIST. If a key has no data attached
224 * then the TDB_DATA will have zero length but
227 static TDB_DATA _tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
230 struct tdb_record rec;
234 /* find which hash bucket it is in */
235 hash = tdb->hash_fn(&key);
236 if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec)))
239 ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
241 ret.dsize = rec.data_len;
242 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
246 _PUBLIC_ TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
248 TDB_DATA ret = _tdb_fetch(tdb, key);
250 tdb_trace_1rec_retrec(tdb, "tdb_fetch", key, ret);
255 * Find an entry in the database and hand the record's data to a parsing
256 * function. The parsing function is executed under the chain read lock, so it
257 * should be fast and should not block on other syscalls.
259 * DON'T CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
261 * For mmapped tdb's that do not have a transaction open it points the parsing
262 * function directly at the mmap area, it avoids the malloc/memcpy in this
263 * case. If a transaction is open or no mmap is available, it has to do
264 * malloc/read/parse/free.
266 * This is interesting for all readers of potentially large data structures in
267 * the tdb records, ldb indexes being one example.
269 * Return -1 if the record was not found.
272 _PUBLIC_ int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
273 int (*parser)(TDB_DATA key, TDB_DATA data,
278 struct tdb_record rec;
282 /* find which hash bucket it is in */
283 hash = tdb->hash_fn(&key);
285 if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
286 /* record not found */
287 tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, -1);
288 tdb->ecode = TDB_ERR_NOEXIST;
291 tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, 0);
293 ret = tdb_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
294 rec.data_len, parser, private_data);
296 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
301 /* check if an entry in the database exists
303 note that 1 is returned if the key is found and 0 is returned if not found
304 this doesn't match the conventions in the rest of this module, but is
307 static int tdb_exists_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
309 struct tdb_record rec;
311 if (tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
313 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
317 _PUBLIC_ int tdb_exists(struct tdb_context *tdb, TDB_DATA key)
319 uint32_t hash = tdb->hash_fn(&key);
322 ret = tdb_exists_hash(tdb, key, hash);
323 tdb_trace_1rec_ret(tdb, "tdb_exists", key, ret);
327 /* actually delete an entry in the database given the offset */
328 int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct tdb_record *rec)
330 tdb_off_t last_ptr, i;
331 struct tdb_record lastrec;
333 if (tdb->read_only || tdb->traverse_read) return -1;
335 if (((tdb->traverse_write != 0) && (!TDB_DEAD(rec))) ||
336 tdb_write_lock_record(tdb, rec_ptr) == -1) {
337 /* Someone traversing here: mark it as dead */
338 rec->magic = TDB_DEAD_MAGIC;
339 return tdb_rec_write(tdb, rec_ptr, rec);
341 if (tdb_write_unlock_record(tdb, rec_ptr) != 0)
344 /* find previous record in hash chain */
345 if (tdb_ofs_read(tdb, TDB_HASH_TOP(rec->full_hash), &i) == -1)
347 for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
348 if (tdb_rec_read(tdb, i, &lastrec) == -1)
351 /* unlink it: next ptr is at start of record. */
353 last_ptr = TDB_HASH_TOP(rec->full_hash);
354 if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1)
357 /* recover the space */
358 if (tdb_free(tdb, rec_ptr, rec) == -1)
363 static int tdb_count_dead(struct tdb_context *tdb, uint32_t hash)
367 struct tdb_record rec;
369 /* read in the hash top */
370 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
374 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1)
377 if (rec.magic == TDB_DEAD_MAGIC) {
386 * Purge all DEAD records from a hash chain
388 int tdb_purge_dead(struct tdb_context *tdb, uint32_t hash)
391 struct tdb_record rec;
394 if (tdb_lock_nonblock(tdb, -1, F_WRLCK) == -1) {
396 * Don't block the freelist if not strictly necessary
401 /* read in the hash top */
402 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
408 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1) {
414 if (rec.magic == TDB_DEAD_MAGIC
415 && tdb_do_delete(tdb, rec_ptr, &rec) == -1) {
422 tdb_unlock(tdb, -1, F_WRLCK);
426 /* delete an entry in the database given a key */
427 static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
430 struct tdb_record rec;
433 rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK, &rec);
438 if (tdb->max_dead_records != 0) {
440 uint32_t magic = TDB_DEAD_MAGIC;
443 * Allow for some dead records per hash chain, mainly for
444 * tdb's with a very high create/delete rate like locking.tdb.
447 if (tdb_count_dead(tdb, hash) >= tdb->max_dead_records) {
449 * Don't let the per-chain freelist grow too large,
450 * delete all existing dead records
452 tdb_purge_dead(tdb, hash);
456 * Just mark the record as dead.
459 tdb, rec_ptr + offsetof(struct tdb_record, magic),
463 ret = tdb_do_delete(tdb, rec_ptr, &rec);
467 tdb_increment_seqnum(tdb);
470 if (tdb_unlock(tdb, BUCKET(hash), F_WRLCK) != 0)
471 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_delete: WARNING tdb_unlock failed!\n"));
475 _PUBLIC_ int tdb_delete(struct tdb_context *tdb, TDB_DATA key)
477 uint32_t hash = tdb->hash_fn(&key);
480 ret = tdb_delete_hash(tdb, key, hash);
481 tdb_trace_1rec_ret(tdb, "tdb_delete", key, ret);
486 * See if we have a dead record around with enough space
488 tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash,
489 struct tdb_record *r, tdb_len_t length,
490 tdb_off_t *p_last_ptr)
492 tdb_off_t rec_ptr, last_ptr;
493 tdb_off_t best_rec_ptr = 0;
494 tdb_off_t best_last_ptr = 0;
495 struct tdb_record best = { .rec_len = UINT32_MAX };
497 length += sizeof(tdb_off_t); /* tailer */
499 last_ptr = TDB_HASH_TOP(hash);
501 /* read in the hash top */
502 if (tdb_ofs_read(tdb, last_ptr, &rec_ptr) == -1)
505 /* keep looking until we find the right record */
507 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
510 if (TDB_DEAD(r) && (r->rec_len >= length) &&
511 (r->rec_len < best.rec_len)) {
512 best_rec_ptr = rec_ptr;
513 best_last_ptr = last_ptr;
520 if (best.rec_len == UINT32_MAX) {
525 *p_last_ptr = best_last_ptr;
529 static int _tdb_store(struct tdb_context *tdb, TDB_DATA key,
530 TDB_DATA dbuf, int flag, uint32_t hash)
532 struct tdb_record rec;
537 rec_len = key.dsize + dbuf.dsize;
538 if ((rec_len < key.dsize) || (rec_len < dbuf.dsize)) {
539 tdb->ecode = TDB_ERR_OOM;
543 /* check for it existing, on insert. */
544 if (flag == TDB_INSERT) {
545 if (tdb_exists_hash(tdb, key, hash)) {
546 tdb->ecode = TDB_ERR_EXISTS;
550 /* first try in-place update, on modify or replace. */
551 if (tdb_update_hash(tdb, key, hash, &dbuf, 1,
555 if (tdb->ecode == TDB_ERR_NOEXIST &&
556 flag == TDB_MODIFY) {
557 /* if the record doesn't exist and we are in TDB_MODIFY mode then
558 we should fail the store */
562 /* reset the error code potentially set by the tdb_update_hash() */
563 tdb->ecode = TDB_SUCCESS;
565 /* delete any existing record - if it doesn't exist we don't
566 care. Doing this first reduces fragmentation, and avoids
567 coalescing with `allocated' block before it's updated. */
568 if (flag != TDB_INSERT)
569 tdb_delete_hash(tdb, key, hash);
571 /* we have to allocate some space */
572 rec_ptr = tdb_allocate(tdb, hash, rec_len, &rec);
578 /* Read hash top into next ptr */
579 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
582 rec.key_len = key.dsize;
583 rec.data_len = dbuf.dsize;
584 rec.full_hash = hash;
585 rec.magic = TDB_MAGIC;
587 /* write out and point the top of the hash chain at it */
588 if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
589 || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec),
590 key.dptr, key.dsize) == -1
591 || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec)+key.dsize,
592 dbuf.dptr, dbuf.dsize) == -1
593 || tdb_ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) {
594 /* Need to tdb_unallocate() here */
602 tdb_increment_seqnum(tdb);
607 /* store an element in the database, replacing any existing element
610 return 0 on success, -1 on failure
612 _PUBLIC_ int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
617 if (tdb->read_only || tdb->traverse_read) {
618 tdb->ecode = TDB_ERR_RDONLY;
619 tdb_trace_2rec_flag_ret(tdb, "tdb_store", key, dbuf, flag, -1);
623 /* find which hash bucket it is in */
624 hash = tdb->hash_fn(&key);
625 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
628 ret = _tdb_store(tdb, key, dbuf, flag, hash);
629 tdb_trace_2rec_flag_ret(tdb, "tdb_store", key, dbuf, flag, ret);
630 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
634 /* Append to an entry. Create if not exist. */
635 _PUBLIC_ int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
641 /* find which hash bucket it is in */
642 hash = tdb->hash_fn(&key);
643 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
646 dbuf = _tdb_fetch(tdb, key);
648 if (dbuf.dptr == NULL) {
649 dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize);
651 unsigned int new_len = dbuf.dsize + new_dbuf.dsize;
652 unsigned char *new_dptr;
654 /* realloc '0' is special: don't do that. */
657 new_dptr = (unsigned char *)realloc(dbuf.dptr, new_len);
658 if (new_dptr == NULL) {
661 dbuf.dptr = new_dptr;
664 if (dbuf.dptr == NULL) {
665 tdb->ecode = TDB_ERR_OOM;
669 memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize);
670 dbuf.dsize += new_dbuf.dsize;
672 ret = _tdb_store(tdb, key, dbuf, 0, hash);
673 tdb_trace_2rec_retrec(tdb, "tdb_append", key, new_dbuf, dbuf);
676 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
677 SAFE_FREE(dbuf.dptr);
683 return the name of the current tdb file
684 useful for external logging functions
686 _PUBLIC_ const char *tdb_name(struct tdb_context *tdb)
692 return the underlying file descriptor being used by tdb, or -1
693 useful for external routines that want to check the device/inode
696 _PUBLIC_ int tdb_fd(struct tdb_context *tdb)
702 return the current logging function
703 useful for external tdb routines that wish to log tdb errors
705 _PUBLIC_ tdb_log_func tdb_log_fn(struct tdb_context *tdb)
707 return tdb->log.log_fn;
712 get the tdb sequence number. Only makes sense if the writers opened
713 with TDB_SEQNUM set. Note that this sequence number will wrap quite
714 quickly, so it should only be used for a 'has something changed'
715 test, not for code that relies on the count of the number of changes
716 made. If you want a counter then use a tdb record.
718 The aim of this sequence number is to allow for a very lightweight
719 test of a possible tdb change.
721 _PUBLIC_ int tdb_get_seqnum(struct tdb_context *tdb)
725 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
729 _PUBLIC_ int tdb_hash_size(struct tdb_context *tdb)
731 return tdb->hash_size;
734 _PUBLIC_ size_t tdb_map_size(struct tdb_context *tdb)
736 return tdb->map_size;
739 _PUBLIC_ int tdb_get_flags(struct tdb_context *tdb)
744 _PUBLIC_ void tdb_add_flags(struct tdb_context *tdb, unsigned flags)
746 if ((flags & TDB_ALLOW_NESTING) &&
747 (flags & TDB_DISALLOW_NESTING)) {
748 tdb->ecode = TDB_ERR_NESTING;
749 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_add_flags: "
750 "allow_nesting and disallow_nesting are not allowed together!"));
754 if (flags & TDB_ALLOW_NESTING) {
755 tdb->flags &= ~TDB_DISALLOW_NESTING;
757 if (flags & TDB_DISALLOW_NESTING) {
758 tdb->flags &= ~TDB_ALLOW_NESTING;
764 _PUBLIC_ void tdb_remove_flags(struct tdb_context *tdb, unsigned flags)
766 if ((flags & TDB_ALLOW_NESTING) &&
767 (flags & TDB_DISALLOW_NESTING)) {
768 tdb->ecode = TDB_ERR_NESTING;
769 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_remove_flags: "
770 "allow_nesting and disallow_nesting are not allowed together!"));
774 if ((flags & TDB_NOLOCK) &&
775 (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) &&
776 (tdb->mutexes == NULL)) {
777 tdb->ecode = TDB_ERR_LOCK;
778 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_remove_flags: "
779 "Can not remove NOLOCK flag on mutexed databases"));
783 if (flags & TDB_ALLOW_NESTING) {
784 tdb->flags |= TDB_DISALLOW_NESTING;
786 if (flags & TDB_DISALLOW_NESTING) {
787 tdb->flags |= TDB_ALLOW_NESTING;
790 tdb->flags &= ~flags;
795 enable sequence number handling on an open tdb
797 _PUBLIC_ void tdb_enable_seqnum(struct tdb_context *tdb)
799 tdb->flags |= TDB_SEQNUM;
804 add a region of the file to the freelist. Length is the size of the region in bytes,
805 which includes the free list header that needs to be added
807 static int tdb_free_region(struct tdb_context *tdb, tdb_off_t offset, ssize_t length)
809 struct tdb_record rec;
810 if (length <= sizeof(rec)) {
811 /* the region is not worth adding */
814 if (length + offset > tdb->map_size) {
815 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: adding region beyond end of file\n"));
818 memset(&rec,'\0',sizeof(rec));
819 rec.rec_len = length - sizeof(rec);
820 if (tdb_free(tdb, offset, &rec) == -1) {
821 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: failed to add free record\n"));
828 wipe the entire database, deleting all records. This can be done
829 very fast by using a allrecord lock. The entire data portion of the
830 file becomes a single entry in the freelist.
832 This code carefully steps around the recovery area, leaving it alone
834 _PUBLIC_ int tdb_wipe_all(struct tdb_context *tdb)
837 tdb_off_t offset = 0;
839 tdb_off_t recovery_head;
840 tdb_len_t recovery_size = 0;
842 if (tdb_lockall(tdb) != 0) {
846 tdb_trace(tdb, "tdb_wipe_all");
848 /* see if the tdb has a recovery area, and remember its size
849 if so. We don't want to lose this as otherwise each
850 tdb_wipe_all() in a transaction will increase the size of
851 the tdb by the size of the recovery area */
852 if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) {
853 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery head\n"));
857 if (recovery_head != 0) {
858 struct tdb_record rec;
859 if (tdb->methods->tdb_read(tdb, recovery_head, &rec, sizeof(rec), DOCONV()) == -1) {
860 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery record\n"));
863 recovery_size = rec.rec_len + sizeof(rec);
866 /* wipe the hashes */
867 for (i=0;i<tdb->hash_size;i++) {
868 if (tdb_ofs_write(tdb, TDB_HASH_TOP(i), &offset) == -1) {
869 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write hash %d\n", i));
874 /* wipe the freelist */
875 if (tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) {
876 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write freelist\n"));
880 /* add all the rest of the file to the freelist, possibly leaving a gap
881 for the recovery area */
882 if (recovery_size == 0) {
883 /* the simple case - the whole file can be used as a freelist */
884 data_len = (tdb->map_size - TDB_DATA_START(tdb->hash_size));
885 if (tdb_free_region(tdb, TDB_DATA_START(tdb->hash_size), data_len) != 0) {
889 /* we need to add two freelist entries - one on either
890 side of the recovery area
892 Note that we cannot shift the recovery area during
893 this operation. Only the transaction.c code may
894 move the recovery area or we risk subtle data
897 data_len = (recovery_head - TDB_DATA_START(tdb->hash_size));
898 if (tdb_free_region(tdb, TDB_DATA_START(tdb->hash_size), data_len) != 0) {
901 /* and the 2nd free list entry after the recovery area - if any */
902 data_len = tdb->map_size - (recovery_head+recovery_size);
903 if (tdb_free_region(tdb, recovery_head+recovery_size, data_len) != 0) {
908 tdb_increment_seqnum_nonblock(tdb);
910 if (tdb_unlockall(tdb) != 0) {
911 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to unlock\n"));
/* State shared between tdb_repack() and its traverse callback. */
struct traverse_state {
	bool error;			/* set when a store into dest_db failed */
	struct tdb_context *dest_db;	/* database the records are copied into */
};
928 traverse function for repacking
930 static int repack_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *private_data)
932 struct traverse_state *state = (struct traverse_state *)private_data;
933 if (tdb_store(state->dest_db, key, data, TDB_INSERT) != 0) {
943 _PUBLIC_ int tdb_repack(struct tdb_context *tdb)
945 struct tdb_context *tmp_db;
946 struct traverse_state state;
948 tdb_trace(tdb, "tdb_repack");
950 if (tdb_transaction_start(tdb) != 0) {
951 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to start transaction\n"));
955 tmp_db = tdb_open("tmpdb", tdb_hash_size(tdb), TDB_INTERNAL, O_RDWR|O_CREAT, 0);
956 if (tmp_db == NULL) {
957 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to create tmp_db\n"));
958 tdb_transaction_cancel(tdb);
963 state.dest_db = tmp_db;
965 if (tdb_traverse_read(tdb, repack_traverse, &state) == -1) {
966 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to traverse copying out\n"));
967 tdb_transaction_cancel(tdb);
973 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Error during traversal\n"));
974 tdb_transaction_cancel(tdb);
979 if (tdb_wipe_all(tdb) != 0) {
980 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to wipe database\n"));
981 tdb_transaction_cancel(tdb);
989 if (tdb_traverse_read(tmp_db, repack_traverse, &state) == -1) {
990 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to traverse copying back\n"));
991 tdb_transaction_cancel(tdb);
997 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Error during second traversal\n"));
998 tdb_transaction_cancel(tdb);
1005 if (tdb_transaction_commit(tdb) != 0) {
1006 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to commit\n"));
/* Write all `count` bytes starting at `buf` to `fd`.

   Even on files, we can get partial writes due to signals, so keep
   calling write() until everything is out. A write() that was
   interrupted before transferring any byte (EINTR) is retried instead
   of being reported as a failure.

   Returns true once every byte has been written, false on any other
   write error. */
bool tdb_write_all(int fd, const void *buf, size_t count)
{
	const char *cursor = buf;

	while (count != 0) {
		ssize_t written = write(fd, cursor, count);

		if (written < 0) {
			if (errno == EINTR) {
				continue;
			}
			return false;
		}
		cursor += written;
		count -= (size_t)written;
	}
	return true;
}
1027 bool tdb_add_off_t(tdb_off_t a, tdb_off_t b, tdb_off_t *pret)
1029 tdb_off_t ret = a + b;
1031 if ((ret < a) || (ret < b)) {
1039 static void tdb_trace_write(struct tdb_context *tdb, const char *str)
1041 if (!tdb_write_all(tdb->tracefd, str, strlen(str))) {
1042 close(tdb->tracefd);
1047 static void tdb_trace_start(struct tdb_context *tdb)
1050 char msg[sizeof(tdb_off_t) * 4 + 1];
1052 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
1053 snprintf(msg, sizeof(msg), "%u ", seqnum);
1054 tdb_trace_write(tdb, msg);
/* Finish a trace line with a newline. */
static void tdb_trace_end(struct tdb_context *tdb)
{
	tdb_trace_write(tdb, "\n");
}
/* Finish a trace line with " = <ret>" and a newline. */
static void tdb_trace_end_ret(struct tdb_context *tdb, int ret)
{
	char suffix[sizeof(ret) * 4 + 4];

	snprintf(suffix, sizeof(suffix), " = %i\n", ret);
	tdb_trace_write(tdb, suffix);
}
1069 static void tdb_trace_record(struct tdb_context *tdb, TDB_DATA rec)
1071 char msg[20 + rec.dsize*2], *p;
1074 /* We differentiate zero-length records from non-existent ones. */
1075 if (rec.dptr == NULL) {
1076 tdb_trace_write(tdb, " NULL");
1080 /* snprintf here is purely cargo-cult programming. */
1082 p += snprintf(p, sizeof(msg), " %zu:", rec.dsize);
1083 for (i = 0; i < rec.dsize; i++)
1084 p += snprintf(p, 2, "%02x", rec.dptr[i]);
1086 tdb_trace_write(tdb, msg);
/* Write a complete trace line consisting of the sequence number and
   the operation name `op`. */
void tdb_trace(struct tdb_context *tdb, const char *op)
{
	tdb_trace_start(tdb);
	tdb_trace_write(tdb, op);
	tdb_trace_end(tdb);
}
1096 void tdb_trace_seqnum(struct tdb_context *tdb, uint32_t seqnum, const char *op)
1098 char msg[sizeof(tdb_off_t) * 4 + 1];
1100 snprintf(msg, sizeof(msg), "%u ", seqnum);
1101 tdb_trace_write(tdb, msg);
1102 tdb_trace_write(tdb, op);
/* Write a trace line for an open call: the operation name, the hash
   size in decimal, and the tdb flags and open flags in hex. The text
   is formatted into a fixed-size buffer, so an over-long `op` is
   truncated by snprintf(). */
void tdb_trace_open(struct tdb_context *tdb, const char *op,
		    unsigned hash_size, unsigned tdb_flags, unsigned open_flags)
{
	char line[128];

	snprintf(line, sizeof(line),
		 "%s %u 0x%x 0x%x", op, hash_size, tdb_flags, open_flags);
	tdb_trace_start(tdb);
	tdb_trace_write(tdb, line);
	tdb_trace_end(tdb);
}
/* Write a trace line for `op` ending in " = <ret>". */
void tdb_trace_ret(struct tdb_context *tdb, const char *op, int ret)
{
	tdb_trace_start(tdb);
	tdb_trace_write(tdb, op);
	tdb_trace_end_ret(tdb, ret);
}
1125 void tdb_trace_retrec(struct tdb_context *tdb, const char *op, TDB_DATA ret)
1127 tdb_trace_start(tdb);
1128 tdb_trace_write(tdb, op);
1129 tdb_trace_write(tdb, " =");
1130 tdb_trace_record(tdb, ret);
1134 void tdb_trace_1rec(struct tdb_context *tdb, const char *op,
1137 tdb_trace_start(tdb);
1138 tdb_trace_write(tdb, op);
1139 tdb_trace_record(tdb, rec);
1143 void tdb_trace_1rec_ret(struct tdb_context *tdb, const char *op,
1144 TDB_DATA rec, int ret)
1146 tdb_trace_start(tdb);
1147 tdb_trace_write(tdb, op);
1148 tdb_trace_record(tdb, rec);
1149 tdb_trace_end_ret(tdb, ret);
1152 void tdb_trace_1rec_retrec(struct tdb_context *tdb, const char *op,
1153 TDB_DATA rec, TDB_DATA ret)
1155 tdb_trace_start(tdb);
1156 tdb_trace_write(tdb, op);
1157 tdb_trace_record(tdb, rec);
1158 tdb_trace_write(tdb, " =");
1159 tdb_trace_record(tdb, ret);
1163 void tdb_trace_2rec_flag_ret(struct tdb_context *tdb, const char *op,
1164 TDB_DATA rec1, TDB_DATA rec2, unsigned flag,
1167 char msg[1 + sizeof(ret) * 4];
1169 snprintf(msg, sizeof(msg), " %#x", flag);
1170 tdb_trace_start(tdb);
1171 tdb_trace_write(tdb, op);
1172 tdb_trace_record(tdb, rec1);
1173 tdb_trace_record(tdb, rec2);
1174 tdb_trace_write(tdb, msg);
1175 tdb_trace_end_ret(tdb, ret);
1178 void tdb_trace_2rec_retrec(struct tdb_context *tdb, const char *op,
1179 TDB_DATA rec1, TDB_DATA rec2, TDB_DATA ret)
1181 tdb_trace_start(tdb);
1182 tdb_trace_write(tdb, op);
1183 tdb_trace_record(tdb, rec1);
1184 tdb_trace_record(tdb, rec2);
1185 tdb_trace_write(tdb, " =");
1186 tdb_trace_record(tdb, ret);