2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 3 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, see <http://www.gnu.org/licenses/>.
28 #include "tdb_private.h"
33 non-blocking increment of the tdb sequence number if the tdb has been opened using
36 void tdb_increment_seqnum_nonblock(struct tdb_context *tdb)
40 if (!(tdb->flags & TDB_SEQNUM)) {
44 /* we ignore errors from this, as we have no sane way of
47 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
49 tdb_ofs_write(tdb, TDB_SEQNUM_OFS, &seqnum);
53 increment the tdb sequence number if the tdb has been opened using
56 static void tdb_increment_seqnum(struct tdb_context *tdb)
58 if (!(tdb->flags & TDB_SEQNUM)) {
62 if (tdb_nest_lock(tdb, TDB_SEQNUM_OFS, F_WRLCK,
63 TDB_LOCK_WAIT|TDB_LOCK_PROBE) != 0) {
67 tdb_increment_seqnum_nonblock(tdb);
69 tdb_nest_unlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, false);
72 static int tdb_key_compare(TDB_DATA key, TDB_DATA data, void *private_data)
74 return memcmp(data.dptr, key.dptr, data.dsize);
77 /* Returns 0 on fail. On success, return offset of record, and fills
79 static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
84 /* read in the hash top */
85 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
88 /* keep looking until we find the right record */
90 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
93 if (!TDB_DEAD(r) && hash==r->full_hash
94 && key.dsize==r->key_len
95 && tdb_parse_data(tdb, key, rec_ptr + sizeof(*r),
96 r->key_len, tdb_key_compare,
100 /* detect tight infinite loop */
101 if (rec_ptr == r->next) {
102 tdb->ecode = TDB_ERR_CORRUPT;
103 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_find: loop detected.\n"));
108 tdb->ecode = TDB_ERR_NOEXIST;
112 /* As tdb_find, but if you succeed, keep the lock */
113 tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype,
114 struct tdb_record *rec)
118 if (tdb_lock(tdb, BUCKET(hash), locktype) == -1)
120 if (!(rec_ptr = tdb_find(tdb, key, hash, rec)))
121 tdb_unlock(tdb, BUCKET(hash), locktype);
125 static TDB_DATA _tdb_fetch(struct tdb_context *tdb, TDB_DATA key);
127 /* update an entry in place - this only works if the new data size
128 is <= the old data size and the key exists.
129 on failure return -1.
131 static int tdb_update_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, TDB_DATA dbuf)
133 struct tdb_record rec;
137 if (!(rec_ptr = tdb_find(tdb, key, hash, &rec)))
140 /* it could be an exact duplicate of what is there - this is
141 * surprisingly common (eg. with a ldb re-index). */
142 if (rec.key_len == key.dsize &&
143 rec.data_len == dbuf.dsize &&
144 rec.full_hash == hash) {
145 TDB_DATA data = _tdb_fetch(tdb, key);
146 if (data.dsize == dbuf.dsize &&
147 memcmp(data.dptr, dbuf.dptr, data.dsize) == 0) {
159 /* must be long enough key, data and tailer */
160 if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb_off_t)) {
161 tdb->ecode = TDB_SUCCESS; /* Not really an error */
165 if (tdb->methods->tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
166 dbuf.dptr, dbuf.dsize) == -1)
169 if (dbuf.dsize != rec.data_len) {
171 rec.data_len = dbuf.dsize;
172 return tdb_rec_write(tdb, rec_ptr, &rec);
178 /* find an entry in the database given a key */
179 /* If an entry doesn't exist tdb_err will be set to
180 * TDB_ERR_NOEXIST. If a key has no data attached
181 * then the TDB_DATA will have zero length but
184 static TDB_DATA _tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
187 struct tdb_record rec;
191 /* find which hash bucket it is in */
192 hash = tdb->hash_fn(&key);
193 if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec)))
196 ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
198 ret.dsize = rec.data_len;
199 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
203 TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
205 TDB_DATA ret = _tdb_fetch(tdb, key);
207 tdb_trace_1rec_retrec(tdb, "tdb_fetch", key, ret);
212 * Find an entry in the database and hand the record's data to a parsing
213 * function. The parsing function is executed under the chain read lock, so it
214 * should be fast and should not block on other syscalls.
216 * DON'T CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
218 * For mmapped tdb's that do not have a transaction open it points the parsing
219 * function directly at the mmap area, it avoids the malloc/memcpy in this
220 * case. If a transaction is open or no mmap is available, it has to do
221 * malloc/read/parse/free.
223 * This is interesting for all readers of potentially large data structures in
224 * the tdb records, ldb indexes being one example.
226 * Return -1 if the record was not found.
229 int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
230 int (*parser)(TDB_DATA key, TDB_DATA data,
235 struct tdb_record rec;
239 /* find which hash bucket it is in */
240 hash = tdb->hash_fn(&key);
242 if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
243 /* record not found */
244 tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, -1);
245 tdb->ecode = TDB_ERR_NOEXIST;
248 tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, 0);
250 ret = tdb_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
251 rec.data_len, parser, private_data);
253 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
258 /* check if an entry in the database exists
260 note that 1 is returned if the key is found and 0 is returned if not found
261 this doesn't match the conventions in the rest of this module, but is
264 static int tdb_exists_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
266 struct tdb_record rec;
268 if (tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
270 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
274 int tdb_exists(struct tdb_context *tdb, TDB_DATA key)
276 uint32_t hash = tdb->hash_fn(&key);
279 ret = tdb_exists_hash(tdb, key, hash);
280 tdb_trace_1rec_ret(tdb, "tdb_exists", key, ret);
284 /* actually delete an entry in the database given the offset */
285 int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct tdb_record *rec)
287 tdb_off_t last_ptr, i;
288 struct tdb_record lastrec;
290 if (tdb->read_only || tdb->traverse_read) return -1;
292 if (((tdb->traverse_write != 0) && (!TDB_DEAD(rec))) ||
293 tdb_write_lock_record(tdb, rec_ptr) == -1) {
294 /* Someone traversing here: mark it as dead */
295 rec->magic = TDB_DEAD_MAGIC;
296 return tdb_rec_write(tdb, rec_ptr, rec);
298 if (tdb_write_unlock_record(tdb, rec_ptr) != 0)
301 /* find previous record in hash chain */
302 if (tdb_ofs_read(tdb, TDB_HASH_TOP(rec->full_hash), &i) == -1)
304 for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
305 if (tdb_rec_read(tdb, i, &lastrec) == -1)
308 /* unlink it: next ptr is at start of record. */
310 last_ptr = TDB_HASH_TOP(rec->full_hash);
311 if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1)
314 /* recover the space */
315 if (tdb_free(tdb, rec_ptr, rec) == -1)
320 static int tdb_count_dead(struct tdb_context *tdb, uint32_t hash)
324 struct tdb_record rec;
326 /* read in the hash top */
327 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
331 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1)
334 if (rec.magic == TDB_DEAD_MAGIC) {
343 * Purge all DEAD records from a hash chain
345 static int tdb_purge_dead(struct tdb_context *tdb, uint32_t hash)
348 struct tdb_record rec;
351 if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
355 /* read in the hash top */
356 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
362 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1) {
368 if (rec.magic == TDB_DEAD_MAGIC
369 && tdb_do_delete(tdb, rec_ptr, &rec) == -1) {
376 tdb_unlock(tdb, -1, F_WRLCK);
380 /* delete an entry in the database given a key */
381 static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
384 struct tdb_record rec;
387 if (tdb->max_dead_records != 0) {
390 * Allow for some dead records per hash chain, mainly for
391 * tdb's with a very high create/delete rate like locking.tdb.
394 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
397 if (tdb_count_dead(tdb, hash) >= tdb->max_dead_records) {
399 * Don't let the per-chain freelist grow too large,
400 * delete all existing dead records
402 tdb_purge_dead(tdb, hash);
405 if (!(rec_ptr = tdb_find(tdb, key, hash, &rec))) {
406 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
411 * Just mark the record as dead.
413 rec.magic = TDB_DEAD_MAGIC;
414 ret = tdb_rec_write(tdb, rec_ptr, &rec);
417 if (!(rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK,
421 ret = tdb_do_delete(tdb, rec_ptr, &rec);
425 tdb_increment_seqnum(tdb);
428 if (tdb_unlock(tdb, BUCKET(rec.full_hash), F_WRLCK) != 0)
429 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_delete: WARNING tdb_unlock failed!\n"));
433 int tdb_delete(struct tdb_context *tdb, TDB_DATA key)
435 uint32_t hash = tdb->hash_fn(&key);
438 ret = tdb_delete_hash(tdb, key, hash);
439 tdb_trace_1rec_ret(tdb, "tdb_delete", key, ret);
444 * See if we have a dead record around with enough space
446 static tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash,
447 struct tdb_record *r, tdb_len_t length)
451 /* read in the hash top */
452 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
455 /* keep looking until we find the right record */
457 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
460 if (TDB_DEAD(r) && r->rec_len >= length) {
462 * First fit for simple coding, TODO: change to best
472 static int _tdb_store(struct tdb_context *tdb, TDB_DATA key,
473 TDB_DATA dbuf, int flag, uint32_t hash)
475 struct tdb_record rec;
480 /* check for it existing, on insert. */
481 if (flag == TDB_INSERT) {
482 if (tdb_exists_hash(tdb, key, hash)) {
483 tdb->ecode = TDB_ERR_EXISTS;
487 /* first try in-place update, on modify or replace. */
488 if (tdb_update_hash(tdb, key, hash, dbuf) == 0) {
491 if (tdb->ecode == TDB_ERR_NOEXIST &&
492 flag == TDB_MODIFY) {
493 /* if the record doesn't exist and we are in TDB_MODIFY mode then
494 we should fail the store */
498 /* reset the error code potentially set by the tdb_update() */
499 tdb->ecode = TDB_SUCCESS;
501 /* delete any existing record - if it doesn't exist we don't
502 care. Doing this first reduces fragmentation, and avoids
503 coalescing with `allocated' block before it's updated. */
504 if (flag != TDB_INSERT)
505 tdb_delete_hash(tdb, key, hash);
507 /* Copy key+value *before* allocating free space in case malloc
508 fails and we are left with a dead spot in the tdb. */
510 if (!(p = (char *)malloc(key.dsize + dbuf.dsize))) {
511 tdb->ecode = TDB_ERR_OOM;
515 memcpy(p, key.dptr, key.dsize);
517 memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize);
519 if (tdb->max_dead_records != 0) {
521 * Allow for some dead records per hash chain, look if we can
522 * find one that can hold the new record. We need enough space
523 * for key, data and tailer. If we find one, we don't have to
524 * consult the central freelist.
526 rec_ptr = tdb_find_dead(
528 key.dsize + dbuf.dsize + sizeof(tdb_off_t));
531 rec.key_len = key.dsize;
532 rec.data_len = dbuf.dsize;
533 rec.full_hash = hash;
534 rec.magic = TDB_MAGIC;
535 if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
536 || tdb->methods->tdb_write(
537 tdb, rec_ptr + sizeof(rec),
538 p, key.dsize + dbuf.dsize) == -1) {
546 * We have to allocate some space from the freelist, so this means we
547 * have to lock it. Use the chance to purge all the DEAD records from
548 * the hash chain under the freelist lock.
551 if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
555 if ((tdb->max_dead_records != 0)
556 && (tdb_purge_dead(tdb, hash) == -1)) {
557 tdb_unlock(tdb, -1, F_WRLCK);
561 /* we have to allocate some space */
562 rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize, &rec);
564 tdb_unlock(tdb, -1, F_WRLCK);
570 /* Read hash top into next ptr */
571 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
574 rec.key_len = key.dsize;
575 rec.data_len = dbuf.dsize;
576 rec.full_hash = hash;
577 rec.magic = TDB_MAGIC;
579 /* write out and point the top of the hash chain at it */
580 if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
581 || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1
582 || tdb_ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) {
583 /* Need to tdb_unallocate() here */
591 tdb_increment_seqnum(tdb);
598 /* store an element in the database, replacing any existing element
601 return 0 on success, -1 on failure
603 int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
608 if (tdb->read_only || tdb->traverse_read) {
609 tdb->ecode = TDB_ERR_RDONLY;
610 tdb_trace_2rec_flag_ret(tdb, "tdb_store", key, dbuf, flag, -1);
614 /* find which hash bucket it is in */
615 hash = tdb->hash_fn(&key);
616 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
619 ret = _tdb_store(tdb, key, dbuf, flag, hash);
620 tdb_trace_2rec_flag_ret(tdb, "tdb_store", key, dbuf, flag, ret);
621 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
625 /* Append to an entry. Create if not exist. */
626 int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
632 /* find which hash bucket it is in */
633 hash = tdb->hash_fn(&key);
634 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
637 dbuf = _tdb_fetch(tdb, key);
639 if (dbuf.dptr == NULL) {
640 dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize);
642 unsigned int new_len = dbuf.dsize + new_dbuf.dsize;
643 unsigned char *new_dptr;
645 /* realloc '0' is special: don't do that. */
648 new_dptr = (unsigned char *)realloc(dbuf.dptr, new_len);
649 if (new_dptr == NULL) {
652 dbuf.dptr = new_dptr;
655 if (dbuf.dptr == NULL) {
656 tdb->ecode = TDB_ERR_OOM;
660 memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize);
661 dbuf.dsize += new_dbuf.dsize;
663 ret = _tdb_store(tdb, key, dbuf, 0, hash);
664 tdb_trace_2rec_retrec(tdb, "tdb_append", key, new_dbuf, dbuf);
667 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
668 SAFE_FREE(dbuf.dptr);
674 return the name of the current tdb file
675 useful for external logging functions
677 const char *tdb_name(struct tdb_context *tdb)
683 return the underlying file descriptor being used by tdb, or -1
684 useful for external routines that want to check the device/inode
687 int tdb_fd(struct tdb_context *tdb)
693 return the current logging function
694 useful for external tdb routines that wish to log tdb errors
696 tdb_log_func tdb_log_fn(struct tdb_context *tdb)
698 return tdb->log.log_fn;
703 get the tdb sequence number. Only makes sense if the writers opened
704 with TDB_SEQNUM set. Note that this sequence number will wrap quite
705 quickly, so it should only be used for a 'has something changed'
706 test, not for code that relies on the count of the number of changes
707 made. If you want a counter then use a tdb record.
709 The aim of this sequence number is to allow for a very lightweight
710 test of a possible tdb change.
712 int tdb_get_seqnum(struct tdb_context *tdb)
716 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
720 int tdb_hash_size(struct tdb_context *tdb)
722 return tdb->header.hash_size;
725 size_t tdb_map_size(struct tdb_context *tdb)
727 return tdb->map_size;
730 int tdb_get_flags(struct tdb_context *tdb)
735 void tdb_add_flags(struct tdb_context *tdb, unsigned flags)
737 if ((flags & TDB_ALLOW_NESTING) &&
738 (flags & TDB_DISALLOW_NESTING)) {
739 tdb->ecode = TDB_ERR_NESTING;
740 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_add_flags: "
741 "allow_nesting and disallow_nesting are not allowed together!"));
745 if (flags & TDB_ALLOW_NESTING) {
746 tdb->flags &= ~TDB_DISALLOW_NESTING;
748 if (flags & TDB_DISALLOW_NESTING) {
749 tdb->flags &= ~TDB_ALLOW_NESTING;
755 void tdb_remove_flags(struct tdb_context *tdb, unsigned flags)
757 if ((flags & TDB_ALLOW_NESTING) &&
758 (flags & TDB_DISALLOW_NESTING)) {
759 tdb->ecode = TDB_ERR_NESTING;
760 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_remove_flags: "
761 "allow_nesting and disallow_nesting are not allowed together!"));
765 if (flags & TDB_ALLOW_NESTING) {
766 tdb->flags |= TDB_DISALLOW_NESTING;
768 if (flags & TDB_DISALLOW_NESTING) {
769 tdb->flags |= TDB_ALLOW_NESTING;
772 tdb->flags &= ~flags;
777 enable sequence number handling on an open tdb
779 void tdb_enable_seqnum(struct tdb_context *tdb)
781 tdb->flags |= TDB_SEQNUM;
786 add a region of the file to the freelist. Length is the size of the region in bytes,
787 which includes the free list header that needs to be added
789 static int tdb_free_region(struct tdb_context *tdb, tdb_off_t offset, ssize_t length)
791 struct tdb_record rec;
792 if (length <= sizeof(rec)) {
793 /* the region is not worth adding */
796 if (length + offset > tdb->map_size) {
797 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: adding region beyond end of file\n"));
800 memset(&rec,'\0',sizeof(rec));
801 rec.rec_len = length - sizeof(rec);
802 if (tdb_free(tdb, offset, &rec) == -1) {
803 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: failed to add free record\n"));
810 wipe the entire database, deleting all records. This can be done
811 very fast by using a allrecord lock. The entire data portion of the
812 file becomes a single entry in the freelist.
814 This code carefully steps around the recovery area, leaving it alone
816 int tdb_wipe_all(struct tdb_context *tdb)
819 tdb_off_t offset = 0;
821 tdb_off_t recovery_head;
822 tdb_len_t recovery_size = 0;
824 if (tdb_lockall(tdb) != 0) {
828 tdb_trace(tdb, "tdb_wipe_all");
830 /* see if the tdb has a recovery area, and remember its size
831 if so. We don't want to lose this as otherwise each
832 tdb_wipe_all() in a transaction will increase the size of
833 the tdb by the size of the recovery area */
834 if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) {
835 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery head\n"));
839 if (recovery_head != 0) {
840 struct tdb_record rec;
841 if (tdb->methods->tdb_read(tdb, recovery_head, &rec, sizeof(rec), DOCONV()) == -1) {
842 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery record\n"));
845 recovery_size = rec.rec_len + sizeof(rec);
848 /* wipe the hashes */
849 for (i=0;i<tdb->header.hash_size;i++) {
850 if (tdb_ofs_write(tdb, TDB_HASH_TOP(i), &offset) == -1) {
851 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write hash %d\n", i));
856 /* wipe the freelist */
857 if (tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) {
858 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write freelist\n"));
862 /* add all the rest of the file to the freelist, possibly leaving a gap
863 for the recovery area */
864 if (recovery_size == 0) {
865 /* the simple case - the whole file can be used as a freelist */
866 data_len = (tdb->map_size - TDB_DATA_START(tdb->header.hash_size));
867 if (tdb_free_region(tdb, TDB_DATA_START(tdb->header.hash_size), data_len) != 0) {
871 /* we need to add two freelist entries - one on either
872 side of the recovery area
874 Note that we cannot shift the recovery area during
875 this operation. Only the transaction.c code may
876 move the recovery area or we risk subtle data
879 data_len = (recovery_head - TDB_DATA_START(tdb->header.hash_size));
880 if (tdb_free_region(tdb, TDB_DATA_START(tdb->header.hash_size), data_len) != 0) {
883 /* and the 2nd free list entry after the recovery area - if any */
884 data_len = tdb->map_size - (recovery_head+recovery_size);
885 if (tdb_free_region(tdb, recovery_head+recovery_size, data_len) != 0) {
890 if (tdb_unlockall(tdb) != 0) {
891 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to unlock\n"));
902 struct traverse_state {
904 struct tdb_context *dest_db;
908 traverse function for repacking
910 static int repack_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *private_data)
912 struct traverse_state *state = (struct traverse_state *)private_data;
913 if (tdb_store(state->dest_db, key, data, TDB_INSERT) != 0) {
923 int tdb_repack(struct tdb_context *tdb)
925 struct tdb_context *tmp_db;
926 struct traverse_state state;
928 tdb_trace(tdb, "tdb_repack");
930 if (tdb_transaction_start(tdb) != 0) {
931 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to start transaction\n"));
935 tmp_db = tdb_open("tmpdb", tdb_hash_size(tdb), TDB_INTERNAL, O_RDWR|O_CREAT, 0);
936 if (tmp_db == NULL) {
937 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to create tmp_db\n"));
938 tdb_transaction_cancel(tdb);
943 state.dest_db = tmp_db;
945 if (tdb_traverse_read(tdb, repack_traverse, &state) == -1) {
946 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to traverse copying out\n"));
947 tdb_transaction_cancel(tdb);
953 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Error during traversal\n"));
954 tdb_transaction_cancel(tdb);
959 if (tdb_wipe_all(tdb) != 0) {
960 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to wipe database\n"));
961 tdb_transaction_cancel(tdb);
969 if (tdb_traverse_read(tmp_db, repack_traverse, &state) == -1) {
970 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to traverse copying back\n"));
971 tdb_transaction_cancel(tdb);
977 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Error during second traversal\n"));
978 tdb_transaction_cancel(tdb);
985 if (tdb_transaction_commit(tdb) != 0) {
986 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to commit\n"));
994 static void tdb_trace_write(struct tdb_context *tdb, const char *str)
996 if (write(tdb->tracefd, str, strlen(str)) != strlen(str)) {
1002 static void tdb_trace_start(struct tdb_context *tdb)
1005 char msg[sizeof(tdb_off_t) * 4 + 1];
1007 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
1008 snprintf(msg, sizeof(msg), "%u ", seqnum);
1009 tdb_trace_write(tdb, msg);
/*
  Finish a trace line by writing the terminating newline.
*/
static void tdb_trace_end(struct tdb_context *tdb)
{
	static const char newline[] = "\n";

	tdb_trace_write(tdb, newline);
}
/*
  Finish a trace line with the integer result of the traced call,
  written as " = <ret>" followed by a newline.
*/
static void tdb_trace_end_ret(struct tdb_context *tdb, int ret)
{
	char tail[sizeof(ret) * 4 + 4];

	snprintf(tail, sizeof(tail), " = %i\n", ret);
	tdb_trace_write(tdb, tail);
}
1024 static void tdb_trace_record(struct tdb_context *tdb, TDB_DATA rec)
1026 char msg[20 + rec.dsize*2], *p;
1029 /* We differentiate zero-length records from non-existent ones. */
1030 if (rec.dptr == NULL) {
1031 tdb_trace_write(tdb, " NULL");
1035 /* snprintf here is purely cargo-cult programming. */
1037 p += snprintf(p, sizeof(msg), " %zu:", rec.dsize);
1038 for (i = 0; i < rec.dsize; i++)
1039 p += snprintf(p, 2, "%02x", rec.dptr[i]);
1041 tdb_trace_write(tdb, msg);
/*
  Trace an operation that has no arguments and no result:
  "<seqnum> <op>" followed by a newline.
*/
void tdb_trace(struct tdb_context *tdb, const char *op)
{
	/* line prefix */
	tdb_trace_start(tdb);

	/* operation name */
	tdb_trace_write(tdb, op);

	/* line terminator */
	tdb_trace_end(tdb);
}
1051 void tdb_trace_seqnum(struct tdb_context *tdb, uint32_t seqnum, const char *op)
1053 char msg[sizeof(tdb_off_t) * 4 + 1];
1055 snprintf(msg, sizeof(msg), "%u ", seqnum);
1056 tdb_trace_write(tdb, msg);
1057 tdb_trace_write(tdb, op);
/*
  Trace an open: the operation name, the hash size in decimal, and the
  tdb flags and open flags in hex, on one trace line.
*/
void tdb_trace_open(struct tdb_context *tdb, const char *op,
		    unsigned hash_size, unsigned tdb_flags, unsigned open_flags)
{
	char line[128];

	/* format first, then emit prefix, body and terminator */
	snprintf(line, sizeof(line),
		 "%s %u 0x%x 0x%x", op, hash_size, tdb_flags, open_flags);

	tdb_trace_start(tdb);
	tdb_trace_write(tdb, line);
	tdb_trace_end(tdb);
}
/*
  Trace an operation that has no arguments but an integer result:
  "<seqnum> <op> = <ret>".
*/
void tdb_trace_ret(struct tdb_context *tdb, const char *op, int ret)
{
	/* line prefix */
	tdb_trace_start(tdb);

	/* operation name */
	tdb_trace_write(tdb, op);

	/* result and line terminator */
	tdb_trace_end_ret(tdb, ret);
}
1080 void tdb_trace_retrec(struct tdb_context *tdb, const char *op, TDB_DATA ret)
1082 tdb_trace_start(tdb);
1083 tdb_trace_write(tdb, op);
1084 tdb_trace_write(tdb, " =");
1085 tdb_trace_record(tdb, ret);
1089 void tdb_trace_1rec(struct tdb_context *tdb, const char *op,
1092 tdb_trace_start(tdb);
1093 tdb_trace_write(tdb, op);
1094 tdb_trace_record(tdb, rec);
1098 void tdb_trace_1rec_ret(struct tdb_context *tdb, const char *op,
1099 TDB_DATA rec, int ret)
1101 tdb_trace_start(tdb);
1102 tdb_trace_write(tdb, op);
1103 tdb_trace_record(tdb, rec);
1104 tdb_trace_end_ret(tdb, ret);
1107 void tdb_trace_1rec_retrec(struct tdb_context *tdb, const char *op,
1108 TDB_DATA rec, TDB_DATA ret)
1110 tdb_trace_start(tdb);
1111 tdb_trace_write(tdb, op);
1112 tdb_trace_record(tdb, rec);
1113 tdb_trace_write(tdb, " =");
1114 tdb_trace_record(tdb, ret);
1118 void tdb_trace_2rec_flag_ret(struct tdb_context *tdb, const char *op,
1119 TDB_DATA rec1, TDB_DATA rec2, unsigned flag,
1122 char msg[1 + sizeof(ret) * 4];
1124 snprintf(msg, sizeof(msg), " %#x", flag);
1125 tdb_trace_start(tdb);
1126 tdb_trace_write(tdb, op);
1127 tdb_trace_record(tdb, rec1);
1128 tdb_trace_record(tdb, rec2);
1129 tdb_trace_write(tdb, msg);
1130 tdb_trace_end_ret(tdb, ret);
1133 void tdb_trace_2rec_retrec(struct tdb_context *tdb, const char *op,
1134 TDB_DATA rec1, TDB_DATA rec2, TDB_DATA ret)
1136 tdb_trace_start(tdb);
1137 tdb_trace_write(tdb, op);
1138 tdb_trace_record(tdb, rec1);
1139 tdb_trace_record(tdb, rec2);
1140 tdb_trace_write(tdb, " =");
1141 tdb_trace_record(tdb, ret);