2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 3 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, see <http://www.gnu.org/licenses/>.
#include "tdb_private.h"

#include <errno.h>
30 _PUBLIC_ TDB_DATA tdb_null;
33 non-blocking increment of the tdb sequence number if the tdb has been opened using
36 _PUBLIC_ void tdb_increment_seqnum_nonblock(struct tdb_context *tdb)
40 if (!(tdb->flags & TDB_SEQNUM)) {
44 /* we ignore errors from this, as we have no sane way of
47 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
49 tdb_ofs_write(tdb, TDB_SEQNUM_OFS, &seqnum);
53 increment the tdb sequence number if the tdb has been opened using
56 static void tdb_increment_seqnum(struct tdb_context *tdb)
58 if (!(tdb->flags & TDB_SEQNUM)) {
62 if (tdb->transaction != NULL) {
63 tdb_increment_seqnum_nonblock(tdb);
67 if (tdb_nest_lock(tdb, TDB_SEQNUM_OFS, F_WRLCK,
68 TDB_LOCK_WAIT|TDB_LOCK_PROBE) != 0) {
72 tdb_increment_seqnum_nonblock(tdb);
74 tdb_nest_unlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, false);
77 static int tdb_key_compare(TDB_DATA key, TDB_DATA data, void *private_data)
79 return memcmp(data.dptr, key.dptr, data.dsize);
82 /* Returns 0 on fail. On success, return offset of record, and fills
84 static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
89 /* read in the hash top */
90 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
93 /* keep looking until we find the right record */
95 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
98 if (!TDB_DEAD(r) && hash==r->full_hash
99 && key.dsize==r->key_len
100 && tdb_parse_data(tdb, key, rec_ptr + sizeof(*r),
101 r->key_len, tdb_key_compare,
105 /* detect tight infinite loop */
106 if (rec_ptr == r->next) {
107 tdb->ecode = TDB_ERR_CORRUPT;
108 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_find: loop detected.\n"));
113 tdb->ecode = TDB_ERR_NOEXIST;
117 /* As tdb_find, but if you succeed, keep the lock */
118 tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype,
119 struct tdb_record *rec)
123 if (tdb_lock(tdb, BUCKET(hash), locktype) == -1)
125 if (!(rec_ptr = tdb_find(tdb, key, hash, rec)))
126 tdb_unlock(tdb, BUCKET(hash), locktype);
130 static TDB_DATA _tdb_fetch(struct tdb_context *tdb, TDB_DATA key);
132 static int tdb_update_hash_cmp(TDB_DATA key, TDB_DATA data, void *private_data)
134 TDB_DATA *dbuf = (TDB_DATA *)private_data;
136 if (dbuf->dsize != data.dsize) {
139 if (memcmp(dbuf->dptr, data.dptr, data.dsize) != 0) {
145 /* update an entry in place - this only works if the new data size
146 is <= the old data size and the key exists.
147 on failure return -1.
149 static int tdb_update_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, TDB_DATA dbuf)
151 struct tdb_record rec;
155 if (!(rec_ptr = tdb_find(tdb, key, hash, &rec)))
158 /* it could be an exact duplicate of what is there - this is
159 * surprisingly common (eg. with a ldb re-index). */
160 if (rec.data_len == dbuf.dsize &&
161 tdb_parse_record(tdb, key, tdb_update_hash_cmp, &dbuf) == 0) {
165 /* must be long enough key, data and tailer */
166 if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb_off_t)) {
167 tdb->ecode = TDB_SUCCESS; /* Not really an error */
171 if (tdb->methods->tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
172 dbuf.dptr, dbuf.dsize) == -1)
175 if (dbuf.dsize != rec.data_len) {
177 rec.data_len = dbuf.dsize;
178 return tdb_rec_write(tdb, rec_ptr, &rec);
184 /* find an entry in the database given a key */
185 /* If an entry doesn't exist tdb_err will be set to
186 * TDB_ERR_NOEXIST. If a key has no data attached
187 * then the TDB_DATA will have zero length but
190 static TDB_DATA _tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
193 struct tdb_record rec;
197 /* find which hash bucket it is in */
198 hash = tdb->hash_fn(&key);
199 if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec)))
202 ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
204 ret.dsize = rec.data_len;
205 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
209 _PUBLIC_ TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
211 TDB_DATA ret = _tdb_fetch(tdb, key);
213 tdb_trace_1rec_retrec(tdb, "tdb_fetch", key, ret);
218 * Find an entry in the database and hand the record's data to a parsing
219 * function. The parsing function is executed under the chain read lock, so it
220 * should be fast and should not block on other syscalls.
222 * DON'T CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
224 * For mmapped tdb's that do not have a transaction open it points the parsing
225 * function directly at the mmap area, it avoids the malloc/memcpy in this
226 * case. If a transaction is open or no mmap is available, it has to do
227 * malloc/read/parse/free.
229 * This is interesting for all readers of potentially large data structures in
230 * the tdb records, ldb indexes being one example.
232 * Return -1 if the record was not found.
235 _PUBLIC_ int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
236 int (*parser)(TDB_DATA key, TDB_DATA data,
241 struct tdb_record rec;
245 /* find which hash bucket it is in */
246 hash = tdb->hash_fn(&key);
248 if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
249 /* record not found */
250 tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, -1);
251 tdb->ecode = TDB_ERR_NOEXIST;
254 tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, 0);
256 ret = tdb_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
257 rec.data_len, parser, private_data);
259 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
264 /* check if an entry in the database exists
266 note that 1 is returned if the key is found and 0 is returned if not found
267 this doesn't match the conventions in the rest of this module, but is
270 static int tdb_exists_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
272 struct tdb_record rec;
274 if (tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
276 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
280 _PUBLIC_ int tdb_exists(struct tdb_context *tdb, TDB_DATA key)
282 uint32_t hash = tdb->hash_fn(&key);
285 ret = tdb_exists_hash(tdb, key, hash);
286 tdb_trace_1rec_ret(tdb, "tdb_exists", key, ret);
290 /* actually delete an entry in the database given the offset */
291 int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct tdb_record *rec)
293 tdb_off_t last_ptr, i;
294 struct tdb_record lastrec;
296 if (tdb->read_only || tdb->traverse_read) return -1;
298 if (((tdb->traverse_write != 0) && (!TDB_DEAD(rec))) ||
299 tdb_write_lock_record(tdb, rec_ptr) == -1) {
300 /* Someone traversing here: mark it as dead */
301 rec->magic = TDB_DEAD_MAGIC;
302 return tdb_rec_write(tdb, rec_ptr, rec);
304 if (tdb_write_unlock_record(tdb, rec_ptr) != 0)
307 /* find previous record in hash chain */
308 if (tdb_ofs_read(tdb, TDB_HASH_TOP(rec->full_hash), &i) == -1)
310 for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
311 if (tdb_rec_read(tdb, i, &lastrec) == -1)
314 /* unlink it: next ptr is at start of record. */
316 last_ptr = TDB_HASH_TOP(rec->full_hash);
317 if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1)
320 /* recover the space */
321 if (tdb_free(tdb, rec_ptr, rec) == -1)
326 static int tdb_count_dead(struct tdb_context *tdb, uint32_t hash)
330 struct tdb_record rec;
332 /* read in the hash top */
333 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
337 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1)
340 if (rec.magic == TDB_DEAD_MAGIC) {
349 * Purge all DEAD records from a hash chain
351 int tdb_purge_dead(struct tdb_context *tdb, uint32_t hash)
354 struct tdb_record rec;
357 if (tdb_lock_nonblock(tdb, -1, F_WRLCK) == -1) {
359 * Don't block the freelist if not strictly necessary
364 /* read in the hash top */
365 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
371 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1) {
377 if (rec.magic == TDB_DEAD_MAGIC
378 && tdb_do_delete(tdb, rec_ptr, &rec) == -1) {
385 tdb_unlock(tdb, -1, F_WRLCK);
389 /* delete an entry in the database given a key */
390 static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
393 struct tdb_record rec;
396 rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK, &rec);
401 if (tdb->max_dead_records != 0) {
403 uint32_t magic = TDB_DEAD_MAGIC;
406 * Allow for some dead records per hash chain, mainly for
407 * tdb's with a very high create/delete rate like locking.tdb.
410 if (tdb_count_dead(tdb, hash) >= tdb->max_dead_records) {
412 * Don't let the per-chain freelist grow too large,
413 * delete all existing dead records
415 tdb_purge_dead(tdb, hash);
419 * Just mark the record as dead.
422 tdb, rec_ptr + offsetof(struct tdb_record, magic),
426 ret = tdb_do_delete(tdb, rec_ptr, &rec);
430 tdb_increment_seqnum(tdb);
433 if (tdb_unlock(tdb, BUCKET(hash), F_WRLCK) != 0)
434 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_delete: WARNING tdb_unlock failed!\n"));
438 _PUBLIC_ int tdb_delete(struct tdb_context *tdb, TDB_DATA key)
440 uint32_t hash = tdb->hash_fn(&key);
443 ret = tdb_delete_hash(tdb, key, hash);
444 tdb_trace_1rec_ret(tdb, "tdb_delete", key, ret);
449 * See if we have a dead record around with enough space
451 tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash,
452 struct tdb_record *r, tdb_len_t length,
453 tdb_off_t *p_last_ptr)
455 tdb_off_t rec_ptr, last_ptr;
456 tdb_off_t best_rec_ptr = 0;
457 tdb_off_t best_last_ptr = 0;
458 struct tdb_record best = { .rec_len = UINT32_MAX };
460 length += sizeof(tdb_off_t); /* tailer */
462 last_ptr = TDB_HASH_TOP(hash);
464 /* read in the hash top */
465 if (tdb_ofs_read(tdb, last_ptr, &rec_ptr) == -1)
468 /* keep looking until we find the right record */
470 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
473 if (TDB_DEAD(r) && (r->rec_len >= length) &&
474 (r->rec_len < best.rec_len)) {
475 best_rec_ptr = rec_ptr;
476 best_last_ptr = last_ptr;
483 if (best.rec_len == UINT32_MAX) {
488 *p_last_ptr = best_last_ptr;
492 static int _tdb_store(struct tdb_context *tdb, TDB_DATA key,
493 TDB_DATA dbuf, int flag, uint32_t hash)
495 struct tdb_record rec;
500 rec_len = key.dsize + dbuf.dsize;
501 if ((rec_len < key.dsize) || (rec_len < dbuf.dsize)) {
502 tdb->ecode = TDB_ERR_OOM;
506 /* check for it existing, on insert. */
507 if (flag == TDB_INSERT) {
508 if (tdb_exists_hash(tdb, key, hash)) {
509 tdb->ecode = TDB_ERR_EXISTS;
513 /* first try in-place update, on modify or replace. */
514 if (tdb_update_hash(tdb, key, hash, dbuf) == 0) {
517 if (tdb->ecode == TDB_ERR_NOEXIST &&
518 flag == TDB_MODIFY) {
519 /* if the record doesn't exist and we are in TDB_MODIFY mode then
520 we should fail the store */
524 /* reset the error code potentially set by the tdb_update_hash() */
525 tdb->ecode = TDB_SUCCESS;
527 /* delete any existing record - if it doesn't exist we don't
528 care. Doing this first reduces fragmentation, and avoids
529 coalescing with `allocated' block before it's updated. */
530 if (flag != TDB_INSERT)
531 tdb_delete_hash(tdb, key, hash);
533 /* we have to allocate some space */
534 rec_ptr = tdb_allocate(tdb, hash, rec_len, &rec);
540 /* Read hash top into next ptr */
541 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
544 rec.key_len = key.dsize;
545 rec.data_len = dbuf.dsize;
546 rec.full_hash = hash;
547 rec.magic = TDB_MAGIC;
549 /* write out and point the top of the hash chain at it */
550 if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
551 || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec),
552 key.dptr, key.dsize) == -1
553 || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec)+key.dsize,
554 dbuf.dptr, dbuf.dsize) == -1
555 || tdb_ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) {
556 /* Need to tdb_unallocate() here */
564 tdb_increment_seqnum(tdb);
569 /* store an element in the database, replacing any existing element
572 return 0 on success, -1 on failure
574 _PUBLIC_ int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
579 if (tdb->read_only || tdb->traverse_read) {
580 tdb->ecode = TDB_ERR_RDONLY;
581 tdb_trace_2rec_flag_ret(tdb, "tdb_store", key, dbuf, flag, -1);
585 /* find which hash bucket it is in */
586 hash = tdb->hash_fn(&key);
587 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
590 ret = _tdb_store(tdb, key, dbuf, flag, hash);
591 tdb_trace_2rec_flag_ret(tdb, "tdb_store", key, dbuf, flag, ret);
592 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
596 /* Append to an entry. Create if not exist. */
597 _PUBLIC_ int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
603 /* find which hash bucket it is in */
604 hash = tdb->hash_fn(&key);
605 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
608 dbuf = _tdb_fetch(tdb, key);
610 if (dbuf.dptr == NULL) {
611 dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize);
613 unsigned int new_len = dbuf.dsize + new_dbuf.dsize;
614 unsigned char *new_dptr;
616 /* realloc '0' is special: don't do that. */
619 new_dptr = (unsigned char *)realloc(dbuf.dptr, new_len);
620 if (new_dptr == NULL) {
623 dbuf.dptr = new_dptr;
626 if (dbuf.dptr == NULL) {
627 tdb->ecode = TDB_ERR_OOM;
631 memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize);
632 dbuf.dsize += new_dbuf.dsize;
634 ret = _tdb_store(tdb, key, dbuf, 0, hash);
635 tdb_trace_2rec_retrec(tdb, "tdb_append", key, new_dbuf, dbuf);
638 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
639 SAFE_FREE(dbuf.dptr);
645 return the name of the current tdb file
646 useful for external logging functions
648 _PUBLIC_ const char *tdb_name(struct tdb_context *tdb)
654 return the underlying file descriptor being used by tdb, or -1
655 useful for external routines that want to check the device/inode
658 _PUBLIC_ int tdb_fd(struct tdb_context *tdb)
664 return the current logging function
665 useful for external tdb routines that wish to log tdb errors
667 _PUBLIC_ tdb_log_func tdb_log_fn(struct tdb_context *tdb)
669 return tdb->log.log_fn;
674 get the tdb sequence number. Only makes sense if the writers opened
675 with TDB_SEQNUM set. Note that this sequence number will wrap quite
676 quickly, so it should only be used for a 'has something changed'
677 test, not for code that relies on the count of the number of changes
678 made. If you want a counter then use a tdb record.
680 The aim of this sequence number is to allow for a very lightweight
681 test of a possible tdb change.
683 _PUBLIC_ int tdb_get_seqnum(struct tdb_context *tdb)
687 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
691 _PUBLIC_ int tdb_hash_size(struct tdb_context *tdb)
693 return tdb->hash_size;
696 _PUBLIC_ size_t tdb_map_size(struct tdb_context *tdb)
698 return tdb->map_size;
701 _PUBLIC_ int tdb_get_flags(struct tdb_context *tdb)
706 _PUBLIC_ void tdb_add_flags(struct tdb_context *tdb, unsigned flags)
708 if ((flags & TDB_ALLOW_NESTING) &&
709 (flags & TDB_DISALLOW_NESTING)) {
710 tdb->ecode = TDB_ERR_NESTING;
711 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_add_flags: "
712 "allow_nesting and disallow_nesting are not allowed together!"));
716 if (flags & TDB_ALLOW_NESTING) {
717 tdb->flags &= ~TDB_DISALLOW_NESTING;
719 if (flags & TDB_DISALLOW_NESTING) {
720 tdb->flags &= ~TDB_ALLOW_NESTING;
726 _PUBLIC_ void tdb_remove_flags(struct tdb_context *tdb, unsigned flags)
728 if ((flags & TDB_ALLOW_NESTING) &&
729 (flags & TDB_DISALLOW_NESTING)) {
730 tdb->ecode = TDB_ERR_NESTING;
731 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_remove_flags: "
732 "allow_nesting and disallow_nesting are not allowed together!"));
736 if ((flags & TDB_NOLOCK) &&
737 (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) &&
738 (tdb->mutexes == NULL)) {
739 tdb->ecode = TDB_ERR_LOCK;
740 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_remove_flags: "
741 "Can not remove NOLOCK flag on mutexed databases"));
745 if (flags & TDB_ALLOW_NESTING) {
746 tdb->flags |= TDB_DISALLOW_NESTING;
748 if (flags & TDB_DISALLOW_NESTING) {
749 tdb->flags |= TDB_ALLOW_NESTING;
752 tdb->flags &= ~flags;
757 enable sequence number handling on an open tdb
759 _PUBLIC_ void tdb_enable_seqnum(struct tdb_context *tdb)
761 tdb->flags |= TDB_SEQNUM;
766 add a region of the file to the freelist. Length is the size of the region in bytes,
767 which includes the free list header that needs to be added
769 static int tdb_free_region(struct tdb_context *tdb, tdb_off_t offset, ssize_t length)
771 struct tdb_record rec;
772 if (length <= sizeof(rec)) {
773 /* the region is not worth adding */
776 if (length + offset > tdb->map_size) {
777 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: adding region beyond end of file\n"));
780 memset(&rec,'\0',sizeof(rec));
781 rec.rec_len = length - sizeof(rec);
782 if (tdb_free(tdb, offset, &rec) == -1) {
783 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: failed to add free record\n"));
790 wipe the entire database, deleting all records. This can be done
791 very fast by using a allrecord lock. The entire data portion of the
792 file becomes a single entry in the freelist.
794 This code carefully steps around the recovery area, leaving it alone
796 _PUBLIC_ int tdb_wipe_all(struct tdb_context *tdb)
799 tdb_off_t offset = 0;
801 tdb_off_t recovery_head;
802 tdb_len_t recovery_size = 0;
804 if (tdb_lockall(tdb) != 0) {
808 tdb_trace(tdb, "tdb_wipe_all");
810 /* see if the tdb has a recovery area, and remember its size
811 if so. We don't want to lose this as otherwise each
812 tdb_wipe_all() in a transaction will increase the size of
813 the tdb by the size of the recovery area */
814 if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) {
815 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery head\n"));
819 if (recovery_head != 0) {
820 struct tdb_record rec;
821 if (tdb->methods->tdb_read(tdb, recovery_head, &rec, sizeof(rec), DOCONV()) == -1) {
822 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery record\n"));
825 recovery_size = rec.rec_len + sizeof(rec);
828 /* wipe the hashes */
829 for (i=0;i<tdb->hash_size;i++) {
830 if (tdb_ofs_write(tdb, TDB_HASH_TOP(i), &offset) == -1) {
831 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write hash %d\n", i));
836 /* wipe the freelist */
837 if (tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) {
838 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write freelist\n"));
842 /* add all the rest of the file to the freelist, possibly leaving a gap
843 for the recovery area */
844 if (recovery_size == 0) {
845 /* the simple case - the whole file can be used as a freelist */
846 data_len = (tdb->map_size - TDB_DATA_START(tdb->hash_size));
847 if (tdb_free_region(tdb, TDB_DATA_START(tdb->hash_size), data_len) != 0) {
851 /* we need to add two freelist entries - one on either
852 side of the recovery area
854 Note that we cannot shift the recovery area during
855 this operation. Only the transaction.c code may
856 move the recovery area or we risk subtle data
859 data_len = (recovery_head - TDB_DATA_START(tdb->hash_size));
860 if (tdb_free_region(tdb, TDB_DATA_START(tdb->hash_size), data_len) != 0) {
863 /* and the 2nd free list entry after the recovery area - if any */
864 data_len = tdb->map_size - (recovery_head+recovery_size);
865 if (tdb_free_region(tdb, recovery_head+recovery_size, data_len) != 0) {
870 tdb_increment_seqnum_nonblock(tdb);
872 if (tdb_unlockall(tdb) != 0) {
873 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to unlock\n"));
/* State shared between tdb_repack() and its traverse callback. */
struct traverse_state {
	/* set by repack_traverse() when a store into dest_db failed */
	bool error;
	/* database the traversed records are copied into */
	struct tdb_context *dest_db;
};
890 traverse function for repacking
892 static int repack_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *private_data)
894 struct traverse_state *state = (struct traverse_state *)private_data;
895 if (tdb_store(state->dest_db, key, data, TDB_INSERT) != 0) {
905 _PUBLIC_ int tdb_repack(struct tdb_context *tdb)
907 struct tdb_context *tmp_db;
908 struct traverse_state state;
910 tdb_trace(tdb, "tdb_repack");
912 if (tdb_transaction_start(tdb) != 0) {
913 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to start transaction\n"));
917 tmp_db = tdb_open("tmpdb", tdb_hash_size(tdb), TDB_INTERNAL, O_RDWR|O_CREAT, 0);
918 if (tmp_db == NULL) {
919 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to create tmp_db\n"));
920 tdb_transaction_cancel(tdb);
925 state.dest_db = tmp_db;
927 if (tdb_traverse_read(tdb, repack_traverse, &state) == -1) {
928 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to traverse copying out\n"));
929 tdb_transaction_cancel(tdb);
935 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Error during traversal\n"));
936 tdb_transaction_cancel(tdb);
941 if (tdb_wipe_all(tdb) != 0) {
942 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to wipe database\n"));
943 tdb_transaction_cancel(tdb);
951 if (tdb_traverse_read(tmp_db, repack_traverse, &state) == -1) {
952 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to traverse copying back\n"));
953 tdb_transaction_cancel(tdb);
959 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Error during second traversal\n"));
960 tdb_transaction_cancel(tdb);
967 if (tdb_transaction_commit(tdb) != 0) {
968 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to commit\n"));
/* Write all `count` bytes of `buf` to `fd`.

   Even on files, we can get partial writes due to signals, so short
   writes are continued and a write interrupted before any byte was
   transferred (EINTR) is retried. Returns true once everything has been
   written, false on any other write error or when write() makes no
   progress. */
bool tdb_write_all(int fd, const void *buf, size_t count)
{
	const char *cursor = buf;

	while (count != 0) {
		ssize_t written = write(fd, cursor, count);

		if (written < 0) {
			if (errno == EINTR) {
				continue;
			}
			return false;
		}
		if (written == 0) {
			/* no progress: give up instead of spinning forever */
			return false;
		}

		cursor += written;
		count -= (size_t)written;
	}
	return true;
}
989 bool tdb_add_off_t(tdb_off_t a, tdb_off_t b, tdb_off_t *pret)
991 tdb_off_t ret = a + b;
993 if ((ret < a) || (ret < b)) {
1001 static void tdb_trace_write(struct tdb_context *tdb, const char *str)
1003 if (!tdb_write_all(tdb->tracefd, str, strlen(str))) {
1004 close(tdb->tracefd);
1009 static void tdb_trace_start(struct tdb_context *tdb)
1012 char msg[sizeof(tdb_off_t) * 4 + 1];
1014 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
1015 snprintf(msg, sizeof(msg), "%u ", seqnum);
1016 tdb_trace_write(tdb, msg);
/* Finish a trace line with a newline. */
static void tdb_trace_end(struct tdb_context *tdb)
{
	static const char newline[] = "\n";

	tdb_trace_write(tdb, newline);
}
/* Finish a trace line with " = <ret>" and a newline. */
static void tdb_trace_end_ret(struct tdb_context *tdb, int ret)
{
	char tail[sizeof(ret) * 4 + 4];

	snprintf(tail, sizeof(tail), " = %i\n", ret);
	tdb_trace_write(tdb, tail);
}
1031 static void tdb_trace_record(struct tdb_context *tdb, TDB_DATA rec)
1033 char msg[20 + rec.dsize*2], *p;
1036 /* We differentiate zero-length records from non-existent ones. */
1037 if (rec.dptr == NULL) {
1038 tdb_trace_write(tdb, " NULL");
1042 /* snprintf here is purely cargo-cult programming. */
1044 p += snprintf(p, sizeof(msg), " %zu:", rec.dsize);
1045 for (i = 0; i < rec.dsize; i++)
1046 p += snprintf(p, 2, "%02x", rec.dptr[i]);
1048 tdb_trace_write(tdb, msg);
/* Trace an operation that has neither arguments nor a result:
   "<seqnum> <op>\n". */
void tdb_trace(struct tdb_context *tdb, const char *op)
{
	/* prefix */
	tdb_trace_start(tdb);
	/* operation name */
	tdb_trace_write(tdb, op);
	/* line terminator */
	tdb_trace_end(tdb);
}
1058 void tdb_trace_seqnum(struct tdb_context *tdb, uint32_t seqnum, const char *op)
1060 char msg[sizeof(tdb_off_t) * 4 + 1];
1062 snprintf(msg, sizeof(msg), "%u ", seqnum);
1063 tdb_trace_write(tdb, msg);
1064 tdb_trace_write(tdb, op);
/* Trace an open: "<seqnum> <op> <hash_size> 0x<tdb_flags> 0x<open_flags>\n". */
void tdb_trace_open(struct tdb_context *tdb, const char *op,
		    unsigned hash_size, unsigned tdb_flags, unsigned open_flags)
{
	char line[128];

	/* format first, then emit prefix, text and terminator */
	snprintf(line, sizeof(line),
		 "%s %u 0x%x 0x%x", op, hash_size, tdb_flags, open_flags);

	tdb_trace_start(tdb);
	tdb_trace_write(tdb, line);
	tdb_trace_end(tdb);
}
/* Trace an operation with an integer result: "<seqnum> <op> = <ret>\n". */
void tdb_trace_ret(struct tdb_context *tdb, const char *op, int ret)
{
	/* prefix */
	tdb_trace_start(tdb);
	/* operation name */
	tdb_trace_write(tdb, op);
	/* result and line terminator */
	tdb_trace_end_ret(tdb, ret);
}
1087 void tdb_trace_retrec(struct tdb_context *tdb, const char *op, TDB_DATA ret)
1089 tdb_trace_start(tdb);
1090 tdb_trace_write(tdb, op);
1091 tdb_trace_write(tdb, " =");
1092 tdb_trace_record(tdb, ret);
1096 void tdb_trace_1rec(struct tdb_context *tdb, const char *op,
1099 tdb_trace_start(tdb);
1100 tdb_trace_write(tdb, op);
1101 tdb_trace_record(tdb, rec);
1105 void tdb_trace_1rec_ret(struct tdb_context *tdb, const char *op,
1106 TDB_DATA rec, int ret)
1108 tdb_trace_start(tdb);
1109 tdb_trace_write(tdb, op);
1110 tdb_trace_record(tdb, rec);
1111 tdb_trace_end_ret(tdb, ret);
1114 void tdb_trace_1rec_retrec(struct tdb_context *tdb, const char *op,
1115 TDB_DATA rec, TDB_DATA ret)
1117 tdb_trace_start(tdb);
1118 tdb_trace_write(tdb, op);
1119 tdb_trace_record(tdb, rec);
1120 tdb_trace_write(tdb, " =");
1121 tdb_trace_record(tdb, ret);
1125 void tdb_trace_2rec_flag_ret(struct tdb_context *tdb, const char *op,
1126 TDB_DATA rec1, TDB_DATA rec2, unsigned flag,
1129 char msg[1 + sizeof(ret) * 4];
1131 snprintf(msg, sizeof(msg), " %#x", flag);
1132 tdb_trace_start(tdb);
1133 tdb_trace_write(tdb, op);
1134 tdb_trace_record(tdb, rec1);
1135 tdb_trace_record(tdb, rec2);
1136 tdb_trace_write(tdb, msg);
1137 tdb_trace_end_ret(tdb, ret);
1140 void tdb_trace_2rec_retrec(struct tdb_context *tdb, const char *op,
1141 TDB_DATA rec1, TDB_DATA rec2, TDB_DATA ret)
1143 tdb_trace_start(tdb);
1144 tdb_trace_write(tdb, op);
1145 tdb_trace_record(tdb, rec1);
1146 tdb_trace_record(tdb, rec2);
1147 tdb_trace_write(tdb, " =");
1148 tdb_trace_record(tdb, ret);