2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 3 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, see <http://www.gnu.org/licenses/>.
28 #include "tdb_private.h"
// File-scope TDB_DATA object with no explicit initialiser, so C zero-initialises it.
30 _PUBLIC_ TDB_DATA tdb_null;
33 non-blocking increment of the tdb sequence number if the tdb has been opened using
// Bump the sequence number stored at TDB_SEQNUM_OFS; no lock call is visible here.
// Visible steps: test the TDB_SEQNUM flag, read the stored value, write it back.
// NOTE(review): the increment and the body of the flag guard sit on short lines
// that are missing from this view - confirm against the full file.
36 _PUBLIC_ void tdb_increment_seqnum_nonblock(struct tdb_context *tdb)
// Guard: only databases with TDB_SEQNUM in tdb->flags are handled.
40 if (!(tdb->flags & TDB_SEQNUM)) {
44 /* we ignore errors from this, as we have no sane way of
// Return values of both offset helpers are deliberately not checked.
47 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
49 tdb_ofs_write(tdb, TDB_SEQNUM_OFS, &seqnum);
53 increment the tdb sequence number if the tdb has been opened using
// Sequence number bump used by the store and delete paths in this file.
// - Guarded by the TDB_SEQNUM flag (guard body not visible, presumably returns).
// - With a transaction open it calls the nonblock variant directly.
// - Otherwise it asks tdb_nest_lock for a write lock on TDB_SEQNUM_OFS, calls
//   the nonblock variant, then releases the lock with tdb_nest_unlock.
56 static void tdb_increment_seqnum(struct tdb_context *tdb)
58 if (!(tdb->flags & TDB_SEQNUM)) {
62 if (tdb->transaction != NULL) {
63 tdb_increment_seqnum_nonblock(tdb);
// Lock request carries TDB_LOCK_WAIT and TDB_LOCK_PROBE; what happens when the
// call fails is on a line missing from this view.
67 if (tdb_nest_lock(tdb, TDB_SEQNUM_OFS, F_WRLCK,
68 TDB_LOCK_WAIT|TDB_LOCK_PROBE) != 0) {
72 tdb_increment_seqnum_nonblock(tdb);
74 tdb_nest_unlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, false);
// Parser callback handed to tdb_parse_data by tdb_find.
// key is the wanted key, data is the stored key bytes; private_data is unused.
// Returns the memcmp result over data.dsize bytes, so 0 means the bytes match.
// tdb_find compares the two lengths before calling this.
77 static int tdb_key_compare(TDB_DATA key, TDB_DATA data, void *private_data)
79 return memcmp(data.dptr, key.dptr, data.dsize);
// Reset *ctx for a new chain walk: slow_ptr starts at ptr and, because a
// compound literal with a designated initialiser is assigned, every other
// member is zeroed.
82 void tdb_chainwalk_init(struct tdb_chainwalk_ctx *ctx, tdb_off_t ptr)
84 *ctx = (struct tdb_chainwalk_ctx) { .slow_ptr = ptr };
// Loop detection step for a hash chain walk, called once per record visited.
// ctx->slow_ptr trails the walker: it is advanced only on calls where
// slow_chase is set, and slow_chase is toggled on every call, so the slow
// pointer moves every other call. If the walker position (next_ptr) meets the
// slow pointer the chain is circular: ecode is set to TDB_ERR_CORRUPT and an
// error is logged.
// NOTE(review): the next_ptr parameter declaration and the return statements
// are on lines missing from this view.
87 bool tdb_chainwalk_check(struct tdb_context *tdb,
88 struct tdb_chainwalk_ctx *ctx,
93 if (ctx->slow_chase) {
// Follow one link: the offset stored at slow_ptr becomes the new slow_ptr.
94 ret = tdb_ofs_read(tdb, ctx->slow_ptr, &ctx->slow_ptr);
99 ctx->slow_chase = !ctx->slow_chase;
101 if (next_ptr == ctx->slow_ptr) {
102 tdb->ecode = TDB_ERR_CORRUPT;
103 TDB_LOG((tdb, TDB_DEBUG_ERROR,
104 "tdb_chainwalk_check: circular chain\n"));
111 /* Returns 0 on fail. On success, return offset of record, and fills
// Look up key in the chain selected by hash and load its header into *r.
// A record matches when it is not dead, its full_hash equals hash, its key
// length equals key.dsize and the stored key bytes compare equal.
// The walk is protected against circular chains by tdb_chainwalk_check.
// When the walk ends without a match, ecode is set to TDB_ERR_NOEXIST.
// No locking is visible here; tdb_find_lock_hash wraps this with a chain lock.
113 static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
114 struct tdb_record *r)
117 struct tdb_chainwalk_ctx chainwalk;
119 /* read in the hash top */
120 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
123 tdb_chainwalk_init(&chainwalk, rec_ptr);
125 /* keep looking until we find the right record */
129 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
// Cheap header tests first; the key bytes (stored right after the header)
// are only compared when hash and length already agree.
132 if (!TDB_DEAD(r) && hash==r->full_hash
133 && key.dsize==r->key_len
134 && tdb_parse_data(tdb, key, rec_ptr + sizeof(*r),
135 r->key_len, tdb_key_compare,
141 ok = tdb_chainwalk_check(tdb, &chainwalk, rec_ptr);
146 tdb->ecode = TDB_ERR_NOEXIST;
150 /* As tdb_find, but if you succeed, keep the lock */
// tdb_find under a chain lock of the requested locktype.
// On a miss the chain lock is dropped again before returning; on a hit the
// lock stays held and the caller is responsible for tdb_unlock (see the
// callers in this file, which unlock BUCKET(rec.full_hash)).
// NOTE(review): the return statements are on lines missing from this view.
151 tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype,
152 struct tdb_record *rec)
156 if (tdb_lock(tdb, BUCKET(hash), locktype) == -1)
158 if (!(rec_ptr = tdb_find(tdb, key, hash, rec)))
159 tdb_unlock(tdb, BUCKET(hash), locktype);
// Forward declaration; the definition appears further down in this file.
163 static TDB_DATA _tdb_fetch(struct tdb_context *tdb, TDB_DATA key);
// State passed to tdb_update_hash_cmp through its private_data pointer.
// Other code in this file also uses members num_dbufs and dbufs_len; their
// declarations are on lines missing from this view.
165 struct tdb_update_hash_state {
// The caller-supplied data fragments to compare against the stored value.
166 const TDB_DATA *dbufs;
// Parser callback: compare the stored value (data) with the concatenation of
// the fragments in state->dbufs.
// A total length mismatch is checked first, then each fragment is compared
// with memcmp against the stored bytes starting at dptr.
// NOTE(review): the return values and the advance of dptr between fragments
// are on lines missing from this view.
171 static int tdb_update_hash_cmp(TDB_DATA key, TDB_DATA data, void *private_data)
173 struct tdb_update_hash_state *state = private_data;
174 unsigned char *dptr = data.dptr;
177 if (state->dbufs_len != data.dsize) {
181 for (i=0; i<state->num_dbufs; i++) {
182 TDB_DATA dbuf = state->dbufs[i];
184 ret = memcmp(dptr, dbuf.dptr, dbuf.dsize);
194 /* update an entry in place - this only works if the new data size
195 is <= the old data size and the key exists.
196 on failure return -1.
// Try to overwrite the value of an existing record without reallocating.
// Visible flow:
//  1. tdb_find locates the record (no lock is taken here).
//  2. If the stored length equals dbufs_len, tdb_parse_record with
//     tdb_update_hash_cmp checks whether the stored value is already identical.
//  3. If the record is too small for key + new data + tailer, ecode is set to
//     TDB_SUCCESS (the original comment calls this not really an error).
//  4. Otherwise the fragments are written right after the stored key, and the
//     header is rewritten only when the data length changed.
// NOTE(review): the hash and dbufs_len parameter lines and the return
// statements are missing from this view.
198 static int tdb_update_hash(struct tdb_context *tdb, TDB_DATA key,
200 const TDB_DATA *dbufs, int num_dbufs,
203 struct tdb_record rec;
204 tdb_off_t rec_ptr, ofs;
208 if (!(rec_ptr = tdb_find(tdb, key, hash, &rec)))
211 /* it could be an exact duplicate of what is there - this is
212 * surprisingly common (eg. with a ldb re-index). */
213 if (rec.data_len == dbufs_len) {
214 struct tdb_update_hash_state state = {
215 .dbufs = dbufs, .num_dbufs = num_dbufs,
216 .dbufs_len = dbufs_len
220 ret = tdb_parse_record(tdb, key, tdb_update_hash_cmp, &state);
226 /* must be long enough key, data and tailer */
227 if (rec.rec_len < key.dsize + dbufs_len + sizeof(tdb_off_t)) {
228 tdb->ecode = TDB_SUCCESS; /* Not really an error */
// Data area starts after the record header and the stored key.
232 ofs = rec_ptr + sizeof(rec) + rec.key_len;
234 for (i=0; i<num_dbufs; i++) {
235 TDB_DATA dbuf = dbufs[i];
238 ret = tdb->methods->tdb_write(tdb, ofs, dbuf.dptr, dbuf.dsize);
245 if (dbufs_len != rec.data_len) {
247 rec.data_len = dbufs_len;
248 return tdb_rec_write(tdb, rec_ptr, &rec);
254 /* find an entry in the database given a key */
255 /* If an entry doesn't exist tdb_err will be set to
256 * TDB_ERR_NOEXIST. If a key has no data attached
257 * then the TDB_DATA will have zero length but
// Untraced fetch: hash the key, find the record under a chain read lock, copy
// its data out with tdb_alloc_read, then drop the lock.
// ret.dsize is the stored data length. tdb_append in this file releases the
// returned dptr with SAFE_FREE, so the buffer is owned by the caller.
// NOTE(review): the miss path and the final return are on missing lines.
260 static TDB_DATA _tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
263 struct tdb_record rec;
267 /* find which hash bucket it is in */
268 hash = tdb->hash_fn(&key);
269 if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec)))
// Data follows the header and the key inside the record.
272 ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
274 ret.dsize = rec.data_len;
275 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
// Public fetch: delegates to _tdb_fetch and records key and result in the
// trace under the operation name tdb_fetch.
279 _PUBLIC_ TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
281 TDB_DATA ret = _tdb_fetch(tdb, key);
283 tdb_trace_1rec_retrec(tdb, "tdb_fetch", key, ret);
288 * Find an entry in the database and hand the record's data to a parsing
289 * function. The parsing function is executed under the chain read lock, so it
290 * should be fast and should not block on other syscalls.
292 * DON'T CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
294 * For mmapped tdb's that do not have a transaction open it points the parsing
295 * function directly at the mmap area, it avoids the malloc/memcpy in this
296 * case. If a transaction is open or no mmap is available, it has to do
297 * malloc/read/parse/free.
299 * This is interesting for all readers of potentially large data structures in
300 * the tdb records, ldb indexes being one example.
302 * Return -1 if the record was not found.
// Find key and run parser on the record data while the chain read lock is held.
// Miss: trace -1 and set ecode to TDB_ERR_NOEXIST.
// Hit: trace 0, pass the data region (after header and key) to tdb_parse_data
// together with parser and private_data, then release the chain lock.
// NOTE(review): the private_data parameter line and the return statements are
// missing from this view.
305 _PUBLIC_ int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
306 int (*parser)(TDB_DATA key, TDB_DATA data,
311 struct tdb_record rec;
315 /* find which hash bucket it is in */
316 hash = tdb->hash_fn(&key);
318 if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
319 /* record not found */
320 tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, -1);
321 tdb->ecode = TDB_ERR_NOEXIST;
// The trace entry is written before the parser runs, so it always logs 0
// for a found record regardless of what the parser returns.
324 tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, 0);
326 ret = tdb_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
327 rec.data_len, parser, private_data);
329 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
334 /* check if an entry in the database exists
336 note that 1 is returned if the key is found and 0 is returned if not found
337 this doesn't match the conventions in the rest of this module, but is
// Existence test for a key whose hash is already known.
// Looks the record up under a chain read lock and releases the lock on a hit.
// Per the comment preceding this function, 1 means found and 0 means not found.
340 static int tdb_exists_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
342 struct tdb_record rec;
344 if (tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
346 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
// Public existence test: hashes the key, delegates to tdb_exists_hash and
// traces the key with the result.
350 _PUBLIC_ int tdb_exists(struct tdb_context *tdb, TDB_DATA key)
352 uint32_t hash = tdb->hash_fn(&key);
355 ret = tdb_exists_hash(tdb, key, hash);
356 tdb_trace_1rec_ret(tdb, "tdb_exists", key, ret);
360 /* actually delete an entry in the database given the offset */
// Remove the record at rec_ptr (header already loaded in *rec) from its chain.
// Visible flow:
//  - Refuse with -1 on a read-only database or during a read traverse.
//  - If a write traverse is active and the record is not yet dead, or the
//    record write lock cannot be taken, only mark the record dead (magic set
//    to TDB_DEAD_MAGIC) and rewrite its header.
//  - Otherwise release the record lock, walk the chain from the hash top to
//    find the predecessor, point the predecessor (or the hash top) at
//    rec->next, and hand the space to tdb_free.
// NOTE(review): the error returns and the test that selects the hash top as
// last_ptr are on lines missing from this view.
361 int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct tdb_record *rec)
363 tdb_off_t last_ptr, i;
364 struct tdb_record lastrec;
366 if (tdb->read_only || tdb->traverse_read) return -1;
368 if (((tdb->traverse_write != 0) && (!TDB_DEAD(rec))) ||
369 tdb_write_lock_record(tdb, rec_ptr) == -1) {
370 /* Someone traversing here: mark it as dead */
371 rec->magic = TDB_DEAD_MAGIC;
372 return tdb_rec_write(tdb, rec_ptr, rec);
374 if (tdb_write_unlock_record(tdb, rec_ptr) != 0)
377 /* find previous record in hash chain */
378 if (tdb_ofs_read(tdb, TDB_HASH_TOP(rec->full_hash), &i) == -1)
// last_ptr stays 0 when rec_ptr is the first record of the chain.
380 for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
381 if (tdb_rec_read(tdb, i, &lastrec) == -1)
384 /* unlink it: next ptr is at start of record. */
386 last_ptr = TDB_HASH_TOP(rec->full_hash);
387 if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1)
390 /* recover the space */
391 if (tdb_free(tdb, rec_ptr, rec) == -1)
// Walk the chain for hash and examine each record for TDB_DEAD_MAGIC.
// tdb_delete_hash compares the result against tdb->max_dead_records.
// NOTE(review): the counter, the loop header and the returns are on lines
// missing from this view.
396 static int tdb_count_dead(struct tdb_context *tdb, uint32_t hash)
400 struct tdb_record rec;
402 /* read in the hash top */
403 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
407 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1)
410 if (rec.magic == TDB_DEAD_MAGIC) {
419 * Purge all DEAD records from a hash chain
// Delete every dead record found in the chain for hash.
// A non-blocking write lock on list -1 is attempted first (the original
// comment refers to this as the freelist); the same lock is released at the
// end. Each record with TDB_DEAD_MAGIC is passed to tdb_do_delete.
// NOTE(review): what is returned when the lock attempt fails, and the loop
// header, are on lines missing from this view.
421 int tdb_purge_dead(struct tdb_context *tdb, uint32_t hash)
424 struct tdb_record rec;
427 if (tdb_lock_nonblock(tdb, -1, F_WRLCK) == -1) {
429 * Don't block the freelist if not strictly necessary
434 /* read in the hash top */
435 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
441 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1) {
447 if (rec.magic == TDB_DEAD_MAGIC
448 && tdb_do_delete(tdb, rec_ptr, &rec) == -1) {
455 tdb_unlock(tdb, -1, F_WRLCK);
459 /* delete an entry in the database given a key */
// Delete key from the chain for hash.
// The record is located with a chain write lock held. Two strategies are
// visible:
//  - max_dead_records != 0: once the chain holds at least that many dead
//    records they are purged first; then this record is only marked dead by
//    writing TDB_DEAD_MAGIC over its magic field.
//  - otherwise (branch keyword not visible, presumably else): a real delete
//    through tdb_do_delete.
// Afterwards the sequence number is bumped and the chain lock released; a
// failing unlock is only logged as a warning.
460 static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
463 struct tdb_record rec;
466 rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK, &rec);
471 if (tdb->max_dead_records != 0) {
473 uint32_t magic = TDB_DEAD_MAGIC;
476 * Allow for some dead records per hash chain, mainly for
477 * tdb's with a very high create/delete rate like locking.tdb.
480 if (tdb_count_dead(tdb, hash) >= tdb->max_dead_records) {
482 * Don't let the per-chain freelist grow too large,
483 * delete all existing dead records
// Return value of the purge is not checked.
485 tdb_purge_dead(tdb, hash);
489 * Just mark the record as dead.
// Only the magic field is rewritten, addressed via offsetof.
492 tdb, rec_ptr + offsetof(struct tdb_record, magic),
496 ret = tdb_do_delete(tdb, rec_ptr, &rec);
500 tdb_increment_seqnum(tdb);
503 if (tdb_unlock(tdb, BUCKET(hash), F_WRLCK) != 0)
504 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_delete: WARNING tdb_unlock failed!\n"));
// Public delete: hashes the key, delegates to tdb_delete_hash and traces the
// key with the result.
508 _PUBLIC_ int tdb_delete(struct tdb_context *tdb, TDB_DATA key)
510 uint32_t hash = tdb->hash_fn(&key);
513 ret = tdb_delete_hash(tdb, key, hash);
514 tdb_trace_1rec_ret(tdb, "tdb_delete", key, ret);
519 * See if we have a dead record around with enough space
// Search the chain for hash for a dead record that can hold length bytes plus
// the tailer. Among the candidates the one with the smallest rec_len is
// preferred (best starts at UINT32_MAX and a candidate must be strictly
// smaller). The link that points at the chosen record is reported through
// *p_last_ptr so the caller can unlink it.
// The walk is guarded against circular chains by tdb_chainwalk_check.
// NOTE(review): the copy of the winning header into best, the advance of
// last_ptr and the returns are on lines missing from this view.
521 tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash,
522 struct tdb_record *r, tdb_len_t length,
523 tdb_off_t *p_last_ptr)
525 tdb_off_t rec_ptr, last_ptr;
526 struct tdb_chainwalk_ctx chainwalk;
527 tdb_off_t best_rec_ptr = 0;
528 tdb_off_t best_last_ptr = 0;
529 struct tdb_record best = { .rec_len = UINT32_MAX };
531 length += sizeof(tdb_off_t); /* tailer */
533 last_ptr = TDB_HASH_TOP(hash);
535 /* read in the hash top */
536 if (tdb_ofs_read(tdb, last_ptr, &rec_ptr) == -1)
539 tdb_chainwalk_init(&chainwalk, rec_ptr);
541 /* keep looking until we find the right record */
545 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
548 if (TDB_DEAD(r) && (r->rec_len >= length) &&
549 (r->rec_len < best.rec_len)) {
550 best_rec_ptr = rec_ptr;
551 best_last_ptr = last_ptr;
557 ok = tdb_chainwalk_check(tdb, &chainwalk, rec_ptr);
// best.rec_len still at its initial sentinel means no candidate was found.
563 if (best.rec_len == UINT32_MAX) {
568 *p_last_ptr = best_last_ptr;
// Core store routine; callers in this file take the chain write lock first.
// The value is the concatenation of num_dbufs fragments.
// Visible flow:
//  1. Validate fragments (non-zero size with NULL pointer gives
//     TDB_ERR_EINVAL) and detect wrap-around of the summed length and of
//     key + data (both give TDB_ERR_OOM).
//  2. TDB_INSERT: fail with TDB_ERR_EXISTS when the key is present.
//     Other flags: try an in-place update through tdb_update_hash first.
//  3. TDB_MODIFY on a missing key fails; otherwise ecode is reset.
//  4. Unless inserting, delete the old record, then allocate a new one, link
//     it in front of the chain, and write header, key and data.
//  5. Point the hash top at the new record and bump the sequence number.
// NOTE(review): the returns, the goto or cleanup structure and the running
// sum of dbufs_len are on lines missing from this view.
572 static int _tdb_storev(struct tdb_context *tdb, TDB_DATA key,
573 const TDB_DATA *dbufs, int num_dbufs,
574 int flag, uint32_t hash)
576 struct tdb_record rec;
577 tdb_off_t rec_ptr, ofs;
578 tdb_len_t rec_len, dbufs_len;
584 for (i=0; i<num_dbufs; i++) {
585 size_t dsize = dbufs[i].dsize;
587 if ((dsize != 0) && (dbufs[i].dptr == NULL)) {
588 tdb->ecode = TDB_ERR_EINVAL;
// A sum smaller than the fragment just added means the total wrapped.
593 if (dbufs_len < dsize) {
594 tdb->ecode = TDB_ERR_OOM;
599 rec_len = key.dsize + dbufs_len;
600 if ((rec_len < key.dsize) || (rec_len < dbufs_len)) {
601 tdb->ecode = TDB_ERR_OOM;
605 /* check for it existing, on insert. */
606 if (flag == TDB_INSERT) {
607 if (tdb_exists_hash(tdb, key, hash)) {
608 tdb->ecode = TDB_ERR_EXISTS;
612 /* first try in-place update, on modify or replace. */
613 if (tdb_update_hash(tdb, key, hash, dbufs, num_dbufs,
617 if (tdb->ecode == TDB_ERR_NOEXIST &&
618 flag == TDB_MODIFY) {
619 /* if the record doesn't exist and we are in TDB_MODIFY mode then
620 we should fail the store */
624 /* reset the error code potentially set by the tdb_update_hash() */
625 tdb->ecode = TDB_SUCCESS;
627 /* delete any existing record - if it doesn't exist we don't
628 care. Doing this first reduces fragmentation, and avoids
629 coalescing with `allocated' block before it's updated. */
630 if (flag != TDB_INSERT)
631 tdb_delete_hash(tdb, key, hash);
633 /* we have to allocate some space */
634 rec_ptr = tdb_allocate(tdb, hash, rec_len, &rec);
640 /* Read hash top into next ptr */
641 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
644 rec.key_len = key.dsize;
645 rec.data_len = dbufs_len;
646 rec.full_hash = hash;
647 rec.magic = TDB_MAGIC;
651 /* write out and point the top of the hash chain at it */
652 ret = tdb_rec_write(tdb, ofs, &rec);
658 ret = tdb->methods->tdb_write(tdb, ofs, key.dptr, key.dsize);
664 for (i=0; i<num_dbufs; i++) {
// Empty fragments are tested for here before the write below.
665 if (dbufs[i].dsize == 0) {
669 ret = tdb->methods->tdb_write(tdb, ofs, dbufs[i].dptr,
674 ofs += dbufs[i].dsize;
677 ret = tdb_ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr);
679 /* Need to tdb_unallocate() here */
687 tdb_increment_seqnum(tdb);
// Single-buffer convenience wrapper: forwards dbuf to _tdb_storev as an array
// of one fragment and returns its result unchanged.
692 static int _tdb_store(struct tdb_context *tdb, TDB_DATA key,
693 TDB_DATA dbuf, int flag, uint32_t hash)
695 return _tdb_storev(tdb, key, &dbuf, 1, flag, hash);
698 /* store an element in the database, replacing any existing element
701 return 0 on success, -1 on failure
// Public store of a single data buffer.
// Rejects the call on a read-only database or during a read traverse
// (ecode TDB_ERR_RDONLY, traced with -1). Otherwise the key is hashed, the
// chain write lock is taken, _tdb_store does the work, the outcome is traced
// with the real result and the lock is released.
// NOTE(review): the return statements are on lines missing from this view.
703 _PUBLIC_ int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
708 if (tdb->read_only || tdb->traverse_read) {
709 tdb->ecode = TDB_ERR_RDONLY;
710 tdb_trace_2rec_flag_ret(tdb, "tdb_store", key, dbuf, flag, -1);
714 /* find which hash bucket it is in */
715 hash = tdb->hash_fn(&key);
716 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
719 ret = _tdb_store(tdb, key, dbuf, flag, hash);
720 tdb_trace_2rec_flag_ret(tdb, "tdb_store", key, dbuf, flag, ret);
721 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
725 _PUBLIC_ int tdb_storev(struct tdb_context *tdb, TDB_DATA key,
726 const TDB_DATA *dbufs, int num_dbufs, int flag)
731 if (tdb->read_only || tdb->traverse_read) {
732 tdb->ecode = TDB_ERR_RDONLY;
733 tdb_trace_1plusn_rec_flag_ret(tdb, "tdb_storev", key,
734 dbufs, num_dbufs, flag, -1);
738 /* find which hash bucket it is in */
739 hash = tdb->hash_fn(&key);
740 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
743 ret = _tdb_storev(tdb, key, dbufs, num_dbufs, flag, hash);
744 tdb_trace_1plusn_rec_flag_ret(tdb, "tdb_storev", key,
745 dbufs, num_dbufs, flag, -1);
746 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
750 /* Append to an entry. Create if not exist. */
// Append new_dbuf to the value stored under key, creating the record when it
// does not exist (per the comment preceding this function).
// Under the chain write lock the current value is fetched into dbufs[0] and
// the pair dbufs[0], dbufs[1] is stored with flag 0 through _tdb_storev.
// The fetched copy is released with SAFE_FREE after the lock is dropped.
// NOTE(review): the assignment of dbufs[1] and the returns are on lines
// missing from this view.
751 _PUBLIC_ int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
757 /* find which hash bucket it is in */
758 hash = tdb->hash_fn(&key);
759 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
762 dbufs[0] = _tdb_fetch(tdb, key);
765 ret = _tdb_storev(tdb, key, dbufs, 2, 0, hash);
// The trace records the old value and the appended buffer, not ret.
766 tdb_trace_2rec_retrec(tdb, "tdb_append", key, dbufs[0], dbufs[1]);
768 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
769 SAFE_FREE(dbufs[0].dptr);
775 return the name of the current tdb file
776 useful for external logging functions
// Accessor for the name of the current tdb file (per the preceding comment).
// NOTE(review): the body is on lines missing from this view.
778 _PUBLIC_ const char *tdb_name(struct tdb_context *tdb)
784 return the underlying file descriptor being used by tdb, or -1
785 useful for external routines that want to check the device/inode
// Accessor for the underlying file descriptor, or -1 (per the preceding
// comment). NOTE(review): the body is on lines missing from this view.
788 _PUBLIC_ int tdb_fd(struct tdb_context *tdb)
794 return the current logging function
795 useful for external tdb routines that wish to log tdb errors
// Return the logging callback currently stored in tdb->log.log_fn.
797 _PUBLIC_ tdb_log_func tdb_log_fn(struct tdb_context *tdb)
799 return tdb->log.log_fn;
804 get the tdb sequence number. Only makes sense if the writers opened
805 with TDB_SEQNUM set. Note that this sequence number will wrap quite
806 quickly, so it should only be used for a 'has something changed'
807 test, not for code that relies on the count of the number of changes
808 made. If you want a counter then use a tdb record.
810 The aim of this sequence number is to allow for a very lightweight
811 test of a possible tdb change.
// Read the sequence number stored at TDB_SEQNUM_OFS.
// The result of tdb_ofs_read is not checked here.
// NOTE(review): the seqnum declaration and the return are on missing lines.
813 _PUBLIC_ int tdb_get_seqnum(struct tdb_context *tdb)
817 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
// Return tdb->hash_size, converted to int by the return type.
821 _PUBLIC_ int tdb_hash_size(struct tdb_context *tdb)
823 return tdb->hash_size;
// Return tdb->map_size as a size_t.
826 _PUBLIC_ size_t tdb_map_size(struct tdb_context *tdb)
828 return tdb->map_size;
// Accessor for the context flags.
// NOTE(review): the body is on lines missing from this view - presumably it
// returns tdb->flags; confirm against the full file.
831 _PUBLIC_ int tdb_get_flags(struct tdb_context *tdb)
// Add flags to an open context.
// Asking for TDB_ALLOW_NESTING and TDB_DISALLOW_NESTING together is rejected:
// ecode becomes TDB_ERR_NESTING and a fatal message is logged.
// The two nesting flags are mutually exclusive, so adding one clears the other.
// NOTE(review): the statement that finally sets the requested bits and the
// early return are on lines missing from this view.
836 _PUBLIC_ void tdb_add_flags(struct tdb_context *tdb, unsigned flags)
838 if ((flags & TDB_ALLOW_NESTING) &&
839 (flags & TDB_DISALLOW_NESTING)) {
840 tdb->ecode = TDB_ERR_NESTING;
841 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_add_flags: "
842 "allow_nesting and disallow_nesting are not allowed together!"));
846 if (flags & TDB_ALLOW_NESTING) {
847 tdb->flags &= ~TDB_DISALLOW_NESTING;
849 if (flags & TDB_DISALLOW_NESTING) {
850 tdb->flags &= ~TDB_ALLOW_NESTING;
// Remove flags from an open context.
// Rejected requests (ecode set, fatal message logged):
//  - both nesting flags at once (TDB_ERR_NESTING);
//  - TDB_NOLOCK when the mutex feature flag is set but tdb->mutexes is NULL
//    (TDB_ERR_LOCK).
// Removing one nesting flag sets the opposite one, so exactly one nesting
// policy stays selected. Finally the requested bits are cleared.
// NOTE(review): the early returns are on lines missing from this view.
856 _PUBLIC_ void tdb_remove_flags(struct tdb_context *tdb, unsigned flags)
858 if ((flags & TDB_ALLOW_NESTING) &&
859 (flags & TDB_DISALLOW_NESTING)) {
860 tdb->ecode = TDB_ERR_NESTING;
861 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_remove_flags: "
862 "allow_nesting and disallow_nesting are not allowed together!"));
866 if ((flags & TDB_NOLOCK) &&
867 (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) &&
868 (tdb->mutexes == NULL)) {
869 tdb->ecode = TDB_ERR_LOCK;
870 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_remove_flags: "
871 "Can not remove NOLOCK flag on mutexed databases"));
875 if (flags & TDB_ALLOW_NESTING) {
876 tdb->flags |= TDB_DISALLOW_NESTING;
878 if (flags & TDB_DISALLOW_NESTING) {
879 tdb->flags |= TDB_ALLOW_NESTING;
882 tdb->flags &= ~flags;
887 enable sequence number handling on an open tdb
// Turn on sequence number handling by setting TDB_SEQNUM in tdb->flags.
889 _PUBLIC_ void tdb_enable_seqnum(struct tdb_context *tdb)
891 tdb->flags |= TDB_SEQNUM;
896 add a region of the file to the freelist. Length is the size of the region in bytes,
897 which includes the free list header that needs to be added
// Hand the file region [offset, offset + length) to the freelist.
// length includes the record header, so the free record gets
// rec_len = length - sizeof(rec). Regions no larger than a header are not
// added. A region reaching past tdb->map_size is reported as fatal.
// NOTE(review): length is ssize_t but is compared against sizeof(rec), which
// is size_t; a negative length would be converted to a large unsigned value
// and pass the first test. Confirm callers never pass a negative length.
// NOTE(review): the return statements are on lines missing from this view.
899 static int tdb_free_region(struct tdb_context *tdb, tdb_off_t offset, ssize_t length)
901 struct tdb_record rec;
902 if (length <= sizeof(rec)) {
903 /* the region is not worth adding */
906 if (length + offset > tdb->map_size) {
907 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: adding region beyond end of file\n"));
// Start from an all-zero header; only rec_len is filled in before tdb_free.
910 memset(&rec,'\0',sizeof(rec));
911 rec.rec_len = length - sizeof(rec);
912 if (tdb_free(tdb, offset, &rec) == -1) {
913 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: failed to add free record\n"));
920 wipe the entire database, deleting all records. This can be done
921 very fast by using a allrecord lock. The entire data portion of the
922 file becomes a single entry in the freelist.
924 This code carefully steps around the recovery area, leaving it alone
// Delete every record while holding the all-record lock.
// Visible flow:
//  1. tdb_lockall, then trace the operation.
//  2. Read TDB_RECOVERY_HEAD; when non-zero, read the recovery record header
//     to learn the size of the recovery area (header included).
//  3. Write offset 0 into every hash top and into FREELIST_TOP.
//  4. Give the data area back to the freelist: as one region when there is no
//     recovery area, otherwise as one region before and one after it.
//  5. Bump the sequence number without locking and call tdb_unlockall.
// NOTE(review): the error paths (returns or gotos) and the declarations of i
// and data_len are on lines missing from this view.
926 _PUBLIC_ int tdb_wipe_all(struct tdb_context *tdb)
929 tdb_off_t offset = 0;
931 tdb_off_t recovery_head;
932 tdb_len_t recovery_size = 0;
934 if (tdb_lockall(tdb) != 0) {
938 tdb_trace(tdb, "tdb_wipe_all");
940 /* see if the tdb has a recovery area, and remember its size
941 if so. We don't want to lose this as otherwise each
942 tdb_wipe_all() in a transaction will increase the size of
943 the tdb by the size of the recovery area */
944 if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) {
945 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery head\n"));
949 if (recovery_head != 0) {
950 struct tdb_record rec;
951 if (tdb->methods->tdb_read(tdb, recovery_head, &rec, sizeof(rec), DOCONV()) == -1) {
952 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery record\n"));
955 recovery_size = rec.rec_len + sizeof(rec);
958 /* wipe the hashes */
959 for (i=0;i<tdb->hash_size;i++) {
960 if (tdb_ofs_write(tdb, TDB_HASH_TOP(i), &offset) == -1) {
961 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write hash %d\n", i));
966 /* wipe the freelist */
967 if (tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) {
968 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write freelist\n"));
972 /* add all the rest of the file to the freelist, possibly leaving a gap
973 for the recovery area */
974 if (recovery_size == 0) {
975 /* the simple case - the whole file can be used as a freelist */
976 data_len = (tdb->map_size - TDB_DATA_START(tdb->hash_size));
977 if (tdb_free_region(tdb, TDB_DATA_START(tdb->hash_size), data_len) != 0) {
981 /* we need to add two freelist entries - one on either
982 side of the recovery area
984 Note that we cannot shift the recovery area during
985 this operation. Only the transaction.c code may
986 move the recovery area or we risk subtle data
989 data_len = (recovery_head - TDB_DATA_START(tdb->hash_size));
990 if (tdb_free_region(tdb, TDB_DATA_START(tdb->hash_size), data_len) != 0) {
993 /* and the 2nd free list entry after the recovery area - if any */
994 data_len = tdb->map_size - (recovery_head+recovery_size);
995 if (tdb_free_region(tdb, recovery_head+recovery_size, data_len) != 0) {
1000 tdb_increment_seqnum_nonblock(tdb);
1002 if (tdb_unlockall(tdb) != 0) {
1003 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to unlock\n"));
// State shared between tdb_repack and its traverse callback.
// An error member is also used by the code below (state.error); its
// declaration is on a line missing from this view.
1014 struct traverse_state {
// Database that repack_traverse stores each visited record into.
1016 struct tdb_context *dest_db;
1020 traverse function for repacking
// Traverse callback for tdb_repack: copies one record into state->dest_db
// with TDB_INSERT and records a failure in state->error.
// The tdb argument (the database being traversed) is not used in the visible
// lines. NOTE(review): the return values are on lines missing from this view.
1022 static int repack_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *private_data)
1024 struct traverse_state *state = (struct traverse_state *)private_data;
1025 if (tdb_store(state->dest_db, key, data, TDB_INSERT) != 0) {
1026 state->error = true;
// Rewrite the database inside one transaction:
//  1. start a transaction on tdb;
//  2. open a TDB_INTERNAL scratch database with the same hash size;
//  3. copy every record into the scratch database (read traverse);
//  4. wipe tdb with tdb_wipe_all;
//  5. copy every record back from the scratch database;
//  6. commit the transaction.
// Every visible failure path logs a fatal message and, from step 2 on,
// cancels the transaction.
// NOTE(review): the tests of state.error, the returns and any close of tmp_db
// are on lines missing from this view - confirm tmp_db is released on every
// path in the full file.
1035 _PUBLIC_ int tdb_repack(struct tdb_context *tdb)
1037 struct tdb_context *tmp_db;
1038 struct traverse_state state;
1040 tdb_trace(tdb, "tdb_repack");
1042 if (tdb_transaction_start(tdb) != 0) {
1043 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to start transaction\n"));
1047 tmp_db = tdb_open("tmpdb", tdb_hash_size(tdb), TDB_INTERNAL, O_RDWR|O_CREAT, 0);
1048 if (tmp_db == NULL) {
1049 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to create tmp_db\n"));
1050 tdb_transaction_cancel(tdb);
// First pass: tdb is the source, tmp_db the destination.
1054 state.error = false;
1055 state.dest_db = tmp_db;
1057 if (tdb_traverse_read(tdb, repack_traverse, &state) == -1) {
1058 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to traverse copying out\n"));
1059 tdb_transaction_cancel(tdb);
1065 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Error during traversal\n"));
1066 tdb_transaction_cancel(tdb);
1071 if (tdb_wipe_all(tdb) != 0) {
1072 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to wipe database\n"));
1073 tdb_transaction_cancel(tdb);
// Second pass: roles swapped, tmp_db is the source and tdb the destination.
1078 state.error = false;
1079 state.dest_db = tdb;
1081 if (tdb_traverse_read(tmp_db, repack_traverse, &state) == -1) {
1082 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to traverse copying back\n"));
1083 tdb_transaction_cancel(tdb);
1089 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Error during second traversal\n"));
1090 tdb_transaction_cancel(tdb);
1097 if (tdb_transaction_commit(tdb) != 0) {
1098 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to commit\n"));
1105 /* Even on files, we can get partial writes due to signals. */
// Write count bytes from buf to fd, coping with partial writes (see the
// comment preceding this function): after each write the buffer pointer is
// advanced by the number of bytes written.
// NOTE(review): the loop, the error test and the returns are on lines missing
// from this view.
1106 bool tdb_write_all(int fd, const void *buf, size_t count)
1110 ret = write(fd, buf, count);
1113 buf = (const char *)buf + ret;
// Add two offsets and test the sum for wrap-around.
// A sum smaller than either operand means the addition wrapped.
// NOTE(review): the store through pret and the returned values are on lines
// missing from this view.
1119 bool tdb_add_off_t(tdb_off_t a, tdb_off_t b, tdb_off_t *pret)
1121 tdb_off_t ret = a + b;
1123 if ((ret < a) || (ret < b)) {
// Append the NUL-terminated string str to the trace file descriptor.
// When the write fails the trace descriptor is closed.
// NOTE(review): anything done after the close is on missing lines.
1131 static void tdb_trace_write(struct tdb_context *tdb, const char *str)
1133 if (!tdb_write_all(tdb->tracefd, str, strlen(str))) {
1134 close(tdb->tracefd);
// Begin a trace line: read the current sequence number from TDB_SEQNUM_OFS
// and write it, followed by a space, to the trace file.
// The result of tdb_ofs_read is not checked.
1139 static void tdb_trace_start(struct tdb_context *tdb)
1142 char msg[sizeof(tdb_off_t) * 4 + 1];
1144 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
1145 snprintf(msg, sizeof(msg), "%u ", seqnum);
1146 tdb_trace_write(tdb, msg);
// Finish a trace line by writing a newline.
1149 static void tdb_trace_end(struct tdb_context *tdb)
1151 tdb_trace_write(tdb, "\n");
// Finish a trace line with the integer result, formatted as space, equals
// sign, space, value, newline.
1154 static void tdb_trace_end_ret(struct tdb_context *tdb, int ret)
1156 char msg[sizeof(ret) * 4 + 4];
1157 snprintf(msg, sizeof(msg), " = %i\n", ret);
1158 tdb_trace_write(tdb, msg);
1161 static void tdb_trace_record(struct tdb_context *tdb, TDB_DATA rec)
1163 char msg[20 + rec.dsize*2], *p;
1166 /* We differentiate zero-length records from non-existent ones. */
1167 if (rec.dptr == NULL) {
1168 tdb_trace_write(tdb, " NULL");
1172 /* snprintf here is purely cargo-cult programming. */
1174 p += snprintf(p, sizeof(msg), " %zu:", rec.dsize);
1175 for (i = 0; i < rec.dsize; i++)
1176 p += snprintf(p, 2, "%02x", rec.dptr[i]);
1178 tdb_trace_write(tdb, msg);
// Trace an operation that has no arguments and no result: sequence number
// prefix followed by the operation name.
// NOTE(review): the line terminator call is on a line missing from this view.
1181 void tdb_trace(struct tdb_context *tdb, const char *op)
1183 tdb_trace_start(tdb);
1184 tdb_trace_write(tdb, op);
// Like tdb_trace, but the sequence number prefix is the caller-supplied
// seqnum instead of the value read from the file.
// NOTE(review): the line terminator call is on a line missing from this view.
1188 void tdb_trace_seqnum(struct tdb_context *tdb, uint32_t seqnum, const char *op)
1190 char msg[sizeof(tdb_off_t) * 4 + 1];
1192 snprintf(msg, sizeof(msg), "%u ", seqnum);
1193 tdb_trace_write(tdb, msg);
1194 tdb_trace_write(tdb, op);
// Trace an open call: operation name, hash size in decimal, then tdb flags
// and open flags in hex. The message is formatted before the sequence number
// prefix is written.
// NOTE(review): the msg declaration and the line terminator call are on lines
// missing from this view.
1198 void tdb_trace_open(struct tdb_context *tdb, const char *op,
1199 unsigned hash_size, unsigned tdb_flags, unsigned open_flags)
1203 snprintf(msg, sizeof(msg),
1204 "%s %u 0x%x 0x%x", op, hash_size, tdb_flags, open_flags);
1205 tdb_trace_start(tdb);
1206 tdb_trace_write(tdb, msg);
// Trace an operation with an integer result: prefix, operation name, result.
1210 void tdb_trace_ret(struct tdb_context *tdb, const char *op, int ret)
1212 tdb_trace_start(tdb);
1213 tdb_trace_write(tdb, op);
1214 tdb_trace_end_ret(tdb, ret);
// Trace an operation whose result is a record: prefix, operation name, an
// equals sign, then the record dump.
// NOTE(review): the line terminator call is on a line missing from this view.
1217 void tdb_trace_retrec(struct tdb_context *tdb, const char *op, TDB_DATA ret)
1219 tdb_trace_start(tdb);
1220 tdb_trace_write(tdb, op);
1221 tdb_trace_write(tdb, " =");
1222 tdb_trace_record(tdb, ret);
// Trace an operation with one record argument and no result.
// NOTE(review): the rec parameter line and the line terminator call are on
// lines missing from this view.
1226 void tdb_trace_1rec(struct tdb_context *tdb, const char *op,
1229 tdb_trace_start(tdb);
1230 tdb_trace_write(tdb, op);
1231 tdb_trace_record(tdb, rec);
// Trace an operation with one record argument and an integer result.
1235 void tdb_trace_1rec_ret(struct tdb_context *tdb, const char *op,
1236 TDB_DATA rec, int ret)
1238 tdb_trace_start(tdb);
1239 tdb_trace_write(tdb, op);
1240 tdb_trace_record(tdb, rec);
1241 tdb_trace_end_ret(tdb, ret);
// Trace an operation with one record argument and a record result: the
// argument is dumped first, then an equals sign, then the result record.
// NOTE(review): the line terminator call is on a line missing from this view.
1244 void tdb_trace_1rec_retrec(struct tdb_context *tdb, const char *op,
1245 TDB_DATA rec, TDB_DATA ret)
1247 tdb_trace_start(tdb);
1248 tdb_trace_write(tdb, op);
1249 tdb_trace_record(tdb, rec);
1250 tdb_trace_write(tdb, " =");
1251 tdb_trace_record(tdb, ret);
// Trace an operation with two record arguments, a flag and an integer result.
// Output order: prefix, operation name, rec1, rec2, flag in alternate hex
// form, result.
// NOTE(review): the ret parameter line is missing from this view.
1255 void tdb_trace_2rec_flag_ret(struct tdb_context *tdb, const char *op,
1256 TDB_DATA rec1, TDB_DATA rec2, unsigned flag,
1259 char msg[1 + sizeof(ret) * 4];
1261 snprintf(msg, sizeof(msg), " %#x", flag);
1262 tdb_trace_start(tdb);
1263 tdb_trace_write(tdb, op);
1264 tdb_trace_record(tdb, rec1);
1265 tdb_trace_record(tdb, rec2);
1266 tdb_trace_write(tdb, msg);
1267 tdb_trace_end_ret(tdb, ret);
// Trace an operation with one leading record (rec), num_recs further records,
// a flag and an integer result.
// Output order: prefix, operation name, rec, each entry of recs in order,
// flag in alternate hex form, result.
// NOTE(review): the rec parameter line and the declaration of i are on lines
// missing from this view.
1270 void tdb_trace_1plusn_rec_flag_ret(struct tdb_context *tdb, const char *op,
1272 const TDB_DATA *recs, int num_recs,
1273 unsigned flag, int ret)
1275 char msg[1 + sizeof(ret) * 4];
1278 snprintf(msg, sizeof(msg), " %#x", flag);
1279 tdb_trace_start(tdb);
1280 tdb_trace_write(tdb, op);
1281 tdb_trace_record(tdb, rec);
1282 for (i=0; i<num_recs; i++) {
1283 tdb_trace_record(tdb, recs[i]);
1285 tdb_trace_write(tdb, msg);
1286 tdb_trace_end_ret(tdb, ret);
// Trace an operation with two record arguments and a record result: rec1 and
// rec2 are dumped, then an equals sign, then the result record.
// NOTE(review): this view ends inside the function; whatever follows the last
// visible call is not shown here.
1289 void tdb_trace_2rec_retrec(struct tdb_context *tdb, const char *op,
1290 TDB_DATA rec1, TDB_DATA rec2, TDB_DATA ret)
1292 tdb_trace_start(tdb);
1293 tdb_trace_write(tdb, op);
1294 tdb_trace_record(tdb, rec1);
1295 tdb_trace_record(tdb, rec2);
1296 tdb_trace_write(tdb, " =");
1297 tdb_trace_record(tdb, ret);