2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 3 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, see <http://www.gnu.org/licenses/>.
#include "tdb_private.h"
#include <errno.h>
30 _PUBLIC_ TDB_DATA tdb_null;
33 non-blocking increment of the tdb sequence number if the tdb has been opened using
36 _PUBLIC_ void tdb_increment_seqnum_nonblock(struct tdb_context *tdb)
40 if (!(tdb->flags & TDB_SEQNUM)) {
44 /* we ignore errors from this, as we have no sane way of
47 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
49 tdb_ofs_write(tdb, TDB_SEQNUM_OFS, &seqnum);
53 increment the tdb sequence number if the tdb has been opened using
56 static void tdb_increment_seqnum(struct tdb_context *tdb)
58 if (!(tdb->flags & TDB_SEQNUM)) {
62 if (tdb->transaction != NULL) {
63 tdb_increment_seqnum_nonblock(tdb);
67 if (tdb_nest_lock(tdb, TDB_SEQNUM_OFS, F_WRLCK,
68 TDB_LOCK_WAIT|TDB_LOCK_PROBE) != 0) {
72 tdb_increment_seqnum_nonblock(tdb);
74 tdb_nest_unlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, false);
77 static int tdb_key_compare(TDB_DATA key, TDB_DATA data, void *private_data)
79 return memcmp(data.dptr, key.dptr, data.dsize);
82 void tdb_chainwalk_init(struct tdb_chainwalk_ctx *ctx, tdb_off_t ptr)
84 *ctx = (struct tdb_chainwalk_ctx) { .slow_ptr = ptr };
87 bool tdb_chainwalk_check(struct tdb_context *tdb,
88 struct tdb_chainwalk_ctx *ctx,
93 if (ctx->slow_chase) {
94 ret = tdb_ofs_read(tdb, ctx->slow_ptr, &ctx->slow_ptr);
99 ctx->slow_chase = !ctx->slow_chase;
101 if (next_ptr == ctx->slow_ptr) {
102 tdb->ecode = TDB_ERR_CORRUPT;
103 TDB_LOG((tdb, TDB_DEBUG_ERROR,
104 "tdb_chainwalk_check: circular chain\n"));
111 /* Returns 0 on fail. On success, return offset of record, and fills
113 static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
114 struct tdb_record *r)
117 struct tdb_chainwalk_ctx chainwalk;
119 /* read in the hash top */
120 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
123 tdb_chainwalk_init(&chainwalk, rec_ptr);
125 /* keep looking until we find the right record */
129 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
132 if (!TDB_DEAD(r) && hash==r->full_hash
133 && key.dsize==r->key_len
134 && tdb_parse_data(tdb, key, rec_ptr + sizeof(*r),
135 r->key_len, tdb_key_compare,
141 ok = tdb_chainwalk_check(tdb, &chainwalk, rec_ptr);
146 tdb->ecode = TDB_ERR_NOEXIST;
150 /* As tdb_find, but if you succeed, keep the lock */
151 tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype,
152 struct tdb_record *rec)
156 if (tdb_lock(tdb, BUCKET(hash), locktype) == -1)
158 if (!(rec_ptr = tdb_find(tdb, key, hash, rec)))
159 tdb_unlock(tdb, BUCKET(hash), locktype);
163 static TDB_DATA _tdb_fetch(struct tdb_context *tdb, TDB_DATA key);
165 struct tdb_update_hash_state {
166 const TDB_DATA *dbufs;
171 static int tdb_update_hash_cmp(TDB_DATA key, TDB_DATA data, void *private_data)
173 struct tdb_update_hash_state *state = private_data;
174 unsigned char *dptr = data.dptr;
177 if (state->dbufs_len != data.dsize) {
181 for (i=0; i<state->num_dbufs; i++) {
182 TDB_DATA dbuf = state->dbufs[i];
183 if( dbuf.dsize > 0) {
185 ret = memcmp(dptr, dbuf.dptr, dbuf.dsize);
196 /* update an entry in place - this only works if the new data size
197 is <= the old data size and the key exists.
198 on failure return -1.
200 static int tdb_update_hash(struct tdb_context *tdb, TDB_DATA key,
202 const TDB_DATA *dbufs, int num_dbufs,
205 struct tdb_record rec;
206 tdb_off_t rec_ptr, ofs;
210 if (!(rec_ptr = tdb_find(tdb, key, hash, &rec)))
213 /* it could be an exact duplicate of what is there - this is
214 * surprisingly common (eg. with a ldb re-index). */
215 if (rec.data_len == dbufs_len) {
216 struct tdb_update_hash_state state = {
217 .dbufs = dbufs, .num_dbufs = num_dbufs,
218 .dbufs_len = dbufs_len
222 ret = tdb_parse_record(tdb, key, tdb_update_hash_cmp, &state);
228 /* must be long enough key, data and tailer */
229 if (rec.rec_len < key.dsize + dbufs_len + sizeof(tdb_off_t)) {
230 tdb->ecode = TDB_SUCCESS; /* Not really an error */
234 ofs = rec_ptr + sizeof(rec) + rec.key_len;
236 for (i=0; i<num_dbufs; i++) {
237 TDB_DATA dbuf = dbufs[i];
240 ret = tdb->methods->tdb_write(tdb, ofs, dbuf.dptr, dbuf.dsize);
247 if (dbufs_len != rec.data_len) {
249 rec.data_len = dbufs_len;
250 return tdb_rec_write(tdb, rec_ptr, &rec);
256 /* find an entry in the database given a key */
257 /* If an entry doesn't exist tdb_err will be set to
258 * TDB_ERR_NOEXIST. If a key has no data attached
259 * then the TDB_DATA will have zero length but
262 static TDB_DATA _tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
265 struct tdb_record rec;
269 /* find which hash bucket it is in */
270 hash = tdb->hash_fn(&key);
271 if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec)))
274 ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
276 ret.dsize = rec.data_len;
277 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
281 _PUBLIC_ TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
283 TDB_DATA ret = _tdb_fetch(tdb, key);
285 tdb_trace_1rec_retrec(tdb, "tdb_fetch", key, ret);
290 * Find an entry in the database and hand the record's data to a parsing
291 * function. The parsing function is executed under the chain read lock, so it
292 * should be fast and should not block on other syscalls.
294 * DON'T CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
296 * For mmapped tdb's that do not have a transaction open it points the parsing
297 * function directly at the mmap area, it avoids the malloc/memcpy in this
298 * case. If a transaction is open or no mmap is available, it has to do
299 * malloc/read/parse/free.
301 * This is interesting for all readers of potentially large data structures in
302 * the tdb records, ldb indexes being one example.
304 * Return -1 if the record was not found.
307 _PUBLIC_ int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
308 int (*parser)(TDB_DATA key, TDB_DATA data,
313 struct tdb_record rec;
317 /* find which hash bucket it is in */
318 hash = tdb->hash_fn(&key);
320 if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
321 /* record not found */
322 tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, -1);
323 tdb->ecode = TDB_ERR_NOEXIST;
326 tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, 0);
328 ret = tdb_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
329 rec.data_len, parser, private_data);
331 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
336 /* check if an entry in the database exists
338 note that 1 is returned if the key is found and 0 is returned if not found
339 this doesn't match the conventions in the rest of this module, but is
342 static int tdb_exists_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
344 struct tdb_record rec;
346 if (tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
348 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
352 _PUBLIC_ int tdb_exists(struct tdb_context *tdb, TDB_DATA key)
354 uint32_t hash = tdb->hash_fn(&key);
357 ret = tdb_exists_hash(tdb, key, hash);
358 tdb_trace_1rec_ret(tdb, "tdb_exists", key, ret);
363 * Move a dead record to the freelist. The hash chain and freelist
366 static int tdb_del_dead(struct tdb_context *tdb,
369 struct tdb_record *rec,
374 ret = tdb_write_lock_record(tdb, rec_ptr);
376 /* Someone traversing here: Just leave it dead */
379 ret = tdb_write_unlock_record(tdb, rec_ptr);
383 ret = tdb_ofs_write(tdb, last_ptr, &rec->next);
390 ret = tdb_free(tdb, rec_ptr, rec);
395 * Walk the hash chain and leave tdb->max_dead_records around. Move
396 * the rest of dead records to the freelist.
398 int tdb_trim_dead(struct tdb_context *tdb, uint32_t hash)
400 struct tdb_chainwalk_ctx chainwalk;
401 struct tdb_record rec;
402 tdb_off_t last_ptr, rec_ptr;
403 bool locked_freelist = false;
407 last_ptr = TDB_HASH_TOP(hash);
410 * Init chainwalk with the pointer to the hash top. It might
411 * be that the very first record in the chain is a dead one
412 * that we have to delete.
414 tdb_chainwalk_init(&chainwalk, last_ptr);
416 ret = tdb_ofs_read(tdb, last_ptr, &rec_ptr);
421 while (rec_ptr != 0) {
422 bool deleted = false;
425 ret = tdb_rec_read(tdb, rec_ptr, &rec);
431 * Make a copy of rec.next: Further down we might
432 * delete and put the record on the freelist. Make
433 * sure that modifications in that code path can't
434 * break the chainwalk here.
438 if (rec.magic == TDB_DEAD_MAGIC) {
441 if (num_dead > tdb->max_dead_records) {
443 if (!locked_freelist) {
445 * Lock the freelist only if
446 * it's really required.
448 ret = tdb_lock(tdb, -1, F_WRLCK);
452 locked_freelist = true;
469 * Don't do the chainwalk check if "rec_ptr" was
470 * deleted. We reduced the chain, and the chainwalk
471 * check might catch up early. Imagine a valid chain
472 * with just dead records: We never can bump the
473 * "slow" pointer in chainwalk_check, as there isn't
474 * anything left to jump to and compare.
481 ok = tdb_chainwalk_check(tdb, &chainwalk, next);
491 if (locked_freelist) {
492 tdb_unlock(tdb, -1, F_WRLCK);
497 /* delete an entry in the database given a key */
498 static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
501 struct tdb_record rec;
504 if (tdb->read_only || tdb->traverse_read) {
505 tdb->ecode = TDB_ERR_RDONLY;
509 rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK, &rec);
515 * Mark the record dead
517 rec.magic = TDB_DEAD_MAGIC;
518 ret = tdb_rec_write(tdb, rec_ptr, &rec);
523 tdb_increment_seqnum(tdb);
525 ret = tdb_trim_dead(tdb, hash);
527 if (tdb_unlock(tdb, BUCKET(hash), F_WRLCK) != 0)
528 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_delete: WARNING tdb_unlock failed!\n"));
532 _PUBLIC_ int tdb_delete(struct tdb_context *tdb, TDB_DATA key)
534 uint32_t hash = tdb->hash_fn(&key);
537 ret = tdb_delete_hash(tdb, key, hash);
538 tdb_trace_1rec_ret(tdb, "tdb_delete", key, ret);
543 * See if we have a dead record around with enough space
545 tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash,
546 struct tdb_record *r, tdb_len_t length,
547 tdb_off_t *p_last_ptr)
549 tdb_off_t rec_ptr, last_ptr;
550 struct tdb_chainwalk_ctx chainwalk;
551 tdb_off_t best_rec_ptr = 0;
552 tdb_off_t best_last_ptr = 0;
553 struct tdb_record best = { .rec_len = UINT32_MAX };
555 length += sizeof(tdb_off_t); /* tailer */
557 last_ptr = TDB_HASH_TOP(hash);
559 /* read in the hash top */
560 if (tdb_ofs_read(tdb, last_ptr, &rec_ptr) == -1)
563 tdb_chainwalk_init(&chainwalk, rec_ptr);
565 /* keep looking until we find the right record */
569 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
572 if (TDB_DEAD(r) && (r->rec_len >= length) &&
573 (r->rec_len < best.rec_len)) {
574 best_rec_ptr = rec_ptr;
575 best_last_ptr = last_ptr;
581 ok = tdb_chainwalk_check(tdb, &chainwalk, rec_ptr);
587 if (best.rec_len == UINT32_MAX) {
592 *p_last_ptr = best_last_ptr;
596 static int _tdb_storev(struct tdb_context *tdb, TDB_DATA key,
597 const TDB_DATA *dbufs, int num_dbufs,
598 int flag, uint32_t hash)
600 struct tdb_record rec;
601 tdb_off_t rec_ptr, ofs;
602 tdb_len_t rec_len, dbufs_len;
608 for (i=0; i<num_dbufs; i++) {
609 size_t dsize = dbufs[i].dsize;
611 if ((dsize != 0) && (dbufs[i].dptr == NULL)) {
612 tdb->ecode = TDB_ERR_EINVAL;
617 if (dbufs_len < dsize) {
618 tdb->ecode = TDB_ERR_OOM;
623 rec_len = key.dsize + dbufs_len;
624 if ((rec_len < key.dsize) || (rec_len < dbufs_len)) {
625 tdb->ecode = TDB_ERR_OOM;
629 /* check for it existing, on insert. */
630 if (flag == TDB_INSERT) {
631 if (tdb_exists_hash(tdb, key, hash)) {
632 tdb->ecode = TDB_ERR_EXISTS;
636 /* first try in-place update, on modify or replace. */
637 if (tdb_update_hash(tdb, key, hash, dbufs, num_dbufs,
641 if (tdb->ecode == TDB_ERR_NOEXIST &&
642 flag == TDB_MODIFY) {
643 /* if the record doesn't exist and we are in TDB_MODIFY mode then
644 we should fail the store */
648 /* reset the error code potentially set by the tdb_update_hash() */
649 tdb->ecode = TDB_SUCCESS;
651 /* delete any existing record - if it doesn't exist we don't
652 care. Doing this first reduces fragmentation, and avoids
653 coalescing with `allocated' block before it's updated. */
654 if (flag != TDB_INSERT)
655 tdb_delete_hash(tdb, key, hash);
657 /* we have to allocate some space */
658 rec_ptr = tdb_allocate(tdb, hash, rec_len, &rec);
664 /* Read hash top into next ptr */
665 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
668 rec.key_len = key.dsize;
669 rec.data_len = dbufs_len;
670 rec.full_hash = hash;
671 rec.magic = TDB_MAGIC;
675 /* write out and point the top of the hash chain at it */
676 ret = tdb_rec_write(tdb, ofs, &rec);
682 ret = tdb->methods->tdb_write(tdb, ofs, key.dptr, key.dsize);
688 for (i=0; i<num_dbufs; i++) {
689 if (dbufs[i].dsize == 0) {
693 ret = tdb->methods->tdb_write(tdb, ofs, dbufs[i].dptr,
698 ofs += dbufs[i].dsize;
701 ret = tdb_ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr);
703 /* Need to tdb_unallocate() here */
711 tdb_increment_seqnum(tdb);
716 static int _tdb_store(struct tdb_context *tdb, TDB_DATA key,
717 TDB_DATA dbuf, int flag, uint32_t hash)
719 return _tdb_storev(tdb, key, &dbuf, 1, flag, hash);
722 /* store an element in the database, replacing any existing element
725 return 0 on success, -1 on failure
727 _PUBLIC_ int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
732 if (tdb->read_only || tdb->traverse_read) {
733 tdb->ecode = TDB_ERR_RDONLY;
734 tdb_trace_2rec_flag_ret(tdb, "tdb_store", key, dbuf, flag, -1);
738 /* find which hash bucket it is in */
739 hash = tdb->hash_fn(&key);
740 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
743 ret = _tdb_store(tdb, key, dbuf, flag, hash);
744 tdb_trace_2rec_flag_ret(tdb, "tdb_store", key, dbuf, flag, ret);
745 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
749 _PUBLIC_ int tdb_storev(struct tdb_context *tdb, TDB_DATA key,
750 const TDB_DATA *dbufs, int num_dbufs, int flag)
755 if (tdb->read_only || tdb->traverse_read) {
756 tdb->ecode = TDB_ERR_RDONLY;
757 tdb_trace_1plusn_rec_flag_ret(tdb, "tdb_storev", key,
758 dbufs, num_dbufs, flag, -1);
762 /* find which hash bucket it is in */
763 hash = tdb->hash_fn(&key);
764 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
767 ret = _tdb_storev(tdb, key, dbufs, num_dbufs, flag, hash);
768 tdb_trace_1plusn_rec_flag_ret(tdb, "tdb_storev", key,
769 dbufs, num_dbufs, flag, -1);
770 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
774 /* Append to an entry. Create if not exist. */
775 _PUBLIC_ int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
781 /* find which hash bucket it is in */
782 hash = tdb->hash_fn(&key);
783 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
786 dbufs[0] = _tdb_fetch(tdb, key);
789 ret = _tdb_storev(tdb, key, dbufs, 2, 0, hash);
790 tdb_trace_2rec_retrec(tdb, "tdb_append", key, dbufs[0], dbufs[1]);
792 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
793 SAFE_FREE(dbufs[0].dptr);
799 return the name of the current tdb file
800 useful for external logging functions
802 _PUBLIC_ const char *tdb_name(struct tdb_context *tdb)
808 return the underlying file descriptor being used by tdb, or -1
809 useful for external routines that want to check the device/inode
812 _PUBLIC_ int tdb_fd(struct tdb_context *tdb)
818 return the current logging function
819 useful for external tdb routines that wish to log tdb errors
821 _PUBLIC_ tdb_log_func tdb_log_fn(struct tdb_context *tdb)
823 return tdb->log.log_fn;
828 get the tdb sequence number. Only makes sense if the writers opened
829 with TDB_SEQNUM set. Note that this sequence number will wrap quite
830 quickly, so it should only be used for a 'has something changed'
831 test, not for code that relies on the count of the number of changes
832 made. If you want a counter then use a tdb record.
834 The aim of this sequence number is to allow for a very lightweight
835 test of a possible tdb change.
837 _PUBLIC_ int tdb_get_seqnum(struct tdb_context *tdb)
841 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
845 _PUBLIC_ int tdb_hash_size(struct tdb_context *tdb)
847 return tdb->hash_size;
850 _PUBLIC_ size_t tdb_map_size(struct tdb_context *tdb)
852 return tdb->map_size;
855 _PUBLIC_ int tdb_get_flags(struct tdb_context *tdb)
860 _PUBLIC_ void tdb_add_flags(struct tdb_context *tdb, unsigned flags)
862 if ((flags & TDB_ALLOW_NESTING) &&
863 (flags & TDB_DISALLOW_NESTING)) {
864 tdb->ecode = TDB_ERR_NESTING;
865 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_add_flags: "
866 "allow_nesting and disallow_nesting are not allowed together!"));
870 if (flags & TDB_ALLOW_NESTING) {
871 tdb->flags &= ~TDB_DISALLOW_NESTING;
873 if (flags & TDB_DISALLOW_NESTING) {
874 tdb->flags &= ~TDB_ALLOW_NESTING;
880 _PUBLIC_ void tdb_remove_flags(struct tdb_context *tdb, unsigned flags)
882 if ((flags & TDB_ALLOW_NESTING) &&
883 (flags & TDB_DISALLOW_NESTING)) {
884 tdb->ecode = TDB_ERR_NESTING;
885 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_remove_flags: "
886 "allow_nesting and disallow_nesting are not allowed together!"));
890 if ((flags & TDB_NOLOCK) &&
891 (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) &&
892 (tdb->mutexes == NULL)) {
893 tdb->ecode = TDB_ERR_LOCK;
894 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_remove_flags: "
895 "Can not remove NOLOCK flag on mutexed databases"));
899 if (flags & TDB_ALLOW_NESTING) {
900 tdb->flags |= TDB_DISALLOW_NESTING;
902 if (flags & TDB_DISALLOW_NESTING) {
903 tdb->flags |= TDB_ALLOW_NESTING;
906 tdb->flags &= ~flags;
911 enable sequence number handling on an open tdb
913 _PUBLIC_ void tdb_enable_seqnum(struct tdb_context *tdb)
915 tdb->flags |= TDB_SEQNUM;
920 add a region of the file to the freelist. Length is the size of the region in bytes,
921 which includes the free list header that needs to be added
923 static int tdb_free_region(struct tdb_context *tdb, tdb_off_t offset, ssize_t length)
925 struct tdb_record rec;
926 if (length <= sizeof(rec)) {
927 /* the region is not worth adding */
930 if (length + offset > tdb->map_size) {
931 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: adding region beyond end of file\n"));
934 memset(&rec,'\0',sizeof(rec));
935 rec.rec_len = length - sizeof(rec);
936 if (tdb_free(tdb, offset, &rec) == -1) {
937 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: failed to add free record\n"));
944 wipe the entire database, deleting all records. This can be done
945 very fast by using a allrecord lock. The entire data portion of the
946 file becomes a single entry in the freelist.
948 This code carefully steps around the recovery area, leaving it alone
950 _PUBLIC_ int tdb_wipe_all(struct tdb_context *tdb)
953 tdb_off_t offset = 0;
955 tdb_off_t recovery_head;
956 tdb_len_t recovery_size = 0;
958 if (tdb_lockall(tdb) != 0) {
962 tdb_trace(tdb, "tdb_wipe_all");
964 /* see if the tdb has a recovery area, and remember its size
965 if so. We don't want to lose this as otherwise each
966 tdb_wipe_all() in a transaction will increase the size of
967 the tdb by the size of the recovery area */
968 if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) {
969 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery head\n"));
973 if (recovery_head != 0) {
974 struct tdb_record rec;
975 if (tdb->methods->tdb_read(tdb, recovery_head, &rec, sizeof(rec), DOCONV()) == -1) {
976 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery record\n"));
979 recovery_size = rec.rec_len + sizeof(rec);
982 /* wipe the hashes */
983 for (i=0;i<tdb->hash_size;i++) {
984 if (tdb_ofs_write(tdb, TDB_HASH_TOP(i), &offset) == -1) {
985 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write hash %d\n", i));
990 /* wipe the freelist */
991 if (tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) {
992 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write freelist\n"));
996 /* add all the rest of the file to the freelist, possibly leaving a gap
997 for the recovery area */
998 if (recovery_size == 0) {
999 /* the simple case - the whole file can be used as a freelist */
1000 data_len = (tdb->map_size - TDB_DATA_START(tdb->hash_size));
1001 if (tdb_free_region(tdb, TDB_DATA_START(tdb->hash_size), data_len) != 0) {
1005 /* we need to add two freelist entries - one on either
1006 side of the recovery area
1008 Note that we cannot shift the recovery area during
1009 this operation. Only the transaction.c code may
1010 move the recovery area or we risk subtle data
1013 data_len = (recovery_head - TDB_DATA_START(tdb->hash_size));
1014 if (tdb_free_region(tdb, TDB_DATA_START(tdb->hash_size), data_len) != 0) {
1017 /* and the 2nd free list entry after the recovery area - if any */
1018 data_len = tdb->map_size - (recovery_head+recovery_size);
1019 if (tdb_free_region(tdb, recovery_head+recovery_size, data_len) != 0) {
1024 tdb_increment_seqnum_nonblock(tdb);
1026 if (tdb_unlockall(tdb) != 0) {
1027 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to unlock\n"));
1038 struct traverse_state {
1040 struct tdb_context *dest_db;
1044 traverse function for repacking
1046 static int repack_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *private_data)
1048 struct traverse_state *state = (struct traverse_state *)private_data;
1049 if (tdb_store(state->dest_db, key, data, TDB_INSERT) != 0) {
1050 state->error = true;
1059 _PUBLIC_ int tdb_repack(struct tdb_context *tdb)
1061 struct tdb_context *tmp_db;
1062 struct traverse_state state;
1064 tdb_trace(tdb, "tdb_repack");
1066 if (tdb_transaction_start(tdb) != 0) {
1067 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to start transaction\n"));
1071 tmp_db = tdb_open("tmpdb", tdb_hash_size(tdb), TDB_INTERNAL, O_RDWR|O_CREAT, 0);
1072 if (tmp_db == NULL) {
1073 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to create tmp_db\n"));
1074 tdb_transaction_cancel(tdb);
1078 state.error = false;
1079 state.dest_db = tmp_db;
1081 if (tdb_traverse_read(tdb, repack_traverse, &state) == -1) {
1082 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to traverse copying out\n"));
1083 tdb_transaction_cancel(tdb);
1089 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Error during traversal\n"));
1090 tdb_transaction_cancel(tdb);
1095 if (tdb_wipe_all(tdb) != 0) {
1096 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to wipe database\n"));
1097 tdb_transaction_cancel(tdb);
1102 state.error = false;
1103 state.dest_db = tdb;
1105 if (tdb_traverse_read(tmp_db, repack_traverse, &state) == -1) {
1106 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to traverse copying back\n"));
1107 tdb_transaction_cancel(tdb);
1113 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Error during second traversal\n"));
1114 tdb_transaction_cancel(tdb);
1121 if (tdb_transaction_commit(tdb) != 0) {
1122 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to commit\n"));
/*
 * Write all "count" bytes of "buf" to "fd".
 *
 * Even on files, we can get partial writes due to signals, so short
 * writes are continued and a write interrupted before transferring any
 * data (EINTR) is retried.
 *
 * Returns true once everything has been written (trivially for
 * count == 0). Returns false on any other write() error, or if write()
 * reports zero bytes written, which would otherwise loop forever.
 */
bool tdb_write_all(int fd, const void *buf, size_t count)
{
	const char *pos = buf;

	while (count > 0) {
		ssize_t ret = write(fd, pos, count);

		if (ret < 0) {
			if (errno == EINTR) {
				continue;
			}
			return false;
		}
		if (ret == 0) {
			/* no progress possible */
			return false;
		}

		pos += ret;
		count -= (size_t)ret;
	}
	return true;
}
1143 bool tdb_add_off_t(tdb_off_t a, tdb_off_t b, tdb_off_t *pret)
1145 tdb_off_t ret = a + b;
1147 if ((ret < a) || (ret < b)) {
1155 static void tdb_trace_write(struct tdb_context *tdb, const char *str)
1157 if (!tdb_write_all(tdb->tracefd, str, strlen(str))) {
1158 close(tdb->tracefd);
1163 static void tdb_trace_start(struct tdb_context *tdb)
1166 char msg[sizeof(tdb_off_t) * 4 + 1];
1168 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
1169 snprintf(msg, sizeof(msg), "%u ", seqnum);
1170 tdb_trace_write(tdb, msg);
/* Finish a trace line with a newline. */
static void tdb_trace_end(struct tdb_context *tdb)
{
	static const char newline[] = "\n";

	tdb_trace_write(tdb, newline);
}
/* Finish a trace line with " = <ret>" and a newline. */
static void tdb_trace_end_ret(struct tdb_context *tdb, int ret)
{
	char msg[sizeof(ret) * 4 + 4];
	const size_t msg_size = sizeof(msg);

	snprintf(msg, msg_size, " = %i\n", ret);
	tdb_trace_write(tdb, msg);
}
1185 static void tdb_trace_record(struct tdb_context *tdb, TDB_DATA rec)
1187 char msg[20 + rec.dsize*2], *p;
1190 /* We differentiate zero-length records from non-existent ones. */
1191 if (rec.dptr == NULL) {
1192 tdb_trace_write(tdb, " NULL");
1196 /* snprintf here is purely cargo-cult programming. */
1198 p += snprintf(p, sizeof(msg), " %zu:", rec.dsize);
1199 for (i = 0; i < rec.dsize; i++)
1200 p += snprintf(p, 2, "%02x", rec.dptr[i]);
1202 tdb_trace_write(tdb, msg);
/* Trace an operation that has no arguments and no result. */
void tdb_trace(struct tdb_context *tdb, const char *op)
{
	tdb_trace_start(tdb);		/* sequence number prefix */
	tdb_trace_write(tdb, op);	/* operation name */
	tdb_trace_end(tdb);		/* newline */
}
1212 void tdb_trace_seqnum(struct tdb_context *tdb, uint32_t seqnum, const char *op)
1214 char msg[sizeof(tdb_off_t) * 4 + 1];
1216 snprintf(msg, sizeof(msg), "%u ", seqnum);
1217 tdb_trace_write(tdb, msg);
1218 tdb_trace_write(tdb, op);
1222 void tdb_trace_open(struct tdb_context *tdb, const char *op,
1223 unsigned hash_size, unsigned tdb_flags, unsigned open_flags)
1227 snprintf(msg, sizeof(msg),
1228 "%s %u 0x%x 0x%x", op, hash_size, tdb_flags, open_flags);
1229 tdb_trace_start(tdb);
1230 tdb_trace_write(tdb, msg);
/* Trace an operation together with its integer result. */
void tdb_trace_ret(struct tdb_context *tdb, const char *op, int ret)
{
	tdb_trace_start(tdb);		/* sequence number prefix */
	tdb_trace_write(tdb, op);	/* operation name */
	tdb_trace_end_ret(tdb, ret);	/* " = <ret>" and newline */
}
1241 void tdb_trace_retrec(struct tdb_context *tdb, const char *op, TDB_DATA ret)
1243 tdb_trace_start(tdb);
1244 tdb_trace_write(tdb, op);
1245 tdb_trace_write(tdb, " =");
1246 tdb_trace_record(tdb, ret);
1250 void tdb_trace_1rec(struct tdb_context *tdb, const char *op,
1253 tdb_trace_start(tdb);
1254 tdb_trace_write(tdb, op);
1255 tdb_trace_record(tdb, rec);
1259 void tdb_trace_1rec_ret(struct tdb_context *tdb, const char *op,
1260 TDB_DATA rec, int ret)
1262 tdb_trace_start(tdb);
1263 tdb_trace_write(tdb, op);
1264 tdb_trace_record(tdb, rec);
1265 tdb_trace_end_ret(tdb, ret);
1268 void tdb_trace_1rec_retrec(struct tdb_context *tdb, const char *op,
1269 TDB_DATA rec, TDB_DATA ret)
1271 tdb_trace_start(tdb);
1272 tdb_trace_write(tdb, op);
1273 tdb_trace_record(tdb, rec);
1274 tdb_trace_write(tdb, " =");
1275 tdb_trace_record(tdb, ret);
1279 void tdb_trace_2rec_flag_ret(struct tdb_context *tdb, const char *op,
1280 TDB_DATA rec1, TDB_DATA rec2, unsigned flag,
1283 char msg[1 + sizeof(ret) * 4];
1285 snprintf(msg, sizeof(msg), " %#x", flag);
1286 tdb_trace_start(tdb);
1287 tdb_trace_write(tdb, op);
1288 tdb_trace_record(tdb, rec1);
1289 tdb_trace_record(tdb, rec2);
1290 tdb_trace_write(tdb, msg);
1291 tdb_trace_end_ret(tdb, ret);
1294 void tdb_trace_1plusn_rec_flag_ret(struct tdb_context *tdb, const char *op,
1296 const TDB_DATA *recs, int num_recs,
1297 unsigned flag, int ret)
1299 char msg[1 + sizeof(ret) * 4];
1302 snprintf(msg, sizeof(msg), " %#x", flag);
1303 tdb_trace_start(tdb);
1304 tdb_trace_write(tdb, op);
1305 tdb_trace_record(tdb, rec);
1306 for (i=0; i<num_recs; i++) {
1307 tdb_trace_record(tdb, recs[i]);
1309 tdb_trace_write(tdb, msg);
1310 tdb_trace_end_ret(tdb, ret);
1313 void tdb_trace_2rec_retrec(struct tdb_context *tdb, const char *op,
1314 TDB_DATA rec1, TDB_DATA rec2, TDB_DATA ret)
1316 tdb_trace_start(tdb);
1317 tdb_trace_write(tdb, op);
1318 tdb_trace_record(tdb, rec1);
1319 tdb_trace_record(tdb, rec2);
1320 tdb_trace_write(tdb, " =");
1321 tdb_trace_record(tdb, ret);