 /*
   Unix SMB/CIFS implementation.

   trivial database library

   Copyright (C) Andrew Tridgell              1999-2005
   Copyright (C) Paul `Rusty' Russell         2000
   Copyright (C) Jeremy Allison               2000-2003

     ** NOTE! The following LGPL license applies to the tdb
     ** library. This does NOT imply that all of Samba is released
     ** under the LGPL

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "tdb_private.h"
33 non-blocking increment of the tdb sequence number if the tdb has been opened using
36 void tdb_increment_seqnum_nonblock(struct tdb_context *tdb)
40 if (!(tdb->flags & TDB_SEQNUM)) {
44 /* we ignore errors from this, as we have no sane way of
47 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
49 tdb_ofs_write(tdb, TDB_SEQNUM_OFS, &seqnum);
53 increment the tdb sequence number if the tdb has been opened using
56 static void tdb_increment_seqnum(struct tdb_context *tdb)
58 if (!(tdb->flags & TDB_SEQNUM)) {
62 if (tdb_brlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, F_SETLKW, 1, 1) != 0) {
66 tdb_increment_seqnum_nonblock(tdb);
68 tdb_brlock(tdb, TDB_SEQNUM_OFS, F_UNLCK, F_SETLKW, 1, 1);
71 static int tdb_key_compare(TDB_DATA key, TDB_DATA data, void *private_data)
73 return memcmp(data.dptr, key.dptr, data.dsize);
76 /* Returns 0 on fail. On success, return offset of record, and fills
78 static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, u32 hash,
79 struct list_struct *r)
83 /* read in the hash top */
84 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
87 /* keep looking until we find the right record */
89 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
92 if (!TDB_DEAD(r) && hash==r->full_hash
93 && key.dsize==r->key_len
94 && tdb_parse_data(tdb, key, rec_ptr + sizeof(*r),
95 r->key_len, tdb_key_compare,
101 return TDB_ERRCODE(TDB_ERR_NOEXIST, 0);
104 /* As tdb_find, but if you succeed, keep the lock */
105 tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash, int locktype,
106 struct list_struct *rec)
110 if (tdb_lock(tdb, BUCKET(hash), locktype) == -1)
112 if (!(rec_ptr = tdb_find(tdb, key, hash, rec)))
113 tdb_unlock(tdb, BUCKET(hash), locktype);
118 /* update an entry in place - this only works if the new data size
119 is <= the old data size and the key exists.
120 on failure return -1.
122 static int tdb_update_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash, TDB_DATA dbuf)
124 struct list_struct rec;
128 if (!(rec_ptr = tdb_find(tdb, key, hash, &rec)))
131 /* must be long enough key, data and tailer */
132 if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb_off_t)) {
133 tdb->ecode = TDB_SUCCESS; /* Not really an error */
137 if (tdb->methods->tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
138 dbuf.dptr, dbuf.dsize) == -1)
141 if (dbuf.dsize != rec.data_len) {
143 rec.data_len = dbuf.dsize;
144 return tdb_rec_write(tdb, rec_ptr, &rec);
150 /* find an entry in the database given a key */
151 /* If an entry doesn't exist tdb_err will be set to
152 * TDB_ERR_NOEXIST. If a key has no data attached
153 * then the TDB_DATA will have zero length but
156 TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
159 struct list_struct rec;
163 /* find which hash bucket it is in */
164 hash = tdb->hash_fn(&key);
165 if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec)))
168 ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
170 ret.dsize = rec.data_len;
171 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
176 * Find an entry in the database and hand the record's data to a parsing
177 * function. The parsing function is executed under the chain read lock, so it
178 * should be fast and should not block on other syscalls.
180 * DONT CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
182 * For mmapped tdb's that do not have a transaction open it points the parsing
183 * function directly at the mmap area, it avoids the malloc/memcpy in this
184 * case. If a transaction is open or no mmap is available, it has to do
185 * malloc/read/parse/free.
187 * This is interesting for all readers of potentially large data structures in
188 * the tdb records, ldb indexes being one example.
191 int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
192 int (*parser)(TDB_DATA key, TDB_DATA data,
197 struct list_struct rec;
201 /* find which hash bucket it is in */
202 hash = tdb->hash_fn(&key);
204 if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
205 return TDB_ERRCODE(TDB_ERR_NOEXIST, 0);
208 ret = tdb_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
209 rec.data_len, parser, private_data);
211 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
216 /* check if an entry in the database exists
218 note that 1 is returned if the key is found and 0 is returned if not found
219 this doesn't match the conventions in the rest of this module, but is
222 static int tdb_exists_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash)
224 struct list_struct rec;
226 if (tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
228 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
232 int tdb_exists(struct tdb_context *tdb, TDB_DATA key)
234 u32 hash = tdb->hash_fn(&key);
235 return tdb_exists_hash(tdb, key, hash);
238 /* actually delete an entry in the database given the offset */
239 int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct list_struct*rec)
241 tdb_off_t last_ptr, i;
242 struct list_struct lastrec;
244 if (tdb->read_only || tdb->traverse_read) return -1;
246 if (tdb->traverse_write != 0 ||
247 tdb_write_lock_record(tdb, rec_ptr) == -1) {
248 /* Someone traversing here: mark it as dead */
249 rec->magic = TDB_DEAD_MAGIC;
250 return tdb_rec_write(tdb, rec_ptr, rec);
252 if (tdb_write_unlock_record(tdb, rec_ptr) != 0)
255 /* find previous record in hash chain */
256 if (tdb_ofs_read(tdb, TDB_HASH_TOP(rec->full_hash), &i) == -1)
258 for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
259 if (tdb_rec_read(tdb, i, &lastrec) == -1)
262 /* unlink it: next ptr is at start of record. */
264 last_ptr = TDB_HASH_TOP(rec->full_hash);
265 if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1)
268 /* recover the space */
269 if (tdb_free(tdb, rec_ptr, rec) == -1)
274 static int tdb_count_dead(struct tdb_context *tdb, u32 hash)
278 struct list_struct rec;
280 /* read in the hash top */
281 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
285 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1)
288 if (rec.magic == TDB_DEAD_MAGIC) {
297 * Purge all DEAD records from a hash chain
299 static int tdb_purge_dead(struct tdb_context *tdb, u32 hash)
302 struct list_struct rec;
305 if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
309 /* read in the hash top */
310 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
316 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1) {
322 if (rec.magic == TDB_DEAD_MAGIC
323 && tdb_do_delete(tdb, rec_ptr, &rec) == -1) {
330 tdb_unlock(tdb, -1, F_WRLCK);
334 /* delete an entry in the database given a key */
335 static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash)
338 struct list_struct rec;
341 if (tdb->max_dead_records != 0) {
344 * Allow for some dead records per hash chain, mainly for
345 * tdb's with a very high create/delete rate like locking.tdb.
348 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
351 if (tdb_count_dead(tdb, hash) >= tdb->max_dead_records) {
353 * Don't let the per-chain freelist grow too large,
354 * delete all existing dead records
356 tdb_purge_dead(tdb, hash);
359 if (!(rec_ptr = tdb_find(tdb, key, hash, &rec))) {
360 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
365 * Just mark the record as dead.
367 rec.magic = TDB_DEAD_MAGIC;
368 ret = tdb_rec_write(tdb, rec_ptr, &rec);
371 if (!(rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK,
375 ret = tdb_do_delete(tdb, rec_ptr, &rec);
379 tdb_increment_seqnum(tdb);
382 if (tdb_unlock(tdb, BUCKET(rec.full_hash), F_WRLCK) != 0)
383 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_delete: WARNING tdb_unlock failed!\n"));
387 int tdb_delete(struct tdb_context *tdb, TDB_DATA key)
389 u32 hash = tdb->hash_fn(&key);
390 return tdb_delete_hash(tdb, key, hash);
394 * See if we have a dead record around with enough space
396 static tdb_off_t tdb_find_dead(struct tdb_context *tdb, u32 hash,
397 struct list_struct *r, tdb_len_t length)
401 /* read in the hash top */
402 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
405 /* keep looking until we find the right record */
407 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
410 if (TDB_DEAD(r) && r->rec_len >= length) {
412 * First fit for simple coding, TODO: change to best
422 /* store an element in the database, replacing any existing element
425 return 0 on success, -1 on failure
427 int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
429 struct list_struct rec;
435 if (tdb->read_only || tdb->traverse_read) {
436 tdb->ecode = TDB_ERR_RDONLY;
440 /* find which hash bucket it is in */
441 hash = tdb->hash_fn(&key);
442 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
445 /* check for it existing, on insert. */
446 if (flag == TDB_INSERT) {
447 if (tdb_exists_hash(tdb, key, hash)) {
448 tdb->ecode = TDB_ERR_EXISTS;
452 /* first try in-place update, on modify or replace. */
453 if (tdb_update_hash(tdb, key, hash, dbuf) == 0) {
456 if (tdb->ecode == TDB_ERR_NOEXIST &&
457 flag == TDB_MODIFY) {
458 /* if the record doesn't exist and we are in TDB_MODIFY mode then
459 we should fail the store */
463 /* reset the error code potentially set by the tdb_update() */
464 tdb->ecode = TDB_SUCCESS;
466 /* delete any existing record - if it doesn't exist we don't
467 care. Doing this first reduces fragmentation, and avoids
468 coalescing with `allocated' block before it's updated. */
469 if (flag != TDB_INSERT)
470 tdb_delete_hash(tdb, key, hash);
472 /* Copy key+value *before* allocating free space in case malloc
473 fails and we are left with a dead spot in the tdb. */
475 if (!(p = (char *)malloc(key.dsize + dbuf.dsize))) {
476 tdb->ecode = TDB_ERR_OOM;
480 memcpy(p, key.dptr, key.dsize);
482 memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize);
484 if (tdb->max_dead_records != 0) {
486 * Allow for some dead records per hash chain, look if we can
487 * find one that can hold the new record. We need enough space
488 * for key, data and tailer. If we find one, we don't have to
489 * consult the central freelist.
491 rec_ptr = tdb_find_dead(
493 key.dsize + dbuf.dsize + sizeof(tdb_off_t));
496 rec.key_len = key.dsize;
497 rec.data_len = dbuf.dsize;
498 rec.full_hash = hash;
499 rec.magic = TDB_MAGIC;
500 if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
501 || tdb->methods->tdb_write(
502 tdb, rec_ptr + sizeof(rec),
503 p, key.dsize + dbuf.dsize) == -1) {
511 * We have to allocate some space from the freelist, so this means we
512 * have to lock it. Use the chance to purge all the DEAD records from
513 * the hash chain under the freelist lock.
516 if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
520 if ((tdb->max_dead_records != 0)
521 && (tdb_purge_dead(tdb, hash) == -1)) {
522 tdb_unlock(tdb, -1, F_WRLCK);
526 /* we have to allocate some space */
527 rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize, &rec);
529 tdb_unlock(tdb, -1, F_WRLCK);
535 /* Read hash top into next ptr */
536 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
539 rec.key_len = key.dsize;
540 rec.data_len = dbuf.dsize;
541 rec.full_hash = hash;
542 rec.magic = TDB_MAGIC;
544 /* write out and point the top of the hash chain at it */
545 if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
546 || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1
547 || tdb_ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) {
548 /* Need to tdb_unallocate() here */
556 tdb_increment_seqnum(tdb);
560 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
565 /* Append to an entry. Create if not exist. */
566 int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
572 /* find which hash bucket it is in */
573 hash = tdb->hash_fn(&key);
574 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
577 dbuf = tdb_fetch(tdb, key);
579 if (dbuf.dptr == NULL) {
580 dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize);
582 dbuf.dptr = (unsigned char *)realloc(dbuf.dptr,
583 dbuf.dsize + new_dbuf.dsize);
586 if (dbuf.dptr == NULL) {
587 tdb->ecode = TDB_ERR_OOM;
591 memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize);
592 dbuf.dsize += new_dbuf.dsize;
594 ret = tdb_store(tdb, key, dbuf, 0);
597 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
598 SAFE_FREE(dbuf.dptr);
604 return the name of the current tdb file
605 useful for external logging functions
607 const char *tdb_name(struct tdb_context *tdb)
613 return the underlying file descriptor being used by tdb, or -1
614 useful for external routines that want to check the device/inode
617 int tdb_fd(struct tdb_context *tdb)
623 return the current logging function
624 useful for external tdb routines that wish to log tdb errors
626 tdb_log_func tdb_log_fn(struct tdb_context *tdb)
628 return tdb->log.log_fn;
633 get the tdb sequence number. Only makes sense if the writers opened
634 with TDB_SEQNUM set. Note that this sequence number will wrap quite
635 quickly, so it should only be used for a 'has something changed'
636 test, not for code that relies on the count of the number of changes
637 made. If you want a counter then use a tdb record.
639 The aim of this sequence number is to allow for a very lightweight
640 test of a possible tdb change.
642 int tdb_get_seqnum(struct tdb_context *tdb)
646 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
650 int tdb_hash_size(struct tdb_context *tdb)
652 return tdb->header.hash_size;
655 size_t tdb_map_size(struct tdb_context *tdb)
657 return tdb->map_size;
660 int tdb_get_flags(struct tdb_context *tdb)
667 enable sequence number handling on an open tdb
669 void tdb_enable_seqnum(struct tdb_context *tdb)
671 tdb->flags |= TDB_SEQNUM;