source/lib/tdb/common/tdb.c

   1  /*
   2    Unix SMB/CIFS implementation.
   3
   4    trivial database library
   5
   6    Copyright (C) Andrew Tridgell              1999-2005
   7    Copyright (C) Paul `Rusty' Russell              2000
   8    Copyright (C) Jeremy Allison                    2000-2003
   9
  10      ** NOTE! The following LGPL license applies to the tdb
  11      ** library. This does NOT imply that all of Samba is released
  12      ** under the LGPL
  13
  14    This library is free software; you can redistribute it and/or
  15    modify it under the terms of the GNU Lesser General Public
  16    License as published by the Free Software Foundation; either
  17    version 2 of the License, or (at your option) any later version.
  18
  19    This library is distributed in the hope that it will be useful,
  20    but WITHOUT ANY WARRANTY; without even the implied warranty of
  21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  22    Lesser General Public License for more details.
  23
  24    You should have received a copy of the GNU Lesser General Public
  25    License along with this library; if not, write to the Free Software
  26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  27 */
  28
  29 #include "tdb_private.h"
  30
  31 TDB_DATA tdb_null;
  32
  33 /*
  34   increment the tdb sequence number if the tdb has been opened using
  35   the TDB_SEQNUM flag
  36 */
  37 static void tdb_increment_seqnum(struct tdb_context *tdb)
  38 {
  39         tdb_off_t seqnum=0;
  40
  41         if (!(tdb->flags & TDB_SEQNUM)) {
  42                 return;
  43         }
  44
  45         if (tdb_brlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, F_SETLKW, 1, 1) != 0) {
  46                 return;
  47         }
  48
  49         /* we ignore errors from this, as we have no sane way of
  50            dealing with them.
  51         */
  52         tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
  53         seqnum++;
  54         tdb_ofs_write(tdb, TDB_SEQNUM_OFS, &seqnum);
  55
  56         tdb_brlock(tdb, TDB_SEQNUM_OFS, F_UNLCK, F_SETLKW, 1, 1);
  57 }
  58
  59 static int tdb_key_compare(TDB_DATA key, TDB_DATA data, void *private_data)
  60 {
  61         return memcmp(data.dptr, key.dptr, data.dsize);
  62 }
  63
  64 /* Returns 0 on fail.  On success, return offset of record, and fills
  65    in rec */
  66 static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
  67                         struct list_struct *r)
  68 {
  69         tdb_off_t rec_ptr;
  70
  71         /* read in the hash top */
  72         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
  73                 return 0;
  74
  75         /* keep looking until we find the right record */
  76         while (rec_ptr) {
  77                 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
  78                         return 0;
  79
  80                 if (!TDB_DEAD(r) && hash==r->full_hash
  81                     && key.dsize==r->key_len
  82                     && tdb_parse_data(tdb, key, rec_ptr + sizeof(*r),
  83                                       r->key_len, tdb_key_compare,
  84                                       NULL) == 0) {
  85                         return rec_ptr;
  86                 }
  87                 rec_ptr = r->next;
  88         }
  89         return TDB_ERRCODE(TDB_ERR_NOEXIST, 0);
  90 }
  91
  92 /* As tdb_find, but if you succeed, keep the lock */
  93 tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key,
  94                                                          uint32_t hash, int locktype,
  95                            struct list_struct *rec)
  96 {
  97         uint32_t rec_ptr;
  98
  99         if (tdb_lock(tdb, BUCKET(hash), locktype) == -1)
 100                 return 0;
 101         if (!(rec_ptr = tdb_find(tdb, key, hash, rec)))
 102                 tdb_unlock(tdb, BUCKET(hash), locktype);
 103         return rec_ptr;
 104 }
 105
 106
 107 /* update an entry in place - this only works if the new data size
 108    is <= the old data size and the key exists.
 109    on failure return -1.
 110 */
 111 static int tdb_update_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, TDB_DATA dbuf)
 112 {
 113         struct list_struct rec;
 114         tdb_off_t rec_ptr;
 115
 116         /* find entry */
 117         if (!(rec_ptr = tdb_find(tdb, key, hash, &rec)))
 118                 return -1;
 119
 120         /* must be long enough key, data and tailer */
 121         if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb_off_t)) {
 122                 tdb->ecode = TDB_SUCCESS; /* Not really an error */
 123                 return -1;
 124         }
 125
 126         if (tdb->methods->tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
 127                       dbuf.dptr, dbuf.dsize) == -1)
 128                 return -1;
 129
 130         if (dbuf.dsize != rec.data_len) {
 131                 /* update size */
 132                 rec.data_len = dbuf.dsize;
 133                 return tdb_rec_write(tdb, rec_ptr, &rec);
 134         }
 135
 136         return 0;
 137 }
 138
 139 /* find an entry in the database given a key */
 140 /* If an entry doesn't exist tdb_err will be set to
 141  * TDB_ERR_NOEXIST. If a key has no data attached
 142  * then the TDB_DATA will have zero length but
 143  * a non-zero pointer
 144  */
 145 TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
 146 {
 147         tdb_off_t rec_ptr;
 148         struct list_struct rec;
 149         TDB_DATA ret;
 150         uint32_t hash;
 151
 152         /* find which hash bucket it is in */
 153         hash = tdb->hash_fn(&key);
 154         if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec)))
 155                 return tdb_null;
 156
 157         ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
 158                                   rec.data_len);
 159         ret.dsize = rec.data_len;
 160         tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
 161         return ret;
 162 }
 163
 164 /*
 165  * Find an entry in the database and hand the record's data to a parsing
 166  * function. The parsing function is executed under the chain read lock, so it
 167  * should be fast and should not block on other syscalls.
 168  *
 169  * DONT CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
 170  *
 171  * For mmapped tdb's that do not have a transaction open it points the parsing
 172  * function directly at the mmap area, it avoids the malloc/memcpy in this
 173  * case. If a transaction is open or no mmap is available, it has to do
 174  * malloc/read/parse/free.
 175  *
 176  * This is interesting for all readers of potentially large data structures in
 177  * the tdb records, ldb indexes being one example.
 178  */
 179
 180 int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
 181                      int (*parser)(TDB_DATA key, TDB_DATA data,
 182                                    void *private_data),
 183                      void *private_data)
 184 {
 185         tdb_off_t rec_ptr;
 186         struct list_struct rec;
 187         int ret;
 188         uint32_t hash;
 189
 190         /* find which hash bucket it is in */
 191         hash = tdb->hash_fn(&key);
 192
 193         if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
 194                 return TDB_ERRCODE(TDB_ERR_NOEXIST, 0);
 195         }
 196
 197         ret = tdb_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
 198                              rec.data_len, parser, private_data);
 199
 200         tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
 201
 202         return ret;
 203 }
 204
 205 /* check if an entry in the database exists
 206
 207    note that 1 is returned if the key is found and 0 is returned if not found
 208    this doesn't match the conventions in the rest of this module, but is
 209    compatible with gdbm
 210 */
 211 static int tdb_exists_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
 212 {
 213         struct list_struct rec;
 214
 215         if (tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
 216                 return 0;
 217         tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
 218         return 1;
 219 }
 220
 221 int tdb_exists(struct tdb_context *tdb, TDB_DATA key)
 222 {
 223         uint32_t hash = tdb->hash_fn(&key);
 224         return tdb_exists_hash(tdb, key, hash);
 225 }
 226
 227 /* actually delete an entry in the database given the offset */
 228 int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct list_struct*rec)
 229 {
 230         tdb_off_t last_ptr, i;
 231         struct list_struct lastrec;
 232
 233         if (tdb->read_only || tdb->traverse_read) return -1;
 234
 235         if (tdb_write_lock_record(tdb, rec_ptr) == -1) {
 236                 /* Someone traversing here: mark it as dead */
 237                 rec->magic = TDB_DEAD_MAGIC;
 238                 return tdb_rec_write(tdb, rec_ptr, rec);
 239         }
 240         if (tdb_write_unlock_record(tdb, rec_ptr) != 0)
 241                 return -1;
 242
 243         /* find previous record in hash chain */
 244         if (tdb_ofs_read(tdb, TDB_HASH_TOP(rec->full_hash), &i) == -1)
 245                 return -1;
 246         for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
 247                 if (tdb_rec_read(tdb, i, &lastrec) == -1)
 248                         return -1;
 249
 250         /* unlink it: next ptr is at start of record. */
 251         if (last_ptr == 0)
 252                 last_ptr = TDB_HASH_TOP(rec->full_hash);
 253         if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1)
 254                 return -1;
 255
 256         /* recover the space */
 257         if (tdb_free(tdb, rec_ptr, rec) == -1)
 258                 return -1;
 259         return 0;
 260 }
 261
 262 static int tdb_count_dead(struct tdb_context *tdb, uint32_t hash)
 263 {
 264         int res = 0;
 265         tdb_off_t rec_ptr;
 266         struct list_struct rec;
 267
 268         /* read in the hash top */
 269         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
 270                 return 0;
 271
 272         while (rec_ptr) {
 273                 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1)
 274                         return 0;
 275
 276                 if (rec.magic == TDB_DEAD_MAGIC) {
 277                         res += 1;
 278                 }
 279                 rec_ptr = rec.next;
 280         }
 281         return res;
 282 }
 283
 284 /*
 285  * Purge all DEAD records from a hash chain
 286  */
 287 static int tdb_purge_dead(struct tdb_context *tdb, uint32_t hash)
 288 {
 289         int res = -1;
 290         struct list_struct rec;
 291         tdb_off_t rec_ptr;
 292
 293         if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
 294                 return -1;
 295         }
 296
 297         /* read in the hash top */
 298         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
 299                 goto fail;
 300
 301         while (rec_ptr) {
 302                 tdb_off_t next;
 303
 304                 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1) {
 305                         goto fail;
 306                 }
 307
 308                 next = rec.next;
 309
 310                 if (rec.magic == TDB_DEAD_MAGIC
 311                     && tdb_do_delete(tdb, rec_ptr, &rec) == -1) {
 312                         goto fail;
 313                 }
 314                 rec_ptr = next;
 315         }
 316         res = 0;
 317  fail:
 318         tdb_unlock(tdb, -1, F_WRLCK);
 319         return res;
 320 }
 321
 322 /* delete an entry in the database given a key */
 323 static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
 324 {
 325         tdb_off_t rec_ptr;
 326         struct list_struct rec;
 327         int ret;
 328
 329         if (tdb->max_dead_records != 0) {
 330
 331                 /*
 332                  * Allow for some dead records per hash chain, mainly for
 333                  * tdb's with a very high create/delete rate like locking.tdb.
 334                  */
 335
 336                 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
 337                         return -1;
 338
 339                 if (tdb_count_dead(tdb, hash) >= tdb->max_dead_records) {
 340                         /*
 341                          * Don't let the per-chain freelist grow too large,
 342                          * delete all existing dead records
 343                          */
 344                         tdb_purge_dead(tdb, hash);
 345                 }
 346
 347                 if (!(rec_ptr = tdb_find(tdb, key, hash, &rec))) {
 348                         tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
 349                         return -1;
 350                 }
 351
 352                 /*
 353                  * Just mark the record as dead.
 354                  */
 355                 rec.magic = TDB_DEAD_MAGIC;
 356                 ret = tdb_rec_write(tdb, rec_ptr, &rec);
 357         }
 358         else {
 359                 if (!(rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK,
 360                                                    &rec)))
 361                         return -1;
 362
 363                 ret = tdb_do_delete(tdb, rec_ptr, &rec);
 364         }
 365
 366         if (ret == 0) {
 367                 tdb_increment_seqnum(tdb);
 368         }
 369
 370         if (tdb_unlock(tdb, BUCKET(rec.full_hash), F_WRLCK) != 0)
 371                 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_delete: WARNING tdb_unlock failed!\n"));
 372         return ret;
 373 }
 374
 375 int tdb_delete(struct tdb_context *tdb, TDB_DATA key)
 376 {
 377         uint32_t hash = tdb->hash_fn(&key);
 378         return tdb_delete_hash(tdb, key, hash);
 379 }
 380
 381 /*
 382  * See if we have a dead record around with enough space
 383  */
 384 static tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash,
 385                                struct list_struct *r, tdb_len_t length)
 386 {
 387         tdb_off_t rec_ptr;
 388
 389         /* read in the hash top */
 390         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
 391                 return 0;
 392
 393         /* keep looking until we find the right record */
 394         while (rec_ptr) {
 395                 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
 396                         return 0;
 397
 398                 if (TDB_DEAD(r) && r->rec_len >= length) {
 399                         /*
 400                          * First fit for simple coding, TODO: change to best
 401                          * fit
 402                          */
 403                         return rec_ptr;
 404                 }
 405                 rec_ptr = r->next;
 406         }
 407         return 0;
 408 }
 409
 410 /* store an element in the database, replacing any existing element
 411    with the same key
 412
 413    return 0 on success, -1 on failure
 414 */
 415 int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 416 {
 417         struct list_struct rec;
 418         uint32_t hash;
 419         tdb_off_t rec_ptr;
 420         char *p = NULL;
 421         int ret = -1;
 422
 423         if (tdb->read_only || tdb->traverse_read) {
 424                 tdb->ecode = TDB_ERR_RDONLY;
 425                 return -1;
 426         }
 427
 428         /* find which hash bucket it is in */
 429         hash = tdb->hash_fn(&key);
 430         if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
 431                 return -1;
 432
 433         /* check for it existing, on insert. */
 434         if (flag == TDB_INSERT) {
 435                 if (tdb_exists_hash(tdb, key, hash)) {
 436                         tdb->ecode = TDB_ERR_EXISTS;
 437                         goto fail;
 438                 }
 439         } else {
 440                 /* first try in-place update, on modify or replace. */
 441                 if (tdb_update_hash(tdb, key, hash, dbuf) == 0) {
 442                         goto done;
 443                 }
 444                 if (tdb->ecode == TDB_ERR_NOEXIST &&
 445                     flag == TDB_MODIFY) {
 446                         /* if the record doesn't exist and we are in TDB_MODIFY mode then
 447                          we should fail the store */
 448                         goto fail;
 449                 }
 450         }
 451         /* reset the error code potentially set by the tdb_update() */
 452         tdb->ecode = TDB_SUCCESS;
 453
 454         /* delete any existing record - if it doesn't exist we don't
 455            care.  Doing this first reduces fragmentation, and avoids
 456            coalescing with `allocated' block before it's updated. */
 457         if (flag != TDB_INSERT)
 458                 tdb_delete_hash(tdb, key, hash);
 459
 460         /* Copy key+value *before* allocating free space in case malloc
 461            fails and we are left with a dead spot in the tdb. */
 462
 463         if (!(p = (char *)malloc(key.dsize + dbuf.dsize))) {
 464                 tdb->ecode = TDB_ERR_OOM;
 465                 goto fail;
 466         }
 467
 468         memcpy(p, key.dptr, key.dsize);
 469         if (dbuf.dsize)
 470                 memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize);
 471
 472         if (tdb->max_dead_records != 0) {
 473                 /*
 474                  * Allow for some dead records per hash chain, look if we can
 475                  * find one that can hold the new record. We need enough space
 476                  * for key, data and tailer. If we find one, we don't have to
 477                  * consult the central freelist.
 478                  */
 479                 rec_ptr = tdb_find_dead(
 480                         tdb, hash, &rec,
 481                         key.dsize + dbuf.dsize + sizeof(tdb_off_t));
 482
 483                 if (rec_ptr != 0) {
 484                         rec.key_len = key.dsize;
 485                         rec.data_len = dbuf.dsize;
 486                         rec.full_hash = hash;
 487                         rec.magic = TDB_MAGIC;
 488                         if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
 489                             || tdb->methods->tdb_write(
 490                                     tdb, rec_ptr + sizeof(rec),
 491                                     p, key.dsize + dbuf.dsize) == -1) {
 492                                 goto fail;
 493                         }
 494                         goto done;
 495                 }
 496         }
 497
 498         /*
 499          * We have to allocate some space from the freelist, so this means we
 500          * have to lock it. Use the chance to purge all the DEAD records from
 501          * the hash chain under the freelist lock.
 502          */
 503
 504         if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
 505                 goto fail;
 506         }
 507
 508         if ((tdb->max_dead_records != 0)
 509             && (tdb_purge_dead(tdb, hash) == -1)) {
 510                 tdb_unlock(tdb, -1, F_WRLCK);
 511                 goto fail;
 512         }
 513
 514         /* we have to allocate some space */
 515         rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize, &rec);
 516
 517         tdb_unlock(tdb, -1, F_WRLCK);
 518
 519         if (rec_ptr == 0) {
 520                 goto fail;
 521         }
 522
 523         /* Read hash top into next ptr */
 524         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
 525                 goto fail;
 526
 527         rec.key_len = key.dsize;
 528         rec.data_len = dbuf.dsize;
 529         rec.full_hash = hash;
 530         rec.magic = TDB_MAGIC;
 531
 532         /* write out and point the top of the hash chain at it */
 533         if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
 534             || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1
 535             || tdb_ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) {
 536                 /* Need to tdb_unallocate() here */
 537                 goto fail;
 538         }
 539
 540  done:
 541         ret = 0;
 542  fail:
 543         if (ret == 0) {
 544                 tdb_increment_seqnum(tdb);
 545         }
 546
 547         SAFE_FREE(p);
 548         tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
 549         return ret;
 550 }
 551
 552
 553 /* Append to an entry. Create if not exist. */
 554 int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
 555 {
 556         uint32_t hash;
 557         TDB_DATA dbuf;
 558         int ret = -1;
 559
 560         /* find which hash bucket it is in */
 561         hash = tdb->hash_fn(&key);
 562         if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
 563                 return -1;
 564
 565         dbuf = tdb_fetch(tdb, key);
 566
 567         if (dbuf.dptr == NULL) {
 568                 dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize);
 569         } else {
 570                 unsigned char *new_dptr = (unsigned char *)realloc(dbuf.dptr,
 571                                                      dbuf.dsize + new_dbuf.dsize);
 572                 if (new_dptr == NULL) {
 573                         free(dbuf.dptr);
 574                 }
 575                 dbuf.dptr = new_dptr;
 576         }
 577
 578         if (dbuf.dptr == NULL) {
 579                 tdb->ecode = TDB_ERR_OOM;
 580                 goto failed;
 581         }
 582
 583         memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize);
 584         dbuf.dsize += new_dbuf.dsize;
 585
 586         ret = tdb_store(tdb, key, dbuf, 0);
 587
 588 failed:
 589         tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
 590         SAFE_FREE(dbuf.dptr);
 591         return ret;
 592 }
 593
 594
 595 /*
 596   return the name of the current tdb file
 597   useful for external logging functions
 598 */
 599 const char *tdb_name(struct tdb_context *tdb)
 600 {
 601         return tdb->name;
 602 }
 603
 604 /*
 605   return the underlying file descriptor being used by tdb, or -1
 606   useful for external routines that want to check the device/inode
 607   of the fd
 608 */
 609 int tdb_fd(struct tdb_context *tdb)
 610 {
 611         return tdb->fd;
 612 }
 613
 614 /*
 615   return the current logging function
 616   useful for external tdb routines that wish to log tdb errors
 617 */
 618 tdb_log_func tdb_log_fn(struct tdb_context *tdb)
 619 {
 620         return tdb->log.log_fn;
 621 }
 622
 623
 624 /*
 625   get the tdb sequence number. Only makes sense if the writers opened
 626   with TDB_SEQNUM set. Note that this sequence number will wrap quite
 627   quickly, so it should only be used for a 'has something changed'
 628   test, not for code that relies on the count of the number of changes
 629   made. If you want a counter then use a tdb record.
 630
 631   The aim of this sequence number is to allow for a very lightweight
 632   test of a possible tdb change.
 633 */
 634 int tdb_get_seqnum(struct tdb_context *tdb)
 635 {
 636         tdb_off_t seqnum=0;
 637
 638         tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
 639         return seqnum;
 640 }
 641
 642 int tdb_hash_size(struct tdb_context *tdb)
 643 {
 644         return tdb->header.hash_size;
 645 }
 646
 647 size_t tdb_map_size(struct tdb_context *tdb)
 648 {
 649         return tdb->map_size;
 650 }
 651
 652 int tdb_get_flags(struct tdb_context *tdb)
 653 {
 654         return tdb->flags;
 655 }
 656