source3/lib/tdb/common/tdb.c

   1  /*
   2    Unix SMB/CIFS implementation.
   3
   4    trivial database library
   5
   6    Copyright (C) Andrew Tridgell              1999-2005
   7    Copyright (C) Paul `Rusty' Russell              2000
   8    Copyright (C) Jeremy Allison                    2000-2003
   9
  10      ** NOTE! The following LGPL license applies to the tdb
  11      ** library. This does NOT imply that all of Samba is released
  12      ** under the LGPL
  13
  14    This library is free software; you can redistribute it and/or
  15    modify it under the terms of the GNU Lesser General Public
  16    License as published by the Free Software Foundation; either
  17    version 3 of the License, or (at your option) any later version.
  18
  19    This library is distributed in the hope that it will be useful,
  20    but WITHOUT ANY WARRANTY; without even the implied warranty of
  21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  22    Lesser General Public License for more details.
  23
  24    You should have received a copy of the GNU Lesser General Public
  25    License along with this library; if not, write to the Free Software
  26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  27 */
  28
  29 #include "tdb_private.h"
  30
  31 TDB_DATA tdb_null;
  32
  33 /*
  34   increment the tdb sequence number if the tdb has been opened using
  35   the TDB_SEQNUM flag
  36 */
  37 static void tdb_increment_seqnum(struct tdb_context *tdb)
  38 {
  39         tdb_off_t seqnum=0;
  40
  41         if (!(tdb->flags & TDB_SEQNUM)) {
  42                 return;
  43         }
  44
  45         if (tdb_brlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, F_SETLKW, 1, 1) != 0) {
  46                 return;
  47         }
  48
  49         /* we ignore errors from this, as we have no sane way of
  50            dealing with them.
  51         */
  52         tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
  53         seqnum++;
  54         tdb_ofs_write(tdb, TDB_SEQNUM_OFS, &seqnum);
  55
  56         tdb_brlock(tdb, TDB_SEQNUM_OFS, F_UNLCK, F_SETLKW, 1, 1);
  57 }
  58
  59 static int tdb_key_compare(TDB_DATA key, TDB_DATA data, void *private_data)
  60 {
  61         return memcmp(data.dptr, key.dptr, data.dsize);
  62 }
  63
  64 /* Returns 0 on fail.  On success, return offset of record, and fills
  65    in rec */
  66 static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, u32 hash,
  67                         struct list_struct *r)
  68 {
  69         tdb_off_t rec_ptr;
  70
  71         /* read in the hash top */
  72         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
  73                 return 0;
  74
  75         /* keep looking until we find the right record */
  76         while (rec_ptr) {
  77                 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
  78                         return 0;
  79
  80                 if (!TDB_DEAD(r) && hash==r->full_hash
  81                     && key.dsize==r->key_len
  82                     && tdb_parse_data(tdb, key, rec_ptr + sizeof(*r),
  83                                       r->key_len, tdb_key_compare,
  84                                       NULL) == 0) {
  85                         return rec_ptr;
  86                 }
  87                 rec_ptr = r->next;
  88         }
  89         return TDB_ERRCODE(TDB_ERR_NOEXIST, 0);
  90 }
  91
  92 /* As tdb_find, but if you succeed, keep the lock */
  93 tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash, int locktype,
  94                            struct list_struct *rec)
  95 {
  96         u32 rec_ptr;
  97
  98         if (tdb_lock(tdb, BUCKET(hash), locktype) == -1)
  99                 return 0;
 100         if (!(rec_ptr = tdb_find(tdb, key, hash, rec)))
 101                 tdb_unlock(tdb, BUCKET(hash), locktype);
 102         return rec_ptr;
 103 }
 104
 105
 106 /* update an entry in place - this only works if the new data size
 107    is <= the old data size and the key exists.
 108    on failure return -1.
 109 */
 110 static int tdb_update_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash, TDB_DATA dbuf)
 111 {
 112         struct list_struct rec;
 113         tdb_off_t rec_ptr;
 114
 115         /* find entry */
 116         if (!(rec_ptr = tdb_find(tdb, key, hash, &rec)))
 117                 return -1;
 118
 119         /* must be long enough key, data and tailer */
 120         if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb_off_t)) {
 121                 tdb->ecode = TDB_SUCCESS; /* Not really an error */
 122                 return -1;
 123         }
 124
 125         if (tdb->methods->tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
 126                       dbuf.dptr, dbuf.dsize) == -1)
 127                 return -1;
 128
 129         if (dbuf.dsize != rec.data_len) {
 130                 /* update size */
 131                 rec.data_len = dbuf.dsize;
 132                 return tdb_rec_write(tdb, rec_ptr, &rec);
 133         }
 134
 135         return 0;
 136 }
 137
 138 /* find an entry in the database given a key */
 139 /* If an entry doesn't exist tdb_err will be set to
 140  * TDB_ERR_NOEXIST. If a key has no data attached
 141  * then the TDB_DATA will have zero length but
 142  * a non-zero pointer
 143  */
 144 TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
 145 {
 146         tdb_off_t rec_ptr;
 147         struct list_struct rec;
 148         TDB_DATA ret;
 149         u32 hash;
 150
 151         /* find which hash bucket it is in */
 152         hash = tdb->hash_fn(&key);
 153         if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec)))
 154                 return tdb_null;
 155
 156         ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
 157                                   rec.data_len);
 158         ret.dsize = rec.data_len;
 159         tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
 160         return ret;
 161 }
 162
 163 /*
 164  * Find an entry in the database and hand the record's data to a parsing
 165  * function. The parsing function is executed under the chain read lock, so it
 166  * should be fast and should not block on other syscalls.
 167  *
 168  * DONT CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
 169  *
 170  * For mmapped tdb's that do not have a transaction open it points the parsing
 171  * function directly at the mmap area, it avoids the malloc/memcpy in this
 172  * case. If a transaction is open or no mmap is available, it has to do
 173  * malloc/read/parse/free.
 174  *
 175  * This is interesting for all readers of potentially large data structures in
 176  * the tdb records, ldb indexes being one example.
 177  */
 178
 179 int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
 180                      int (*parser)(TDB_DATA key, TDB_DATA data,
 181                                    void *private_data),
 182                      void *private_data)
 183 {
 184         tdb_off_t rec_ptr;
 185         struct list_struct rec;
 186         int ret;
 187         u32 hash;
 188
 189         /* find which hash bucket it is in */
 190         hash = tdb->hash_fn(&key);
 191
 192         if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
 193                 return TDB_ERRCODE(TDB_ERR_NOEXIST, 0);
 194         }
 195
 196         ret = tdb_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
 197                              rec.data_len, parser, private_data);
 198
 199         tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
 200
 201         return ret;
 202 }
 203
 204 /* check if an entry in the database exists
 205
 206    note that 1 is returned if the key is found and 0 is returned if not found
 207    this doesn't match the conventions in the rest of this module, but is
 208    compatible with gdbm
 209 */
 210 static int tdb_exists_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash)
 211 {
 212         struct list_struct rec;
 213
 214         if (tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
 215                 return 0;
 216         tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
 217         return 1;
 218 }
 219
 220 int tdb_exists(struct tdb_context *tdb, TDB_DATA key)
 221 {
 222         u32 hash = tdb->hash_fn(&key);
 223         return tdb_exists_hash(tdb, key, hash);
 224 }
 225
 226 /* actually delete an entry in the database given the offset */
 227 int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct list_struct*rec)
 228 {
 229         tdb_off_t last_ptr, i;
 230         struct list_struct lastrec;
 231
 232         if (tdb->read_only || tdb->traverse_read) return -1;
 233
 234         if (tdb_write_lock_record(tdb, rec_ptr) == -1) {
 235                 /* Someone traversing here: mark it as dead */
 236                 rec->magic = TDB_DEAD_MAGIC;
 237                 return tdb_rec_write(tdb, rec_ptr, rec);
 238         }
 239         if (tdb_write_unlock_record(tdb, rec_ptr) != 0)
 240                 return -1;
 241
 242         /* find previous record in hash chain */
 243         if (tdb_ofs_read(tdb, TDB_HASH_TOP(rec->full_hash), &i) == -1)
 244                 return -1;
 245         for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
 246                 if (tdb_rec_read(tdb, i, &lastrec) == -1)
 247                         return -1;
 248
 249         /* unlink it: next ptr is at start of record. */
 250         if (last_ptr == 0)
 251                 last_ptr = TDB_HASH_TOP(rec->full_hash);
 252         if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1)
 253                 return -1;
 254
 255         /* recover the space */
 256         if (tdb_free(tdb, rec_ptr, rec) == -1)
 257                 return -1;
 258         return 0;
 259 }
 260
 261 static int tdb_count_dead(struct tdb_context *tdb, u32 hash)
 262 {
 263         int res = 0;
 264         tdb_off_t rec_ptr;
 265         struct list_struct rec;
 266
 267         /* read in the hash top */
 268         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
 269                 return 0;
 270
 271         while (rec_ptr) {
 272                 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1)
 273                         return 0;
 274
 275                 if (rec.magic == TDB_DEAD_MAGIC) {
 276                         res += 1;
 277                 }
 278                 rec_ptr = rec.next;
 279         }
 280         return res;
 281 }
 282
 283 /*
 284  * Purge all DEAD records from a hash chain
 285  */
 286 static int tdb_purge_dead(struct tdb_context *tdb, u32 hash)
 287 {
 288         int res = -1;
 289         struct list_struct rec;
 290         tdb_off_t rec_ptr;
 291
 292         if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
 293                 return -1;
 294         }
 295
 296         /* read in the hash top */
 297         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
 298                 goto fail;
 299
 300         while (rec_ptr) {
 301                 tdb_off_t next;
 302
 303                 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1) {
 304                         goto fail;
 305                 }
 306
 307                 next = rec.next;
 308
 309                 if (rec.magic == TDB_DEAD_MAGIC
 310                     && tdb_do_delete(tdb, rec_ptr, &rec) == -1) {
 311                         goto fail;
 312                 }
 313                 rec_ptr = next;
 314         }
 315         res = 0;
 316  fail:
 317         tdb_unlock(tdb, -1, F_WRLCK);
 318         return res;
 319 }
 320
 321 /* delete an entry in the database given a key */
 322 static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash)
 323 {
 324         tdb_off_t rec_ptr;
 325         struct list_struct rec;
 326         int ret;
 327
 328         if (tdb->max_dead_records != 0) {
 329
 330                 /*
 331                  * Allow for some dead records per hash chain, mainly for
 332                  * tdb's with a very high create/delete rate like locking.tdb.
 333                  */
 334
 335                 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
 336                         return -1;
 337
 338                 if (tdb_count_dead(tdb, hash) >= tdb->max_dead_records) {
 339                         /*
 340                          * Don't let the per-chain freelist grow too large,
 341                          * delete all existing dead records
 342                          */
 343                         tdb_purge_dead(tdb, hash);
 344                 }
 345
 346                 if (!(rec_ptr = tdb_find(tdb, key, hash, &rec))) {
 347                         tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
 348                         return -1;
 349                 }
 350
 351                 /*
 352                  * Just mark the record as dead.
 353                  */
 354                 rec.magic = TDB_DEAD_MAGIC;
 355                 ret = tdb_rec_write(tdb, rec_ptr, &rec);
 356         }
 357         else {
 358                 if (!(rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK,
 359                                                    &rec)))
 360                         return -1;
 361
 362                 ret = tdb_do_delete(tdb, rec_ptr, &rec);
 363         }
 364
 365         if (ret == 0) {
 366                 tdb_increment_seqnum(tdb);
 367         }
 368
 369         if (tdb_unlock(tdb, BUCKET(rec.full_hash), F_WRLCK) != 0)
 370                 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_delete: WARNING tdb_unlock failed!\n"));
 371         return ret;
 372 }
 373
 374 int tdb_delete(struct tdb_context *tdb, TDB_DATA key)
 375 {
 376         u32 hash = tdb->hash_fn(&key);
 377         return tdb_delete_hash(tdb, key, hash);
 378 }
 379
 380 /*
 381  * See if we have a dead record around with enough space
 382  */
 383 static tdb_off_t tdb_find_dead(struct tdb_context *tdb, u32 hash,
 384                                struct list_struct *r, tdb_len_t length)
 385 {
 386         tdb_off_t rec_ptr;
 387
 388         /* read in the hash top */
 389         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
 390                 return 0;
 391
 392         /* keep looking until we find the right record */
 393         while (rec_ptr) {
 394                 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
 395                         return 0;
 396
 397                 if (TDB_DEAD(r) && r->rec_len >= length) {
 398                         /*
 399                          * First fit for simple coding, TODO: change to best
 400                          * fit
 401                          */
 402                         return rec_ptr;
 403                 }
 404                 rec_ptr = r->next;
 405         }
 406         return 0;
 407 }
 408
 409 /* store an element in the database, replacing any existing element
 410    with the same key
 411
 412    return 0 on success, -1 on failure
 413 */
 414 int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 415 {
 416         struct list_struct rec;
 417         u32 hash;
 418         tdb_off_t rec_ptr;
 419         char *p = NULL;
 420         int ret = -1;
 421
 422         if (tdb->read_only || tdb->traverse_read) {
 423                 tdb->ecode = TDB_ERR_RDONLY;
 424                 return -1;
 425         }
 426
 427         /* find which hash bucket it is in */
 428         hash = tdb->hash_fn(&key);
 429         if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
 430                 return -1;
 431
 432         /* check for it existing, on insert. */
 433         if (flag == TDB_INSERT) {
 434                 if (tdb_exists_hash(tdb, key, hash)) {
 435                         tdb->ecode = TDB_ERR_EXISTS;
 436                         goto fail;
 437                 }
 438         } else {
 439                 /* first try in-place update, on modify or replace. */
 440                 if (tdb_update_hash(tdb, key, hash, dbuf) == 0) {
 441                         goto done;
 442                 }
 443                 if (tdb->ecode == TDB_ERR_NOEXIST &&
 444                     flag == TDB_MODIFY) {
 445                         /* if the record doesn't exist and we are in TDB_MODIFY mode then
 446                          we should fail the store */
 447                         goto fail;
 448                 }
 449         }
 450         /* reset the error code potentially set by the tdb_update() */
 451         tdb->ecode = TDB_SUCCESS;
 452
 453         /* delete any existing record - if it doesn't exist we don't
 454            care.  Doing this first reduces fragmentation, and avoids
 455            coalescing with `allocated' block before it's updated. */
 456         if (flag != TDB_INSERT)
 457                 tdb_delete_hash(tdb, key, hash);
 458
 459         /* Copy key+value *before* allocating free space in case malloc
 460            fails and we are left with a dead spot in the tdb. */
 461
 462         if (!(p = (char *)malloc(key.dsize + dbuf.dsize))) {
 463                 tdb->ecode = TDB_ERR_OOM;
 464                 goto fail;
 465         }
 466
 467         memcpy(p, key.dptr, key.dsize);
 468         if (dbuf.dsize)
 469                 memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize);
 470
 471         if (tdb->max_dead_records != 0) {
 472                 /*
 473                  * Allow for some dead records per hash chain, look if we can
 474                  * find one that can hold the new record. We need enough space
 475                  * for key, data and tailer. If we find one, we don't have to
 476                  * consult the central freelist.
 477                  */
 478                 rec_ptr = tdb_find_dead(
 479                         tdb, hash, &rec,
 480                         key.dsize + dbuf.dsize + sizeof(tdb_off_t));
 481
 482                 if (rec_ptr != 0) {
 483                         rec.key_len = key.dsize;
 484                         rec.data_len = dbuf.dsize;
 485                         rec.full_hash = hash;
 486                         rec.magic = TDB_MAGIC;
 487                         if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
 488                             || tdb->methods->tdb_write(
 489                                     tdb, rec_ptr + sizeof(rec),
 490                                     p, key.dsize + dbuf.dsize) == -1) {
 491                                 goto fail;
 492                         }
 493                         goto done;
 494                 }
 495         }
 496
 497         /*
 498          * We have to allocate some space from the freelist, so this means we
 499          * have to lock it. Use the chance to purge all the DEAD records from
 500          * the hash chain under the freelist lock.
 501          */
 502
 503         if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
 504                 goto fail;
 505         }
 506
 507         if ((tdb->max_dead_records != 0)
 508             && (tdb_purge_dead(tdb, hash) == -1)) {
 509                 tdb_unlock(tdb, -1, F_WRLCK);
 510                 goto fail;
 511         }
 512
 513         /* we have to allocate some space */
 514         rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize, &rec);
 515
 516         tdb_unlock(tdb, -1, F_WRLCK);
 517
 518         if (rec_ptr == 0) {
 519                 goto fail;
 520         }
 521
 522         /* Read hash top into next ptr */
 523         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
 524                 goto fail;
 525
 526         rec.key_len = key.dsize;
 527         rec.data_len = dbuf.dsize;
 528         rec.full_hash = hash;
 529         rec.magic = TDB_MAGIC;
 530
 531         /* write out and point the top of the hash chain at it */
 532         if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
 533             || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1
 534             || tdb_ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) {
 535                 /* Need to tdb_unallocate() here */
 536                 goto fail;
 537         }
 538
 539  done:
 540         ret = 0;
 541  fail:
 542         if (ret == 0) {
 543                 tdb_increment_seqnum(tdb);
 544         }
 545
 546         SAFE_FREE(p);
 547         tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
 548         return ret;
 549 }
 550
 551
 552 /* Append to an entry. Create if not exist. */
 553 int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
 554 {
 555         u32 hash;
 556         TDB_DATA dbuf;
 557         int ret = -1;
 558
 559         /* find which hash bucket it is in */
 560         hash = tdb->hash_fn(&key);
 561         if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
 562                 return -1;
 563
 564         dbuf = tdb_fetch(tdb, key);
 565
 566         if (dbuf.dptr == NULL) {
 567                 dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize);
 568         } else {
 569                 unsigned char *new_dptr = (unsigned char *)realloc(dbuf.dptr,
 570                                                      dbuf.dsize + new_dbuf.dsize);
 571                 if (new_dptr == NULL) {
 572                         free(dbuf.dptr);
 573                 }
 574                 dbuf.dptr = new_dptr;
 575         }
 576
 577         if (dbuf.dptr == NULL) {
 578                 tdb->ecode = TDB_ERR_OOM;
 579                 goto failed;
 580         }
 581
 582         memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize);
 583         dbuf.dsize += new_dbuf.dsize;
 584
 585         ret = tdb_store(tdb, key, dbuf, 0);
 586
 587 failed:
 588         tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
 589         SAFE_FREE(dbuf.dptr);
 590         return ret;
 591 }
 592
 593
 594 /*
 595   return the name of the current tdb file
 596   useful for external logging functions
 597 */
 598 const char *tdb_name(struct tdb_context *tdb)
 599 {
 600         return tdb->name;
 601 }
 602
 603 /*
 604   return the underlying file descriptor being used by tdb, or -1
 605   useful for external routines that want to check the device/inode
 606   of the fd
 607 */
 608 int tdb_fd(struct tdb_context *tdb)
 609 {
 610         return tdb->fd;
 611 }
 612
 613 /*
 614   return the current logging function
 615   useful for external tdb routines that wish to log tdb errors
 616 */
 617 tdb_log_func tdb_log_fn(struct tdb_context *tdb)
 618 {
 619         return tdb->log.log_fn;
 620 }
 621
 622
 623 /*
 624   get the tdb sequence number. Only makes sense if the writers opened
 625   with TDB_SEQNUM set. Note that this sequence number will wrap quite
 626   quickly, so it should only be used for a 'has something changed'
 627   test, not for code that relies on the count of the number of changes
 628   made. If you want a counter then use a tdb record.
 629
 630   The aim of this sequence number is to allow for a very lightweight
 631   test of a possible tdb change.
 632 */
 633 int tdb_get_seqnum(struct tdb_context *tdb)
 634 {
 635         tdb_off_t seqnum=0;
 636
 637         tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
 638         return seqnum;
 639 }
 640
 641 int tdb_hash_size(struct tdb_context *tdb)
 642 {
 643         return tdb->header.hash_size;
 644 }
 645
 646 size_t tdb_map_size(struct tdb_context *tdb)
 647 {
 648         return tdb->map_size;
 649 }
 650
 651 int tdb_get_flags(struct tdb_context *tdb)
 652 {
 653         return tdb->flags;
 654 }
 655