lib/tdb/common/tdb.c

   1  /*
   2    Unix SMB/CIFS implementation.
   3
   4    trivial database library
   5
   6    Copyright (C) Andrew Tridgell              1999-2005
   7    Copyright (C) Paul `Rusty' Russell              2000
   8    Copyright (C) Jeremy Allison                    2000-2003
   9
  10      ** NOTE! The following LGPL license applies to the tdb
  11      ** library. This does NOT imply that all of Samba is released
  12      ** under the LGPL
  13
  14    This library is free software; you can redistribute it and/or
  15    modify it under the terms of the GNU Lesser General Public
  16    License as published by the Free Software Foundation; either
  17    version 3 of the License, or (at your option) any later version.
  18
  19    This library is distributed in the hope that it will be useful,
  20    but WITHOUT ANY WARRANTY; without even the implied warranty of
  21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  22    Lesser General Public License for more details.
  23
  24    You should have received a copy of the GNU Lesser General Public
  25    License along with this library; if not, see <http://www.gnu.org/licenses/>.
  26 */
  27
  28 #include "tdb_private.h"
  29
  30 TDB_DATA tdb_null; /* shared empty TDB_DATA: never assigned in this file, so as a file-scope object it starts zeroed; tdb_fetch() returns it when the lookup fails */
  31
  32 /*
  33   non-blocking increment of the tdb sequence number if the tdb has been opened using
  34   the TDB_SEQNUM flag
  35 */
  36 void tdb_increment_seqnum_nonblock(struct tdb_context *tdb)
  37 {
  38         tdb_off_t seqnum=0;
  39
  40         if (!(tdb->flags & TDB_SEQNUM)) {
  41                 return;
  42         }
  43
  44         /* we ignore errors from this, as we have no sane way of
  45            dealing with them.
  46         */
  47         tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
  48         seqnum++;
  49         tdb_ofs_write(tdb, TDB_SEQNUM_OFS, &seqnum);
  50 }
  51
  52 /*
  53   increment the tdb sequence number if the tdb has been opened using
  54   the TDB_SEQNUM flag
  55 */
  56 static void tdb_increment_seqnum(struct tdb_context *tdb)
  57 {
  58         if (!(tdb->flags & TDB_SEQNUM)) {
  59                 return;
  60         }
  61
  62         if (tdb_brlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, F_SETLKW, 1, 1) != 0) {
  63                 return;
  64         }
  65
  66         tdb_increment_seqnum_nonblock(tdb);
  67
  68         tdb_brlock(tdb, TDB_SEQNUM_OFS, F_UNLCK, F_SETLKW, 1, 1);
  69 }
  70
  71 static int tdb_key_compare(TDB_DATA key, TDB_DATA data, void *private_data)
  72 {
  73         return memcmp(data.dptr, key.dptr, data.dsize);
  74 }
  75
  76 /* Returns 0 on fail.  On success, return offset of record, and fills
  77    in rec */
  78 static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
  79                         struct list_struct *r)
  80 {
  81         tdb_off_t rec_ptr;
  82
  83         /* read in the hash top */
  84         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
  85                 return 0;
  86
  87         /* keep looking until we find the right record */
  88         while (rec_ptr) {
  89                 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
  90                         return 0;
  91
  92                 if (!TDB_DEAD(r) && hash==r->full_hash
  93                     && key.dsize==r->key_len
  94                     && tdb_parse_data(tdb, key, rec_ptr + sizeof(*r),
  95                                       r->key_len, tdb_key_compare,
  96                                       NULL) == 0) {
  97                         return rec_ptr;
  98                 }
  99                 /* detect tight infinite loop */
 100                 if (rec_ptr == r->next) {
 101                         TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_find: loop detected.\n"));
 102                         return TDB_ERRCODE(TDB_ERR_CORRUPT, 0);
 103                 }
 104                 rec_ptr = r->next;
 105         }
 106         return TDB_ERRCODE(TDB_ERR_NOEXIST, 0);
 107 }
 108
 109 /* As tdb_find, but if you succeed, keep the lock */
 110 tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype,
 111                            struct list_struct *rec)
 112 {
 113         uint32_t rec_ptr;
 114
 115         if (tdb_lock(tdb, BUCKET(hash), locktype) == -1)
 116                 return 0;
 117         if (!(rec_ptr = tdb_find(tdb, key, hash, rec)))
 118                 tdb_unlock(tdb, BUCKET(hash), locktype);
 119         return rec_ptr;
 120 }
 121
 122
 123 /* update an entry in place - this only works if the new data size
 124    is <= the old data size and the key exists.
 125    on failure return -1.
 126 */
 127 static int tdb_update_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, TDB_DATA dbuf)
 128 {
 129         struct list_struct rec;
 130         tdb_off_t rec_ptr;
 131
 132         /* find entry */
 133         if (!(rec_ptr = tdb_find(tdb, key, hash, &rec)))
 134                 return -1;
 135
 136         /* must be long enough key, data and tailer */
 137         if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb_off_t)) {
 138                 tdb->ecode = TDB_SUCCESS; /* Not really an error */
 139                 return -1;
 140         }
 141
 142         if (tdb->methods->tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
 143                       dbuf.dptr, dbuf.dsize) == -1)
 144                 return -1;
 145
 146         if (dbuf.dsize != rec.data_len) {
 147                 /* update size */
 148                 rec.data_len = dbuf.dsize;
 149                 return tdb_rec_write(tdb, rec_ptr, &rec);
 150         }
 151
 152         return 0;
 153 }
 154
 155 /* find an entry in the database given a key */
 156 /* If an entry doesn't exist tdb_err will be set to
 157  * TDB_ERR_NOEXIST. If a key has no data attached
 158  * then the TDB_DATA will have zero length but
 159  * a non-zero pointer
 160  */
 161 TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
 162 {
 163         tdb_off_t rec_ptr;
 164         struct list_struct rec;
 165         TDB_DATA ret;
 166         uint32_t hash;
 167
 168         /* find which hash bucket it is in */
 169         hash = tdb->hash_fn(&key);
 170         if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec)))
 171                 return tdb_null;
 172
 173         ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
 174                                   rec.data_len);
 175         ret.dsize = rec.data_len;
 176         tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
 177         return ret;
 178 }
 179
 180 /*
 181  * Find an entry in the database and hand the record's data to a parsing
 182  * function. The parsing function is executed under the chain read lock, so it
 183  * should be fast and should not block on other syscalls.
 184  *
 185  * DONT CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
 186  *
 187  * For mmapped tdb's that do not have a transaction open it points the parsing
 188  * function directly at the mmap area, it avoids the malloc/memcpy in this
 189  * case. If a transaction is open or no mmap is available, it has to do
 190  * malloc/read/parse/free.
 191  *
 192  * This is interesting for all readers of potentially large data structures in
 193  * the tdb records, ldb indexes being one example.
 194  */
 195
 196 int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
 197                      int (*parser)(TDB_DATA key, TDB_DATA data,
 198                                    void *private_data),
 199                      void *private_data)
 200 {
 201         tdb_off_t rec_ptr;
 202         struct list_struct rec;
 203         int ret;
 204         uint32_t hash;
 205
 206         /* find which hash bucket it is in */
 207         hash = tdb->hash_fn(&key);
 208
 209         if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
 210                 return TDB_ERRCODE(TDB_ERR_NOEXIST, 0);
 211         }
 212
 213         ret = tdb_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
 214                              rec.data_len, parser, private_data);
 215
 216         tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
 217
 218         return ret;
 219 }
 220
 221 /* check if an entry in the database exists
 222
 223    note that 1 is returned if the key is found and 0 is returned if not found
 224    this doesn't match the conventions in the rest of this module, but is
 225    compatible with gdbm
 226 */
 227 static int tdb_exists_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
 228 {
 229         struct list_struct rec;
 230
 231         if (tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
 232                 return 0;
 233         tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
 234         return 1;
 235 }
 236
 237 int tdb_exists(struct tdb_context *tdb, TDB_DATA key)
 238 {
 239         uint32_t hash = tdb->hash_fn(&key);
 240         return tdb_exists_hash(tdb, key, hash);
 241 }
 242
 243 /* actually delete an entry in the database given the offset */
 244 int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct list_struct *rec)
 245 {
 246         tdb_off_t last_ptr, i;
 247         struct list_struct lastrec;
 248
 249         if (tdb->read_only || tdb->traverse_read) return -1;
 250
 251         if (((tdb->traverse_write != 0) && (!TDB_DEAD(rec))) ||
 252             tdb_write_lock_record(tdb, rec_ptr) == -1) {
 253                 /* Someone traversing here: mark it as dead */
 254                 rec->magic = TDB_DEAD_MAGIC;
 255                 return tdb_rec_write(tdb, rec_ptr, rec);
 256         }
 257         if (tdb_write_unlock_record(tdb, rec_ptr) != 0)
 258                 return -1;
 259
 260         /* find previous record in hash chain */
 261         if (tdb_ofs_read(tdb, TDB_HASH_TOP(rec->full_hash), &i) == -1)
 262                 return -1;
 263         for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
 264                 if (tdb_rec_read(tdb, i, &lastrec) == -1)
 265                         return -1;
 266
 267         /* unlink it: next ptr is at start of record. */
 268         if (last_ptr == 0)
 269                 last_ptr = TDB_HASH_TOP(rec->full_hash);
 270         if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1)
 271                 return -1;
 272
 273         /* recover the space */
 274         if (tdb_free(tdb, rec_ptr, rec) == -1)
 275                 return -1;
 276         return 0;
 277 }
 278
 279 static int tdb_count_dead(struct tdb_context *tdb, uint32_t hash)
 280 {
 281         int res = 0;
 282         tdb_off_t rec_ptr;
 283         struct list_struct rec;
 284
 285         /* read in the hash top */
 286         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
 287                 return 0;
 288
 289         while (rec_ptr) {
 290                 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1)
 291                         return 0;
 292
 293                 if (rec.magic == TDB_DEAD_MAGIC) {
 294                         res += 1;
 295                 }
 296                 rec_ptr = rec.next;
 297         }
 298         return res;
 299 }
 300
 301 /*
 302  * Purge all DEAD records from a hash chain
 303  */
 304 static int tdb_purge_dead(struct tdb_context *tdb, uint32_t hash)
 305 {
 306         int res = -1;
 307         struct list_struct rec;
 308         tdb_off_t rec_ptr;
 309
 310         if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
 311                 return -1;
 312         }
 313
 314         /* read in the hash top */
 315         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
 316                 goto fail;
 317
 318         while (rec_ptr) {
 319                 tdb_off_t next;
 320
 321                 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1) {
 322                         goto fail;
 323                 }
 324
 325                 next = rec.next;
 326
 327                 if (rec.magic == TDB_DEAD_MAGIC
 328                     && tdb_do_delete(tdb, rec_ptr, &rec) == -1) {
 329                         goto fail;
 330                 }
 331                 rec_ptr = next;
 332         }
 333         res = 0;
 334  fail:
 335         tdb_unlock(tdb, -1, F_WRLCK);
 336         return res;
 337 }
 338
 339 /* delete an entry in the database given a key */
 340 static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
 341 {
 342         tdb_off_t rec_ptr;
 343         struct list_struct rec;
 344         int ret;
 345
 346         if (tdb->max_dead_records != 0) {
 347
 348                 /*
 349                  * Allow for some dead records per hash chain, mainly for
 350                  * tdb's with a very high create/delete rate like locking.tdb.
 351                  */
 352
 353                 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
 354                         return -1;
 355
 356                 if (tdb_count_dead(tdb, hash) >= tdb->max_dead_records) {
 357                         /*
 358                          * Don't let the per-chain freelist grow too large,
 359                          * delete all existing dead records
 360                          */
 361                         tdb_purge_dead(tdb, hash);
 362                 }
 363
 364                 if (!(rec_ptr = tdb_find(tdb, key, hash, &rec))) {
 365                         tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
 366                         return -1;
 367                 }
 368
 369                 /*
 370                  * Just mark the record as dead.
 371                  */
 372                 rec.magic = TDB_DEAD_MAGIC;
 373                 ret = tdb_rec_write(tdb, rec_ptr, &rec);
 374         }
 375         else {
 376                 if (!(rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK,
 377                                                    &rec)))
 378                         return -1;
 379
 380                 ret = tdb_do_delete(tdb, rec_ptr, &rec);
 381         }
 382
 383         if (ret == 0) {
 384                 tdb_increment_seqnum(tdb);
 385         }
 386
 387         if (tdb_unlock(tdb, BUCKET(rec.full_hash), F_WRLCK) != 0)
 388                 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_delete: WARNING tdb_unlock failed!\n"));
 389         return ret;
 390 }
 391
 392 int tdb_delete(struct tdb_context *tdb, TDB_DATA key)
 393 {
 394         uint32_t hash = tdb->hash_fn(&key);
 395         return tdb_delete_hash(tdb, key, hash);
 396 }
 397
 398 /*
 399  * See if we have a dead record around with enough space
 400  */
 401 static tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash,
 402                                struct list_struct *r, tdb_len_t length)
 403 {
 404         tdb_off_t rec_ptr;
 405
 406         /* read in the hash top */
 407         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
 408                 return 0;
 409
 410         /* keep looking until we find the right record */
 411         while (rec_ptr) {
 412                 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
 413                         return 0;
 414
 415                 if (TDB_DEAD(r) && r->rec_len >= length) {
 416                         /*
 417                          * First fit for simple coding, TODO: change to best
 418                          * fit
 419                          */
 420                         return rec_ptr;
 421                 }
 422                 rec_ptr = r->next;
 423         }
 424         return 0;
 425 }
 426
 427 /* store an element in the database, replacing any existing element
 428    with the same key
 429
 430    return 0 on success, -1 on failure
 431 */
 432 int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 433 {
 434         struct list_struct rec;
 435         uint32_t hash;
 436         tdb_off_t rec_ptr;
 437         char *p = NULL;
 438         int ret = -1;
 439
 440         if (tdb->read_only || tdb->traverse_read) {
 441                 tdb->ecode = TDB_ERR_RDONLY;
 442                 return -1;
 443         }
 444
 445         /* find which hash bucket it is in */
 446         hash = tdb->hash_fn(&key);
 447         if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
 448                 return -1;
 449
 450         /* check for it existing, on insert. */
 451         if (flag == TDB_INSERT) {
 452                 if (tdb_exists_hash(tdb, key, hash)) {
 453                         tdb->ecode = TDB_ERR_EXISTS;
 454                         goto fail;
 455                 }
 456         } else {
 457                 /* first try in-place update, on modify or replace. */
 458                 if (tdb_update_hash(tdb, key, hash, dbuf) == 0) {
 459                         goto done;
 460                 }
 461                 if (tdb->ecode == TDB_ERR_NOEXIST &&
 462                     flag == TDB_MODIFY) {
 463                         /* if the record doesn't exist and we are in TDB_MODIFY mode then
 464                          we should fail the store */
 465                         goto fail;
 466                 }
 467         }
 468         /* reset the error code potentially set by the tdb_update() */
 469         tdb->ecode = TDB_SUCCESS;
 470
 471         /* delete any existing record - if it doesn't exist we don't
 472            care.  Doing this first reduces fragmentation, and avoids
 473            coalescing with `allocated' block before it's updated. */
 474         if (flag != TDB_INSERT)
 475                 tdb_delete_hash(tdb, key, hash);
 476
 477         /* Copy key+value *before* allocating free space in case malloc
 478            fails and we are left with a dead spot in the tdb. */
 479
 480         if (!(p = (char *)malloc(key.dsize + dbuf.dsize))) {
 481                 tdb->ecode = TDB_ERR_OOM;
 482                 goto fail;
 483         }
 484
 485         memcpy(p, key.dptr, key.dsize);
 486         if (dbuf.dsize)
 487                 memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize);
 488
 489         if (tdb->max_dead_records != 0) {
 490                 /*
 491                  * Allow for some dead records per hash chain, look if we can
 492                  * find one that can hold the new record. We need enough space
 493                  * for key, data and tailer. If we find one, we don't have to
 494                  * consult the central freelist.
 495                  */
 496                 rec_ptr = tdb_find_dead(
 497                         tdb, hash, &rec,
 498                         key.dsize + dbuf.dsize + sizeof(tdb_off_t));
 499
 500                 if (rec_ptr != 0) {
 501                         rec.key_len = key.dsize;
 502                         rec.data_len = dbuf.dsize;
 503                         rec.full_hash = hash;
 504                         rec.magic = TDB_MAGIC;
 505                         if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
 506                             || tdb->methods->tdb_write(
 507                                     tdb, rec_ptr + sizeof(rec),
 508                                     p, key.dsize + dbuf.dsize) == -1) {
 509                                 goto fail;
 510                         }
 511                         goto done;
 512                 }
 513         }
 514
 515         /*
 516          * We have to allocate some space from the freelist, so this means we
 517          * have to lock it. Use the chance to purge all the DEAD records from
 518          * the hash chain under the freelist lock.
 519          */
 520
 521         if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
 522                 goto fail;
 523         }
 524
 525         if ((tdb->max_dead_records != 0)
 526             && (tdb_purge_dead(tdb, hash) == -1)) {
 527                 tdb_unlock(tdb, -1, F_WRLCK);
 528                 goto fail;
 529         }
 530
 531         /* we have to allocate some space */
 532         rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize, &rec);
 533
 534         tdb_unlock(tdb, -1, F_WRLCK);
 535
 536         if (rec_ptr == 0) {
 537                 goto fail;
 538         }
 539
 540         /* Read hash top into next ptr */
 541         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
 542                 goto fail;
 543
 544         rec.key_len = key.dsize;
 545         rec.data_len = dbuf.dsize;
 546         rec.full_hash = hash;
 547         rec.magic = TDB_MAGIC;
 548
 549         /* write out and point the top of the hash chain at it */
 550         if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
 551             || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1
 552             || tdb_ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) {
 553                 /* Need to tdb_unallocate() here */
 554                 goto fail;
 555         }
 556
 557  done:
 558         ret = 0;
 559  fail:
 560         if (ret == 0) {
 561                 tdb_increment_seqnum(tdb);
 562         }
 563
 564         SAFE_FREE(p);
 565         tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
 566         return ret;
 567 }
 568
 569
 570 /* Append to an entry. Create if not exist. */
 571 int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
 572 {
 573         uint32_t hash;
 574         TDB_DATA dbuf;
 575         int ret = -1;
 576
 577         /* find which hash bucket it is in */
 578         hash = tdb->hash_fn(&key);
 579         if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
 580                 return -1;
 581
 582         dbuf = tdb_fetch(tdb, key);
 583
 584         if (dbuf.dptr == NULL) {
 585                 dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize);
 586         } else {
 587                 unsigned int new_len = dbuf.dsize + new_dbuf.dsize;
 588                 unsigned char *new_dptr;
 589
 590                 /* realloc '0' is special: don't do that. */
 591                 if (new_len == 0)
 592                         new_len = 1;
 593                 new_dptr = (unsigned char *)realloc(dbuf.dptr, new_len);
 594                 if (new_dptr == NULL) {
 595                         free(dbuf.dptr);
 596                 }
 597                 dbuf.dptr = new_dptr;
 598         }
 599
 600         if (dbuf.dptr == NULL) {
 601                 tdb->ecode = TDB_ERR_OOM;
 602                 goto failed;
 603         }
 604
 605         memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize);
 606         dbuf.dsize += new_dbuf.dsize;
 607
 608         ret = tdb_store(tdb, key, dbuf, 0);
 609
 610 failed:
 611         tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
 612         SAFE_FREE(dbuf.dptr);
 613         return ret;
 614 }
 615
 616
 617 /*
 618   return the name of the current tdb file
 619   useful for external logging functions
 620 */
 621 const char *tdb_name(struct tdb_context *tdb)
 622 {
 623         return tdb->name;
 624 }
 625
 626 /*
 627   return the underlying file descriptor being used by tdb, or -1
 628   useful for external routines that want to check the device/inode
 629   of the fd
 630 */
 631 int tdb_fd(struct tdb_context *tdb)
 632 {
 633         return tdb->fd;
 634 }
 635
 636 /*
 637   return the current logging function
 638   useful for external tdb routines that wish to log tdb errors
 639 */
 640 tdb_log_func tdb_log_fn(struct tdb_context *tdb)
 641 {
 642         return tdb->log.log_fn;
 643 }
 644
 645
 646 /*
 647   get the tdb sequence number. Only makes sense if the writers opened
 648   with TDB_SEQNUM set. Note that this sequence number will wrap quite
 649   quickly, so it should only be used for a 'has something changed'
 650   test, not for code that relies on the count of the number of changes
 651   made. If you want a counter then use a tdb record.
 652
 653   The aim of this sequence number is to allow for a very lightweight
 654   test of a possible tdb change.
 655 */
 656 int tdb_get_seqnum(struct tdb_context *tdb)
 657 {
 658         tdb_off_t seqnum=0;
 659
 660         tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
 661         return seqnum;
 662 }
 663
 664 int tdb_hash_size(struct tdb_context *tdb)
 665 {
 666         return tdb->header.hash_size;
 667 }
 668
 669 size_t tdb_map_size(struct tdb_context *tdb)
 670 {
 671         return tdb->map_size;
 672 }
 673
 674 int tdb_get_flags(struct tdb_context *tdb)
 675 {
 676         return tdb->flags;
 677 }
 678
 679 void tdb_add_flags(struct tdb_context *tdb, unsigned flags)
 680 {
 681         tdb->flags |= flags;
 682 }
 683
 684 void tdb_remove_flags(struct tdb_context *tdb, unsigned flags)
 685 {
 686         tdb->flags &= ~flags;
 687 }
 688
 689
 690 /*
 691   enable sequence number handling on an open tdb
 692 */
 693 void tdb_enable_seqnum(struct tdb_context *tdb)
 694 {
 695         tdb->flags |= TDB_SEQNUM;
 696 }
 697
 698
 699 /*
 700   add a region of the file to the freelist. Length is the size of the region in bytes,
 701   which includes the free list header that needs to be added
 702  */
 703 static int tdb_free_region(struct tdb_context *tdb, tdb_off_t offset, ssize_t length)
 704 {
 705         struct list_struct rec;
 706         if (length <= sizeof(rec)) {
 707                 /* the region is not worth adding */
 708                 return 0;
 709         }
 710         if (length + offset > tdb->map_size) {
 711                 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: adding region beyond end of file\n"));
 712                 return -1;
 713         }
 714         memset(&rec,'\0',sizeof(rec));
 715         rec.rec_len = length - sizeof(rec);
 716         if (tdb_free(tdb, offset, &rec) == -1) {
 717                 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: failed to add free record\n"));
 718                 return -1;
 719         }
 720         return 0;
 721 }
 722
 723 /*
 724   wipe the entire database, deleting all records. This can be done
 725   very fast by using a global lock. The entire data portion of the
 726   file becomes a single entry in the freelist.
 727
 728   This code carefully steps around the recovery area, leaving it alone
 729  */
 730 int tdb_wipe_all(struct tdb_context *tdb)
 731 {
 732         int i;
 733         tdb_off_t offset = 0;
 734         ssize_t data_len;
 735         tdb_off_t recovery_head;
 736         tdb_len_t recovery_size = 0;
 737
 738         if (tdb_lockall(tdb) != 0) {
 739                 return -1;
 740         }
 741
 742         /* see if the tdb has a recovery area, and remember its size
 743            if so. We don't want to lose this as otherwise each
 744            tdb_wipe_all() in a transaction will increase the size of
 745            the tdb by the size of the recovery area */
 746         if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) {
 747                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery head\n"));
 748                 goto failed;
 749         }
 750
 751         if (recovery_head != 0) {
 752                 struct list_struct rec;
 753                 if (tdb->methods->tdb_read(tdb, recovery_head, &rec, sizeof(rec), DOCONV()) == -1) {
 754                         TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery record\n"));
 755                         return -1;
 756                 }
 757                 recovery_size = rec.rec_len + sizeof(rec);
 758         }
 759
 760         /* wipe the hashes */
 761         for (i=0;i<tdb->header.hash_size;i++) {
 762                 if (tdb_ofs_write(tdb, TDB_HASH_TOP(i), &offset) == -1) {
 763                         TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write hash %d\n", i));
 764                         goto failed;
 765                 }
 766         }
 767
 768         /* wipe the freelist */
 769         if (tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) {
 770                 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write freelist\n"));
 771                 goto failed;
 772         }
 773
 774         /* add all the rest of the file to the freelist, possibly leaving a gap
 775            for the recovery area */
 776         if (recovery_size == 0) {
 777                 /* the simple case - the whole file can be used as a freelist */
 778                 data_len = (tdb->map_size - TDB_DATA_START(tdb->header.hash_size));
 779                 if (tdb_free_region(tdb, TDB_DATA_START(tdb->header.hash_size), data_len) != 0) {
 780                         goto failed;
 781                 }
 782         } else {
 783                 /* we need to add two freelist entries - one on either
 784                    side of the recovery area
 785
 786                    Note that we cannot shift the recovery area during
 787                    this operation. Only the transaction.c code may
 788                    move the recovery area or we risk subtle data
 789                    corruption
 790                 */
 791                 data_len = (recovery_head - TDB_DATA_START(tdb->header.hash_size));
 792                 if (tdb_free_region(tdb, TDB_DATA_START(tdb->header.hash_size), data_len) != 0) {
 793                         goto failed;
 794                 }
 795                 /* and the 2nd free list entry after the recovery area - if any */
 796                 data_len = tdb->map_size - (recovery_head+recovery_size);
 797                 if (tdb_free_region(tdb, recovery_head+recovery_size, data_len) != 0) {
 798                         goto failed;
 799                 }
 800         }
 801
 802         if (tdb_unlockall(tdb) != 0) {
 803                 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to unlock\n"));
 804                 goto failed;
 805         }
 806
 807         return 0;
 808
 809 failed:
 810         tdb_unlockall(tdb);
 811         return -1;
 812 }
 813
 /*
  * State handed to repack_traverse() through its private_data pointer.
  */
struct traverse_state {
        /* set to true by repack_traverse() when a store fails */
        bool error;
        /* database that repack_traverse() stores each record into */
        struct tdb_context *dest_db;
};
 818
 819 /*
 820   traverse function for repacking
 821  */
 822 static int repack_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *private_data)
 823 {
 824         struct traverse_state *state = (struct traverse_state *)private_data;
 825         if (tdb_store(state->dest_db, key, data, TDB_INSERT) != 0) {
 826                 state->error = true;
 827                 return -1;
 828         }
 829         return 0;
 830 }
 831
 832 /*
 833   repack a tdb
 834  */
 835 int tdb_repack(struct tdb_context *tdb)
 836 {
 837         struct tdb_context *tmp_db;
 838         struct traverse_state state;
 839
 840         if (tdb_transaction_start(tdb) != 0) {
 841                 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to start transaction\n"));
 842                 return -1;
 843         }
 844
 845         tmp_db = tdb_open("tmpdb", tdb_hash_size(tdb), TDB_INTERNAL, O_RDWR|O_CREAT, 0);
 846         if (tmp_db == NULL) {
 847                 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to create tmp_db\n"));
 848                 tdb_transaction_cancel(tdb);
 849                 return -1;
 850         }
 851
 852         state.error = false;
 853         state.dest_db = tmp_db;
 854
 855         if (tdb_traverse_read(tdb, repack_traverse, &state) == -1) {
 856                 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to traverse copying out\n"));
 857                 tdb_transaction_cancel(tdb);
 858                 tdb_close(tmp_db);
 859                 return -1;
 860         }
 861
 862         if (state.error) {
 863                 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Error during traversal\n"));
 864                 tdb_transaction_cancel(tdb);
 865                 tdb_close(tmp_db);
 866                 return -1;
 867         }
 868
 869         if (tdb_wipe_all(tdb) != 0) {
 870                 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to wipe database\n"));
 871                 tdb_transaction_cancel(tdb);
 872                 tdb_close(tmp_db);
 873                 return -1;
 874         }
 875
 876         state.error = false;
 877         state.dest_db = tdb;
 878
 879         if (tdb_traverse_read(tmp_db, repack_traverse, &state) == -1) {
 880                 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to traverse copying back\n"));
 881                 tdb_transaction_cancel(tdb);
 882                 tdb_close(tmp_db);
 883                 return -1;
 884         }
 885
 886         if (state.error) {
 887                 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Error during second traversal\n"));
 888                 tdb_transaction_cancel(tdb);
 889                 tdb_close(tmp_db);
 890                 return -1;
 891         }
 892
 893         tdb_close(tmp_db);
 894
 895         if (tdb_transaction_commit(tdb) != 0) {
 896                 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to commit\n"));
 897                 return -1;
 898         }
 899
 900         return 0;
 901 }