TDB2: Goodbye TDB2, Hello NTDB.
author Rusty Russell <rusty@rustcorp.com.au>
Mon, 18 Jun 2012 13:00:26 +0000 (22:30 +0930)
committer Rusty Russell <rusty@rustcorp.com.au>
Tue, 19 Jun 2012 03:38:06 +0000 (05:38 +0200)
This renames everything from tdb2 to ntdb: importantly, we no longer
use the tdb_ namespace, so you can link against both ntdb and tdb if
you want to.

This also enables building of standalone ntdb by the autobuild script.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
148 files changed:
lib/ntdb/ABI/ntdb-0.9.sigs [new file with mode: 0644]
lib/ntdb/LICENSE [moved from lib/tdb2/LICENSE with 100% similarity]
lib/ntdb/Makefile [moved from lib/tdb2/Makefile with 100% similarity]
lib/ntdb/check.c [new file with mode: 0644]
lib/ntdb/configure [moved from lib/tdb2/configure with 100% similarity]
lib/ntdb/doc/TDB_porting.txt [new file with mode: 0644]
lib/ntdb/doc/design-1.3.txt [moved from lib/tdb2/doc/design-1.3.txt with 100% similarity]
lib/ntdb/doc/design.lyx [moved from lib/tdb2/doc/design.lyx with 100% similarity]
lib/ntdb/doc/design.lyx,v [moved from lib/tdb2/doc/design.lyx,v with 100% similarity]
lib/ntdb/doc/design.pdf [moved from lib/tdb2/doc/design.pdf with 100% similarity]
lib/ntdb/doc/design.txt [moved from lib/tdb2/doc/design.txt with 100% similarity]
lib/ntdb/free.c [new file with mode: 0644]
lib/ntdb/hash.c [new file with mode: 0644]
lib/ntdb/io.c [new file with mode: 0644]
lib/ntdb/lock.c [new file with mode: 0644]
lib/ntdb/ntdb.c [new file with mode: 0644]
lib/ntdb/ntdb.h [new file with mode: 0644]
lib/ntdb/ntdb.pc.in [moved from lib/tdb2/tdb.pc.in with 64% similarity]
lib/ntdb/open.c [new file with mode: 0644]
lib/ntdb/private.h [new file with mode: 0644]
lib/ntdb/pyntdb.c [new file with mode: 0644]
lib/ntdb/summary.c [moved from lib/tdb2/summary.c with 62% similarity]
lib/ntdb/test/api-12-store.c [moved from lib/tdb2/test/api-12-store.c with 59% similarity]
lib/ntdb/test/api-13-delete.c [new file with mode: 0644]
lib/ntdb/test/api-14-exists.c [new file with mode: 0644]
lib/ntdb/test/api-16-wipe_all.c [new file with mode: 0644]
lib/ntdb/test/api-21-parse_record.c [new file with mode: 0644]
lib/ntdb/test/api-55-transaction.c [new file with mode: 0644]
lib/ntdb/test/api-80-tdb_fd.c [moved from lib/tdb2/test/api-80-tdb_fd.c with 54% similarity]
lib/ntdb/test/api-81-seqnum.c [new file with mode: 0644]
lib/ntdb/test/api-82-lockattr.c [moved from lib/tdb2/test/api-82-lockattr.c with 59% similarity]
lib/ntdb/test/api-83-openhook.c [moved from lib/tdb2/test/api-83-openhook.c with 65% similarity]
lib/ntdb/test/api-91-get-stats.c [moved from lib/tdb2/test/api-91-get-stats.c with 57% similarity]
lib/ntdb/test/api-92-get-set-readonly.c [new file with mode: 0644]
lib/ntdb/test/api-93-repack.c [moved from lib/tdb2/test/api-93-repack.c with 57% similarity]
lib/ntdb/test/api-add-remove-flags.c [new file with mode: 0644]
lib/ntdb/test/api-check-callback.c [moved from lib/tdb2/test/api-check-callback.c with 59% similarity]
lib/ntdb/test/api-firstkey-nextkey.c [moved from lib/tdb2/test/api-firstkey-nextkey.c with 56% similarity]
lib/ntdb/test/api-fork-test.c [moved from lib/tdb2/test/api-fork-test.c with 53% similarity]
lib/ntdb/test/api-locktimeout.c [moved from lib/tdb2/test/api-locktimeout.c with 74% similarity]
lib/ntdb/test/api-missing-entries.c [moved from lib/tdb2/test/api-missing-entries.c with 64% similarity]
lib/ntdb/test/api-open-multiple-times.c [new file with mode: 0644]
lib/ntdb/test/api-record-expand.c [moved from lib/tdb2/test/api-record-expand.c with 59% similarity]
lib/ntdb/test/api-simple-delete.c [new file with mode: 0644]
lib/ntdb/test/api-summary.c [moved from lib/tdb2/test/api-summary.c with 61% similarity]
lib/ntdb/test/external-agent.c [moved from lib/tdb2/test/external-agent.c with 75% similarity]
lib/ntdb/test/external-agent.h [moved from lib/tdb2/test/external-agent.h with 70% similarity]
lib/ntdb/test/failtest_helper.c [moved from lib/tdb2/test/failtest_helper.c with 94% similarity]
lib/ntdb/test/failtest_helper.h [moved from lib/tdb2/test/failtest_helper.h with 75% similarity]
lib/ntdb/test/helpapi-external-agent.c [new file with mode: 0644]
lib/ntdb/test/helprun-external-agent.c [new file with mode: 0644]
lib/ntdb/test/helprun-layout.c [new file with mode: 0644]
lib/ntdb/test/layout.h [moved from lib/tdb2/test/layout.h with 50% similarity]
lib/ntdb/test/lock-tracking.c [moved from lib/tdb2/test/lock-tracking.c with 92% similarity]
lib/ntdb/test/lock-tracking.h [moved from lib/tdb2/test/lock-tracking.h with 100% similarity]
lib/ntdb/test/logging.c [moved from lib/tdb2/test/logging.c with 57% similarity]
lib/ntdb/test/logging.h [new file with mode: 0644]
lib/ntdb/test/ntdb-source.h [moved from lib/tdb2/test/tdb2-source.h with 91% similarity]
lib/ntdb/test/run-001-encode.c [moved from lib/tdb2/test/run-001-encode.c with 68% similarity]
lib/ntdb/test/run-001-fls.c [moved from lib/tdb2/test/run-001-fls.c with 95% similarity]
lib/ntdb/test/run-01-new_database.c [moved from lib/tdb2/test/run-01-new_database.c with 67% similarity]
lib/ntdb/test/run-02-expand.c [new file with mode: 0644]
lib/ntdb/test/run-03-coalesce.c [new file with mode: 0644]
lib/ntdb/test/run-04-basichash.c [new file with mode: 0644]
lib/ntdb/test/run-05-readonly-open.c [moved from lib/tdb2/test/run-05-readonly-open.c with 57% similarity]
lib/ntdb/test/run-10-simple-store.c [moved from lib/tdb2/test/run-10-simple-store.c with 57% similarity]
lib/ntdb/test/run-11-simple-fetch.c [moved from lib/tdb2/test/run-11-simple-fetch.c with 53% similarity]
lib/ntdb/test/run-12-check.c [moved from lib/tdb2/test/run-12-check.c with 60% similarity]
lib/ntdb/test/run-15-append.c [moved from lib/tdb2/test/run-15-append.c with 52% similarity]
lib/ntdb/test/run-20-growhash.c [new file with mode: 0644]
lib/ntdb/test/run-25-hashoverload.c [new file with mode: 0644]
lib/ntdb/test/run-30-exhaust-before-expand.c [new file with mode: 0644]
lib/ntdb/test/run-35-convert.c [moved from lib/tdb2/test/run-35-convert.c with 52% similarity]
lib/ntdb/test/run-50-multiple-freelists.c [new file with mode: 0644]
lib/ntdb/test/run-56-open-during-transaction.c [moved from lib/tdb2/test/run-56-open-during-transaction.c with 81% similarity]
lib/ntdb/test/run-57-die-during-transaction.c [moved from lib/tdb2/test/run-57-die-during-transaction.c with 90% similarity]
lib/ntdb/test/run-64-bit-tdb.c [new file with mode: 0644]
lib/ntdb/test/run-90-get-set-attributes.c [new file with mode: 0644]
lib/ntdb/test/run-capabilities.c [moved from lib/tdb2/test/run-capabilities.c with 63% similarity]
lib/ntdb/test/run-expand-in-transaction.c [new file with mode: 0644]
lib/ntdb/test/run-features.c [moved from lib/tdb2/test/run-features.c with 50% similarity]
lib/ntdb/test/run-lockall.c [moved from lib/tdb2/test/run-lockall.c with 70% similarity]
lib/ntdb/test/run-remap-in-read_traverse.c [moved from lib/tdb2/test/run-remap-in-read_traverse.c with 58% similarity]
lib/ntdb/test/run-seed.c [new file with mode: 0644]
lib/ntdb/test/run-tdb_errorstr.c [new file with mode: 0644]
lib/ntdb/test/run-tdb_foreach.c [moved from lib/tdb2/test/run-tdb_foreach.c with 54% similarity]
lib/ntdb/test/run-traverse.c [moved from lib/tdb2/test/run-traverse.c with 66% similarity]
lib/ntdb/test/tap-interface.c [moved from lib/tdb2/test/tap-interface.c with 100% similarity]
lib/ntdb/test/tap-interface.h [moved from lib/tdb2/test/tap-interface.h with 100% similarity]
lib/ntdb/tools/Makefile [new file with mode: 0644]
lib/ntdb/tools/growtdb-bench.c [new file with mode: 0644]
lib/ntdb/tools/mkntdb.c [moved from lib/tdb2/tools/mktdb2.c with 69% similarity]
lib/ntdb/tools/ntdbbackup.c [moved from lib/tdb2/tools/tdb2backup.c with 62% similarity]
lib/ntdb/tools/ntdbdump.c [moved from lib/tdb2/tools/tdb2dump.c with 77% similarity]
lib/ntdb/tools/ntdbrestore.c [moved from lib/tdb2/tools/tdb2restore.c with 84% similarity]
lib/ntdb/tools/ntdbtool.c [moved from lib/tdb2/tools/tdb2tool.c with 69% similarity]
lib/ntdb/tools/ntdbtorture.c [moved from lib/tdb2/tools/tdb2torture.c with 76% similarity]
lib/ntdb/tools/speed.c [moved from lib/tdb2/tools/speed.c with 60% similarity]
lib/ntdb/transaction.c [new file with mode: 0644]
lib/ntdb/traverse.c [new file with mode: 0644]
lib/ntdb/wscript [new file with mode: 0644]
lib/tdb2/ABI/tdb-2.0.0.sigs [deleted file]
lib/tdb2/ABI/tdb-2.0.1.sigs [deleted file]
lib/tdb2/TODO [deleted file]
lib/tdb2/_info [deleted file]
lib/tdb2/check.c [deleted file]
lib/tdb2/doc/TDB1_porting.txt [deleted file]
lib/tdb2/free.c [deleted file]
lib/tdb2/hash.c [deleted file]
lib/tdb2/io.c [deleted file]
lib/tdb2/lock.c [deleted file]
lib/tdb2/open.c [deleted file]
lib/tdb2/private.h [deleted file]
lib/tdb2/pytdb.c [deleted file]
lib/tdb2/tdb.c [deleted file]
lib/tdb2/tdb2.h [deleted file]
lib/tdb2/test/api-13-delete.c [deleted file]
lib/tdb2/test/api-14-exists.c [deleted file]
lib/tdb2/test/api-16-wipe_all.c [deleted file]
lib/tdb2/test/api-21-parse_record.c [deleted file]
lib/tdb2/test/api-55-transaction.c [deleted file]
lib/tdb2/test/api-81-seqnum.c [deleted file]
lib/tdb2/test/api-92-get-set-readonly.c [deleted file]
lib/tdb2/test/api-add-remove-flags.c [deleted file]
lib/tdb2/test/api-open-multiple-times.c [deleted file]
lib/tdb2/test/api-simple-delete.c [deleted file]
lib/tdb2/test/helpapi-external-agent.c [deleted file]
lib/tdb2/test/helprun-external-agent.c [deleted file]
lib/tdb2/test/helprun-layout.c [deleted file]
lib/tdb2/test/logging.h [deleted file]
lib/tdb2/test/run-02-expand.c [deleted file]
lib/tdb2/test/run-03-coalesce.c [deleted file]
lib/tdb2/test/run-04-basichash.c [deleted file]
lib/tdb2/test/run-20-growhash.c [deleted file]
lib/tdb2/test/run-25-hashoverload.c [deleted file]
lib/tdb2/test/run-30-exhaust-before-expand.c [deleted file]
lib/tdb2/test/run-50-multiple-freelists.c [deleted file]
lib/tdb2/test/run-64-bit-tdb.c [deleted file]
lib/tdb2/test/run-90-get-set-attributes.c [deleted file]
lib/tdb2/test/run-expand-in-transaction.c [deleted file]
lib/tdb2/test/run-seed.c [deleted file]
lib/tdb2/test/run-tdb_errorstr.c [deleted file]
lib/tdb2/tools/Makefile [deleted file]
lib/tdb2/tools/growtdb-bench.c [deleted file]
lib/tdb2/transaction.c [deleted file]
lib/tdb2/traverse.c [deleted file]
lib/tdb2/wscript [deleted file]
script/autobuild.py

diff --git a/lib/ntdb/ABI/ntdb-0.9.sigs b/lib/ntdb/ABI/ntdb-0.9.sigs
new file mode 100644 (file)
index 0000000..6dae18f
--- /dev/null
@@ -0,0 +1,39 @@
+ntdb_add_flag: void (struct ntdb_context *, unsigned int)
+ntdb_append: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, NTDB_DATA)
+ntdb_chainlock: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA)
+ntdb_chainlock_read: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA)
+ntdb_chainunlock: void (struct ntdb_context *, NTDB_DATA)
+ntdb_chainunlock_read: void (struct ntdb_context *, NTDB_DATA)
+ntdb_check_: enum NTDB_ERROR (struct ntdb_context *, enum NTDB_ERROR (*)(NTDB_DATA, NTDB_DATA, void *), void *)
+ntdb_close: int (struct ntdb_context *)
+ntdb_delete: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA)
+ntdb_error: enum NTDB_ERROR (struct ntdb_context *)
+ntdb_errorstr: const char *(enum NTDB_ERROR)
+ntdb_exists: bool (struct ntdb_context *, NTDB_DATA)
+ntdb_fd: int (const struct ntdb_context *)
+ntdb_fetch: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, NTDB_DATA *)
+ntdb_firstkey: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA *)
+ntdb_foreach_: void (int (*)(struct ntdb_context *, void *), void *)
+ntdb_get_attribute: enum NTDB_ERROR (struct ntdb_context *, union ntdb_attribute *)
+ntdb_get_flags: unsigned int (struct ntdb_context *)
+ntdb_get_seqnum: int64_t (struct ntdb_context *)
+ntdb_lockall: enum NTDB_ERROR (struct ntdb_context *)
+ntdb_lockall_read: enum NTDB_ERROR (struct ntdb_context *)
+ntdb_name: const char *(const struct ntdb_context *)
+ntdb_nextkey: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA *)
+ntdb_open: struct ntdb_context *(const char *, int, int, mode_t, union ntdb_attribute *)
+ntdb_parse_record_: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, enum NTDB_ERROR (*)(NTDB_DATA, NTDB_DATA, void *), void *)
+ntdb_remove_flag: void (struct ntdb_context *, unsigned int)
+ntdb_repack: enum NTDB_ERROR (struct ntdb_context *)
+ntdb_set_attribute: enum NTDB_ERROR (struct ntdb_context *, const union ntdb_attribute *)
+ntdb_store: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, NTDB_DATA, int)
+ntdb_summary: enum NTDB_ERROR (struct ntdb_context *, enum ntdb_summary_flags, char **)
+ntdb_transaction_cancel: void (struct ntdb_context *)
+ntdb_transaction_commit: enum NTDB_ERROR (struct ntdb_context *)
+ntdb_transaction_prepare_commit: enum NTDB_ERROR (struct ntdb_context *)
+ntdb_transaction_start: enum NTDB_ERROR (struct ntdb_context *)
+ntdb_traverse_: int64_t (struct ntdb_context *, int (*)(struct ntdb_context *, NTDB_DATA, NTDB_DATA, void *), void *)
+ntdb_unlockall: void (struct ntdb_context *)
+ntdb_unlockall_read: void (struct ntdb_context *)
+ntdb_unset_attribute: void (struct ntdb_context *, enum ntdb_attribute_type)
+ntdb_wipe_all: enum NTDB_ERROR (struct ntdb_context *)
similarity index 100%
rename from lib/tdb2/LICENSE
rename to lib/ntdb/LICENSE
similarity index 100%
rename from lib/tdb2/Makefile
rename to lib/ntdb/Makefile
diff --git a/lib/ntdb/check.c b/lib/ntdb/check.c
new file mode 100644 (file)
index 0000000..1c676c7
--- /dev/null
@@ -0,0 +1,864 @@
+ /*
+   Trivial Database 2: database consistency checking
+   Copyright (C) Rusty Russell 2010
+
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 3 of the License, or (at your option) any later version.
+
+   This library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include "private.h"
+#include <ccan/likely/likely.h>
+#include <ccan/asearch/asearch.h>
+
+/* Add @off to the end of the offset array *@arr, which holds *@num entries.
+ *
+ * The offset arrays are meant to be kept ordered; this helper only adds to
+ * the end.  On success *@arr points at the (possibly moved) array, *@num is
+ * one larger and true is returned.  If realloc() fails, false is returned
+ * and neither *@arr nor *@num is modified. */
+static bool append(ntdb_off_t **arr, size_t *num, ntdb_off_t off)
+{
+       size_t count = *num;
+       ntdb_off_t *grown;
+
+       grown = realloc(*arr, (count + 1) * sizeof(*grown));
+       if (grown == NULL)
+               return false;
+
+       grown[count] = off;
+       *arr = grown;
+       *num = count + 1;
+       return true;
+}
+
+/* Validate the database header stored at offset 0.
+ *
+ * Checks, in order: the stored hash_test value against a freshly computed
+ * hash of NTDB_HASH_MAGIC, the magic string, that features_used is a subset
+ * of features_offered, that a non-zero recovery offset lies between
+ * sizeof(hdr) and the mapped file size, and finally every entry on the
+ * capability list.
+ *
+ * On success *@features is set to the offered features, *@recovery to the
+ * recovery offset from the header, and *@num_capabilities has been
+ * incremented once per capability record walked.
+ * NOTE(review): *@num_capabilities is only ever incremented here, so the
+ * caller is expected to have initialised it.
+ *
+ * Returns NTDB_SUCCESS, or the error from the first failing read or check. */
+static enum NTDB_ERROR check_header(struct ntdb_context *ntdb, ntdb_off_t *recovery,
+                                  uint64_t *features, size_t *num_capabilities)
+{
+       uint64_t hash_test;
+       struct ntdb_header hdr;
+       enum NTDB_ERROR ecode;
+       ntdb_off_t off, next;
+
+       ecode = ntdb_read_convert(ntdb, 0, &hdr, sizeof(hdr));
+       if (ecode != NTDB_SUCCESS) {
+               return ecode;
+       }
+       /* magic food should not be converted, so convert back. */
+       ntdb_convert(ntdb, hdr.magic_food, sizeof(hdr.magic_food));
+
+       /* Hash a known constant and compare with the value in the header. */
+       hash_test = NTDB_HASH_MAGIC;
+       hash_test = ntdb_hash(ntdb, &hash_test, sizeof(hash_test));
+       if (hdr.hash_test != hash_test) {
+               return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+                                 "check: hash test %llu should be %llu",
+                                 (long long)hdr.hash_test,
+                                 (long long)hash_test);
+       }
+
+       if (strcmp(hdr.magic_food, NTDB_MAGIC_FOOD) != 0) {
+               return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+                                 "check: bad magic '%.*s'",
+                                 (unsigned)sizeof(hdr.magic_food),
+                                 hdr.magic_food);
+       }
+
+       /* Features which are used must be a subset of features offered. */
+       if (hdr.features_used & ~hdr.features_offered) {
+               return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+                                 "check: features used (0x%llx) which"
+                                 " are not offered (0x%llx)",
+                                 (long long)hdr.features_used,
+                                 (long long)hdr.features_offered);
+       }
+
+       *features = hdr.features_offered;
+       *recovery = hdr.recovery;
+       /* A zero recovery offset is accepted without any range check. */
+       if (*recovery) {
+               if (*recovery < sizeof(hdr)
+                   || *recovery > ntdb->file->map_size) {
+                       return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+                                         "ntdb_check:"
+                                         " invalid recovery offset %zu",
+                                         (size_t)*recovery);
+               }
+       }
+
+       /* Walk the capability list until a zero next-offset.
+        * NOTE(review): ecode is not reassigned inside this loop (failures
+        * return directly), so the "ecode == NTDB_SUCCESS" test is always
+        * true here. */
+       for (off = hdr.capabilities; off && ecode == NTDB_SUCCESS; off = next) {
+               const struct ntdb_capability *cap;
+               enum NTDB_ERROR e;
+
+               cap = ntdb_access_read(ntdb, off, sizeof(*cap), true);
+               if (NTDB_PTR_IS_ERR(cap)) {
+                       return NTDB_PTR_ERR(cap);
+               }
+
+               /* All capabilities are unknown. */
+               e = unknown_capability(ntdb, "ntdb_check", cap->type);
+               /* Save the link before releasing the access to cap. */
+               next = cap->next;
+               ntdb_access_release(ntdb, cap);
+               if (e)
+                       return e;
+               (*num_capabilities)++;
+       }
+
+       /* Don't check reserved: they *can* be used later. */
+       return NTDB_SUCCESS;
+}
+
+/* Forward declaration: check_hash_chain() and check_hash_record() below call
+ * check_hash_tree(), and check_hash_tree() in turn calls
+ * check_hash_record() for sub-hash entries. */
+static enum NTDB_ERROR check_hash_tree(struct ntdb_context *ntdb,
+                                     ntdb_off_t off, unsigned int group_bits,
+                                     uint64_t hprefix,
+                                     unsigned hprefix_bits,
+                                     ntdb_off_t used[],
+                                     size_t num_used,
+                                     size_t *num_found,
+                                     enum NTDB_ERROR (*check)(NTDB_DATA,
+                                                             NTDB_DATA, void *),
+                                     void *data);
+
+/* Validate the hash chain record at @off and every record linked after it.
+ *
+ * The record header must carry NTDB_CHAIN_MAGIC, a data length of exactly
+ * sizeof(struct ntdb_chain), a zero key length and a zero hash field.  The
+ * chain body is then checked by check_hash_tree() with zero group bits and
+ * all 64 prefix bits in use, so @hash is passed as the full prefix.
+ *
+ * If the chain's next field is non-zero, that record is counted in
+ * *@num_found and checked the same way.
+ * NOTE(review): each link is handled by a recursive call and no cycle
+ * detection is visible in this function -- confirm how corrupt, looping
+ * chains are bounded.
+ *
+ * @used, @num_used, @check and @data are passed through unchanged to
+ * check_hash_tree().  Returns NTDB_SUCCESS or the first error found. */
+static enum NTDB_ERROR check_hash_chain(struct ntdb_context *ntdb,
+                                      ntdb_off_t off,
+                                      uint64_t hash,
+                                      ntdb_off_t used[],
+                                      size_t num_used,
+                                      size_t *num_found,
+                                      enum NTDB_ERROR (*check)(NTDB_DATA,
+                                                              NTDB_DATA,
+                                                              void *),
+                                      void *data)
+{
+       struct ntdb_used_record rec;
+       enum NTDB_ERROR ecode;
+
+       ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec));
+       if (ecode != NTDB_SUCCESS) {
+               return ecode;
+       }
+
+       if (rec_magic(&rec) != NTDB_CHAIN_MAGIC) {
+               return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+                                 "ntdb_check: Bad hash chain magic %llu",
+                                 (long long)rec_magic(&rec));
+       }
+
+       if (rec_data_length(&rec) != sizeof(struct ntdb_chain)) {
+               return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+                                 "ntdb_check:"
+                                 " Bad hash chain length %llu vs %zu",
+                                 (long long)rec_data_length(&rec),
+                                 sizeof(struct ntdb_chain));
+       }
+       if (rec_key_length(&rec) != 0) {
+               return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+                                 "ntdb_check: Bad hash chain key length %llu",
+                                 (long long)rec_key_length(&rec));
+       }
+       if (rec_hash(&rec) != 0) {
+               return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+                                 "ntdb_check: Bad hash chain hash value %llu",
+                                 (long long)rec_hash(&rec));
+       }
+
+       /* Step over the record header to the struct ntdb_chain body. */
+       off += sizeof(rec);
+       ecode = check_hash_tree(ntdb, off, 0, hash, 64,
+                               used, num_used, num_found, check, data);
+       if (ecode != NTDB_SUCCESS) {
+               return ecode;
+       }
+
+       /* Follow the link to the next chain record; zero ends the chain. */
+       off = ntdb_read_off(ntdb, off + offsetof(struct ntdb_chain, next));
+       if (NTDB_OFF_IS_ERR(off)) {
+               return NTDB_OFF_TO_ERR(off);
+       }
+       if (off == 0)
+               return NTDB_SUCCESS;
+       (*num_found)++;
+       return check_hash_chain(ntdb, off, hash, used, num_used, num_found,
+                               check, data);
+}
+
+/* Validate the sub-level hash table record at @off.
+ *
+ * @hprefix holds the hash bits that led to this record and @hprefix_bits
+ * says how many of them are in use.  Once hprefix_bits reaches 64 the
+ * offset is checked as a chain record by check_hash_chain() instead.
+ *
+ * Otherwise the record header must carry NTDB_HTABLE_MAGIC, a data length
+ * of sizeof(ntdb_off_t) << NTDB_SUBLEVEL_HASH_BITS, a zero key length and a
+ * zero hash field; the table body is then checked by check_hash_tree()
+ * using NTDB_SUBLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS group bits.
+ *
+ * @used, @num_used, @num_found, @check and @data are passed through
+ * unchanged.  Returns NTDB_SUCCESS or the first error found. */
+static enum NTDB_ERROR check_hash_record(struct ntdb_context *ntdb,
+                                       ntdb_off_t off,
+                                       uint64_t hprefix,
+                                       unsigned hprefix_bits,
+                                       ntdb_off_t used[],
+                                       size_t num_used,
+                                       size_t *num_found,
+                                       enum NTDB_ERROR (*check)(NTDB_DATA,
+                                                               NTDB_DATA,
+                                                               void *),
+                                       void *data)
+{
+       struct ntdb_used_record rec;
+       enum NTDB_ERROR ecode;
+
+       /* No hash bits left to index another table level: treat as a chain. */
+       if (hprefix_bits >= 64)
+               return check_hash_chain(ntdb, off, hprefix, used, num_used,
+                                       num_found, check, data);
+
+       ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec));
+       if (ecode != NTDB_SUCCESS) {
+               return ecode;
+       }
+
+       if (rec_magic(&rec) != NTDB_HTABLE_MAGIC) {
+               return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+                                 "ntdb_check: Bad hash table magic %llu",
+                                 (long long)rec_magic(&rec));
+       }
+       if (rec_data_length(&rec)
+           != sizeof(ntdb_off_t) << NTDB_SUBLEVEL_HASH_BITS) {
+               return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+                                 "ntdb_check:"
+                                 " Bad hash table length %llu vs %llu",
+                                 (long long)rec_data_length(&rec),
+                                 (long long)sizeof(ntdb_off_t)
+                                 << NTDB_SUBLEVEL_HASH_BITS);
+       }
+       if (rec_key_length(&rec) != 0) {
+               return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+                                 "ntdb_check: Bad hash table key length %llu",
+                                 (long long)rec_key_length(&rec));
+       }
+       if (rec_hash(&rec) != 0) {
+               return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+                                 "ntdb_check: Bad hash table hash value %llu",
+                                 (long long)rec_hash(&rec));
+       }
+
+       /* Step over the record header to the table of offsets. */
+       off += sizeof(rec);
+       return check_hash_tree(ntdb, off,
+                              NTDB_SUBLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS,
+                              hprefix, hprefix_bits,
+                              used, num_used, num_found, check, data);
+}
+
+/* Three-way comparison of two offsets, used as the asearch() comparator:
+ * returns 1 if *@a is larger, -1 if *@b is larger, and 0 if they are equal. */
+static int off_cmp(const ntdb_off_t *a, const ntdb_off_t *b)
+{
+       /* Compare explicitly: a subtraction-based result can overflow an int. */
+       if (*a > *b)
+               return 1;
+       if (*a < *b)
+               return -1;
+       return 0;
+}
+
+/* Return the next @num bits of the 64-bit hash @h, taken from the most
+ * significant end, and advance the running bit count *@used by @num.
+ *
+ * Callers are expected to keep *@used + @num at or below 64.
+ *
+ * The mask is built in 64 bits because @num is fed values such as
+ * hprefix_bits, which can exceed 31; shifting a 32-bit 1U that far is
+ * undefined behaviour and would yield a truncated mask. */
+static uint64_t get_bits(uint64_t h, unsigned num, unsigned *used)
+{
+       uint64_t mask;
+
+       *used += num;
+
+       /* No bits requested: the answer is 0, and returning here avoids
+        * evaluating h >> 64 (undefined) when *used is still 0. */
+       if (num == 0)
+               return 0;
+
+       /* A full-width request needs an all-ones mask; 1 << 64 is undefined. */
+       mask = num >= 64 ? ~(uint64_t)0 : ((uint64_t)1 << num) - 1;
+       return (h >> (64 - *used)) & mask;
+}
+
+static enum NTDB_ERROR check_hash_tree(struct ntdb_context *ntdb,
+                                     ntdb_off_t off, unsigned int group_bits,
+                                     uint64_t hprefix,
+                                     unsigned hprefix_bits,
+                                     ntdb_off_t used[],
+                                     size_t num_used,
+                                     size_t *num_found,
+                                     enum NTDB_ERROR (*check)(NTDB_DATA,
+                                                             NTDB_DATA, void *),
+                                     void *data)
+{
+       unsigned int g, b;
+       const ntdb_off_t *hash;
+       struct ntdb_used_record rec;
+       enum NTDB_ERROR ecode;
+
+       hash = ntdb_access_read(ntdb, off,
+                              sizeof(ntdb_off_t)
+                              << (group_bits + NTDB_HASH_GROUP_BITS),
+                              true);
+       if (NTDB_PTR_IS_ERR(hash)) {
+               return NTDB_PTR_ERR(hash);
+       }
+
+       for (g = 0; g < (1 << group_bits); g++) {
+               const ntdb_off_t *group = hash + (g << NTDB_HASH_GROUP_BITS);
+               for (b = 0; b < (1 << NTDB_HASH_GROUP_BITS); b++) {
+                       unsigned int bucket, i, used_bits;
+                       uint64_t h;
+                       ntdb_off_t *p;
+                       if (group[b] == 0)
+                               continue;
+
+                       off = group[b] & NTDB_OFF_MASK;
+                       p = asearch(&off, used, num_used, off_cmp);
+                       if (!p) {
+                               ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+                                                  NTDB_LOG_ERROR,
+                                                  "ntdb_check: Invalid offset"
+                                                  " %llu in hash",
+                                                  (long long)off);
+                               goto fail;
+                       }
+                       /* Mark it invalid. */
+                       *p ^= 1;
+                       (*num_found)++;
+
+                       if (hprefix_bits == 64) {
+                               /* Chained entries are unordered. */
+                               if (is_subhash(group[b])) {
+                                       ecode = NTDB_ERR_CORRUPT;
+                                       ntdb_logerr(ntdb, ecode,
+                                                  NTDB_LOG_ERROR,
+                                                  "ntdb_check: Invalid chain"
+                                                  " entry subhash");
+                                       goto fail;
+                               }
+                               h = hash_record(ntdb, off);
+                               if (h != hprefix) {
+                                       ecode = NTDB_ERR_CORRUPT;
+                                       ntdb_logerr(ntdb, ecode,
+                                                  NTDB_LOG_ERROR,
+                                                  "check: bad hash chain"
+                                                  " placement"
+                                                  " 0x%llx vs 0x%llx",
+                                                  (long long)h,
+                                                  (long long)hprefix);
+                                       goto fail;
+                               }
+                               ecode = ntdb_read_convert(ntdb, off, &rec,
+                                                        sizeof(rec));
+                               if (ecode != NTDB_SUCCESS) {
+                                       goto fail;
+                               }
+                               goto check;
+                       }
+
+                       if (is_subhash(group[b])) {
+                               uint64_t subprefix;
+                               subprefix = (hprefix
+                                    << (group_bits + NTDB_HASH_GROUP_BITS))
+                                       + g * (1 << NTDB_HASH_GROUP_BITS) + b;
+
+                               ecode = check_hash_record(ntdb,
+                                              group[b] & NTDB_OFF_MASK,
+                                              subprefix,
+                                              hprefix_bits
+                                                      + group_bits
+                                                      + NTDB_HASH_GROUP_BITS,
+                                              used, num_used, num_found,
+                                              check, data);
+                               if (ecode != NTDB_SUCCESS) {
+                                       goto fail;
+                               }
+                               continue;
+                       }
+                       /* A normal entry */
+
+                       /* Does it belong here at all? */
+                       h = hash_record(ntdb, off);
+                       used_bits = 0;
+                       if (get_bits(h, hprefix_bits, &used_bits) != hprefix
+                           && hprefix_bits) {
+                               ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+                                                  NTDB_LOG_ERROR,
+                                                  "check: bad hash placement"
+                                                  " 0x%llx vs 0x%llx",
+                                                  (long long)h,
+                                                  (long long)hprefix);
+                               goto fail;
+                       }
+
+                       /* Does it belong in this group? */
+                       if (get_bits(h, group_bits, &used_bits) != g) {
+                               ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+                                                  NTDB_LOG_ERROR,
+                                                  "check: bad group %llu"
+                                                  " vs %u",
+                                                  (long long)h, g);
+                               goto fail;
+                       }
+
+                       /* Are bucket bits correct? */
+                       bucket = group[b] & NTDB_OFF_HASH_GROUP_MASK;
+                       if (get_bits(h, NTDB_HASH_GROUP_BITS, &used_bits)
+                           != bucket) {
+                               used_bits -= NTDB_HASH_GROUP_BITS;
+                               ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+                                                  NTDB_LOG_ERROR,
+                                                  "check: bad bucket %u vs %u",
+                                                  (unsigned)get_bits(h,
+                                                       NTDB_HASH_GROUP_BITS,
+                                                       &used_bits),
+                                                  bucket);
+                               goto fail;
+                       }
+
+                       /* There must not be any zero entries between
+                        * the bucket it belongs in and this one! */
+                       for (i = bucket;
+                            i != b;
+                            i = (i + 1) % (1 << NTDB_HASH_GROUP_BITS)) {
+                               if (group[i] == 0) {
+                                       ecode = NTDB_ERR_CORRUPT;
+                                       ntdb_logerr(ntdb, ecode,
+                                                  NTDB_LOG_ERROR,
+                                                  "check: bad group placement"
+                                                  " %u vs %u",
+                                                  b, bucket);
+                                       goto fail;
+                               }
+                       }
+
+                       ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec));
+                       if (ecode != NTDB_SUCCESS) {
+                               goto fail;
+                       }
+
+                       /* Bottom bits must match header. */
+                       if ((h & ((1 << 11)-1)) != rec_hash(&rec)) {
+                               ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+                                                  NTDB_LOG_ERROR,
+                                                  "ntdb_check: Bad hash magic"
+                                                  " at offset %llu"
+                                                  " (0x%llx vs 0x%llx)",
+                                                  (long long)off,
+                                                  (long long)h,
+                                                  (long long)rec_hash(&rec));
+                               goto fail;
+                       }
+
+               check:
+                       if (check) {
+                               NTDB_DATA k, d;
+                               const unsigned char *kptr;
+
+                               kptr = ntdb_access_read(ntdb,
+                                                      off + sizeof(rec),
+                                                      rec_key_length(&rec)
+                                                      + rec_data_length(&rec),
+                                                      false);
+                               if (NTDB_PTR_IS_ERR(kptr)) {
+                                       ecode = NTDB_PTR_ERR(kptr);
+                                       goto fail;
+                               }
+
+                               k = ntdb_mkdata(kptr, rec_key_length(&rec));
+                               d = ntdb_mkdata(kptr + k.dsize,
+                                              rec_data_length(&rec));
+                               ecode = check(k, d, data);
+                               ntdb_access_release(ntdb, kptr);
+                               if (ecode != NTDB_SUCCESS) {
+                                       goto fail;
+                               }
+                       }
+               }
+       }
+       ntdb_access_release(ntdb, hash);
+       return NTDB_SUCCESS;
+
+fail:
+       ntdb_access_release(ntdb, hash);
+       return ecode;
+}
+
+/* Check the hash tree starting from the top-level table in the header and
+ * confirm that the number of entries accounted for equals num_used.
+ *
+ * used/num_used:  offsets of used records, handed on to check_hash_tree().
+ * num_other_used: used records that are not hash entries; the count starts
+ *                 from this value.
+ * check/data:     optional per-record callback, handed on unchanged.
+ *
+ * Returns the check_hash_tree() result, or the result of logging
+ * NTDB_ERR_CORRUPT when the final count differs from num_used. */
+static enum NTDB_ERROR check_hash(struct ntdb_context *ntdb,
+                                ntdb_off_t used[],
+                                size_t num_used, size_t num_other_used,
+                                enum NTDB_ERROR (*check)(NTDB_DATA, NTDB_DATA, void *),
+                                void *data)
+{
+       enum NTDB_ERROR err;
+       /* Free tables and capabilities also show up as used. */
+       size_t seen = num_other_used;
+
+       err = check_hash_tree(ntdb,
+                             offsetof(struct ntdb_header, hashtable),
+                             NTDB_TOPLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS,
+                             0, 0, used, num_used, &seen,
+                             check, data);
+       if (err != NTDB_SUCCESS) {
+               return err;
+       }
+
+       if (seen == num_used) {
+               return NTDB_SUCCESS;
+       }
+
+       return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+                          "ntdb_check: Not all entries"
+                          " are in hash");
+}
+
+/* Sanity-check one free record reached while walking a free-table bucket.
+ *
+ * off:    file offset of the record (bounds check and log messages).
+ * frec:   the record header, already read by the caller.
+ * prev:   offset the record's back pointer must equal; 0 skips that test.
+ * ftable: index of the free table being walked.
+ * bucket: index of the bucket list the record was found on.
+ *
+ * Returns NTDB_SUCCESS, the error from the out-of-bounds probe, or the
+ * result of logging NTDB_ERR_CORRUPT on the first mismatch found. */
+static enum NTDB_ERROR check_free(struct ntdb_context *ntdb,
+                                ntdb_off_t off,
+                                const struct ntdb_free_record *frec,
+                                ntdb_off_t prev, unsigned int ftable,
+                                unsigned int bucket)
+{
+       enum NTDB_ERROR ecode;
+
+       if (frec_magic(frec) != NTDB_FREE_MAGIC) {
+               return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+                                 "ntdb_check: offset %llu bad magic 0x%llx",
+                                 (long long)off,
+                                 (long long)frec->magic_and_prev);
+       }
+       if (frec_ftable(frec) != ftable) {
+               return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+                                 "ntdb_check: offset %llu bad freetable %u",
+                                 (long long)off, frec_ftable(frec));
+       }
+
+       /* The whole record (header plus free length) must lie in the file. */
+       ecode = ntdb->io->oob(ntdb, off,
+                            frec_len(frec)
+                            + sizeof(struct ntdb_used_record),
+                            false);
+       if (ecode != NTDB_SUCCESS) {
+               return ecode;
+       }
+       if (size_to_bucket(frec_len(frec)) != bucket) {
+               return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+                                 "ntdb_check: offset %llu in wrong bucket"
+                                 " (%u vs %u)",
+                                 (long long)off,
+                                 bucket, size_to_bucket(frec_len(frec)));
+       }
+       if (prev && prev != frec_prev(frec)) {
+               /* Report the back pointer that was actually compared, not
+                * the record length. */
+               return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+                                 "ntdb_check: offset %llu bad prev"
+                                 " (%llu vs %llu)",
+                                 (long long)off,
+                                 (long long)prev, (long long)frec_prev(frec));
+       }
+       return NTDB_SUCCESS;
+}
+
+/* Validate one free table: its header, then every record on each of its
+ * NTDB_FREE_BUCKETS bucket lists.
+ *
+ * ftable_off:  file offset of the free table.
+ * ftable_num:  index of this table, which every record on it must carry.
+ * fr/num_free: array of free-record offsets, searched with asearch(); each
+ *              entry matched has its low bit flipped so it cannot match
+ *              again.
+ * num_found:   incremented once per record matched.
+ *
+ * Returns NTDB_SUCCESS or the first error encountered. */
+static enum NTDB_ERROR check_free_table(struct ntdb_context *ntdb,
+                                      ntdb_off_t ftable_off,
+                                      unsigned ftable_num,
+                                      ntdb_off_t fr[],
+                                      size_t num_free,
+                                      size_t *num_found)
+{
+       struct ntdb_freetable table;
+       unsigned int bkt;
+       enum NTDB_ERROR err;
+
+       err = ntdb_read_convert(ntdb, ftable_off, &table, sizeof(table));
+       if (err != NTDB_SUCCESS) {
+               return err;
+       }
+
+       if (rec_magic(&table.hdr) != NTDB_FTABLE_MAGIC
+           || rec_key_length(&table.hdr) != 0
+           || rec_data_length(&table.hdr) != sizeof(table) - sizeof(table.hdr)
+           || rec_hash(&table.hdr) != 0) {
+               return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+                                 "ntdb_check: Invalid header on free table");
+       }
+
+       for (bkt = 0; bkt < NTDB_FREE_BUCKETS; bkt++) {
+               struct ntdb_free_record frec;
+               ntdb_off_t cur, tail = 0, head = 0;
+               ntdb_off_t *slot;
+
+               cur = ntdb_read_off(ntdb, bucket_off(ftable_off, bkt));
+               while (cur) {
+                       if (NTDB_OFF_IS_ERR(cur)) {
+                               return NTDB_OFF_TO_ERR(cur);
+                       }
+                       /* Only the value read from the bucket itself is
+                        * masked with NTDB_OFF_MASK. */
+                       if (!head) {
+                               cur &= NTDB_OFF_MASK;
+                               head = cur;
+                       }
+
+                       err = ntdb_read_convert(ntdb, cur, &frec,
+                                               sizeof(frec));
+                       if (err != NTDB_SUCCESS) {
+                               return err;
+                       }
+                       err = check_free(ntdb, cur, &frec, tail,
+                                        ftable_num, bkt);
+                       if (err != NTDB_SUCCESS) {
+                               return err;
+                       }
+
+                       /* FIXME: Check hash bits */
+                       slot = asearch(&cur, fr, num_free, off_cmp);
+                       if (!slot) {
+                               return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+                                                 NTDB_LOG_ERROR,
+                                                 "ntdb_check: Invalid offset"
+                                                 " %llu in free table",
+                                                 (long long)cur);
+                       }
+                       /* Flip the low bit so this entry can't match twice. */
+                       *slot ^= 1;
+                       (*num_found)++;
+
+                       tail = cur;
+                       cur = frec.next;
+               }
+
+               if (!head) {
+                       continue;
+               }
+
+               /* The list is complete, so the head's back pointer can now
+                * be compared with the last record walked. */
+               err = ntdb_read_convert(ntdb, head, &frec, sizeof(frec));
+               if (err != NTDB_SUCCESS) {
+                       return err;
+               }
+               err = check_free(ntdb, head, &frec, tail, ftable_num, bkt);
+               if (err != NTDB_SUCCESS) {
+                       return err;
+               }
+       }
+       return NTDB_SUCCESS;
+}
+
+/* Slow, but should be very rare.
+ *
+ * Count the consecutive bytes, starting at off, that are either zero or the
+ * 0x43 fill byte.  Scanning stops at the first other byte or at the end of
+ * the mapped file.
+ *
+ * Returns the byte count, or an error encoded with NTDB_ERR_TO_OFF() if a
+ * read fails (callers test it with NTDB_OFF_IS_ERR()). */
+ntdb_off_t dead_space(struct ntdb_context *ntdb, ntdb_off_t off)
+{
+       size_t len;
+       enum NTDB_ERROR ecode;
+
+       for (len = 0; off + len < ntdb->file->map_size; len++) {
+               char c;
+               /* Read the byte being examined, not the first byte again:
+                * otherwise the scan could never stop before end of file. */
+               ecode = ntdb->io->tread(ntdb, off + len, &c, 1);
+               if (ecode != NTDB_SUCCESS) {
+                       return NTDB_ERR_TO_OFF(ecode);
+               }
+               if (c != 0 && c != 0x43)
+                       break;
+       }
+       return len;
+}
+
+/* Linear pass over every record from the end of the header to the end of
+ * the mapped file.  Each record is classified by its magic and its length
+ * is used to step to the next record.
+ *
+ * used/num_used: every record with a used-type magic has its offset passed
+ *                to append() on this array.
+ * fr/num_free:   likewise for free records whose free-table field is not
+ *                NTDB_FTABLE_NONE.
+ * features:      when zero, the first padding byte of used records must be 0.
+ * recovery:      expected offset of the recovery area (0 means none).
+ *
+ * Returns NTDB_SUCCESS, a read error, the result of logging NTDB_ERR_OOM
+ * when append() fails, or the result of logging NTDB_ERR_CORRUPT. */
+static enum NTDB_ERROR check_linear(struct ntdb_context *ntdb,
+                                  ntdb_off_t **used, size_t *num_used,
+                                  ntdb_off_t **fr, size_t *num_free,
+                                  uint64_t features, ntdb_off_t recovery)
+{
+       ntdb_off_t off;
+       ntdb_len_t len;
+       enum NTDB_ERROR ecode;
+       bool found_recovery = false;
+
+       for (off = sizeof(struct ntdb_header);
+            off < ntdb->file->map_size;
+            off += len) {
+               union {
+                       struct ntdb_used_record u;
+                       struct ntdb_free_record f;
+                       struct ntdb_recovery_record r;
+               } rec;
+               /* r is larger: only get that if we need to. */
+               ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec.f));
+               if (ecode != NTDB_SUCCESS) {
+                       return ecode;
+               }
+
+               /* If we crash after ftruncate, we can get zeroes or fill. */
+               if (rec.r.magic == NTDB_RECOVERY_INVALID_MAGIC
+                   || rec.r.magic ==  0x4343434343434343ULL) {
+                       ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec.r));
+                       if (ecode != NTDB_SUCCESS) {
+                               return ecode;
+                       }
+                       if (recovery == off) {
+                               /* NOTE(review): max_len is used as read; no
+                                * check here that off + len stays inside
+                                * map_size -- confirm that is intended. */
+                               found_recovery = true;
+                               len = sizeof(rec.r) + rec.r.max_len;
+                       } else {
+                               /* Not the recovery area: measure the run of
+                                * dead bytes and skip over it with a warning. */
+                               len = dead_space(ntdb, off);
+                               if (NTDB_OFF_IS_ERR(len)) {
+                                       return NTDB_OFF_TO_ERR(len);
+                               }
+                               if (len < sizeof(rec.r)) {
+                                       return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+                                                         NTDB_LOG_ERROR,
+                                                         "ntdb_check: invalid"
+                                                         " dead space at %zu",
+                                                         (size_t)off);
+                               }
+
+                               ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING,
+                                          "Dead space at %zu-%zu (of %zu)",
+                                          (size_t)off, (size_t)(off + len),
+                                          (size_t)ntdb->file->map_size);
+                       }
+               } else if (rec.r.magic == NTDB_RECOVERY_MAGIC) {
+                       /* A live recovery record is only legal at the offset
+                        * the caller expects. */
+                       ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec.r));
+                       if (ecode != NTDB_SUCCESS) {
+                               return ecode;
+                       }
+                       if (recovery != off) {
+                               return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+                                                 NTDB_LOG_ERROR,
+                                                 "ntdb_check: unexpected"
+                                                 " recovery record at offset"
+                                                 " %zu",
+                                                 (size_t)off);
+                       }
+                       if (rec.r.len > rec.r.max_len) {
+                               return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+                                                 NTDB_LOG_ERROR,
+                                                 "ntdb_check: invalid recovery"
+                                                 " length %zu",
+                                                 (size_t)rec.r.len);
+                       }
+                       if (rec.r.eof > ntdb->file->map_size) {
+                               return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+                                                 NTDB_LOG_ERROR,
+                                                 "ntdb_check: invalid old EOF"
+                                                 " %zu", (size_t)rec.r.eof);
+                       }
+                       found_recovery = true;
+                       len = sizeof(rec.r) + rec.r.max_len;
+               } else if (frec_magic(&rec.f) == NTDB_FREE_MAGIC) {
+                       /* Free record: its length is counted from the end of
+                        * a used-record-sized header. */
+                       len = sizeof(rec.u) + frec_len(&rec.f);
+                       if (off + len > ntdb->file->map_size) {
+                               return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+                                                 NTDB_LOG_ERROR,
+                                                 "ntdb_check: free overlength"
+                                                 " %llu at offset %llu",
+                                                 (long long)len,
+                                                 (long long)off);
+                       }
+                       /* This record should be in free lists. */
+                       if (frec_ftable(&rec.f) != NTDB_FTABLE_NONE
+                           && !append(fr, num_free, off)) {
+                               return ntdb_logerr(ntdb, NTDB_ERR_OOM,
+                                                 NTDB_LOG_ERROR,
+                                                 "ntdb_check: tracking %zu'th"
+                                                 " free record.", *num_free);
+                       }
+               } else if (rec_magic(&rec.u) == NTDB_USED_MAGIC
+                          || rec_magic(&rec.u) == NTDB_CHAIN_MAGIC
+                          || rec_magic(&rec.u) == NTDB_HTABLE_MAGIC
+                          || rec_magic(&rec.u) == NTDB_FTABLE_MAGIC
+                          || rec_magic(&rec.u) == NTDB_CAP_MAGIC) {
+                       uint64_t klen, dlen, extra;
+
+                       /* This record is used! */
+                       if (!append(used, num_used, off)) {
+                               return ntdb_logerr(ntdb, NTDB_ERR_OOM,
+                                                 NTDB_LOG_ERROR,
+                                                 "ntdb_check: tracking %zu'th"
+                                                 " used record.", *num_used);
+                       }
+
+                       klen = rec_key_length(&rec.u);
+                       dlen = rec_data_length(&rec.u);
+                       extra = rec_extra_padding(&rec.u);
+
+                       /* Total footprint: header, key, data, then padding. */
+                       len = sizeof(rec.u) + klen + dlen + extra;
+                       if (off + len > ntdb->file->map_size) {
+                               return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+                                                 NTDB_LOG_ERROR,
+                                                 "ntdb_check: used overlength"
+                                                 " %llu at offset %llu",
+                                                 (long long)len,
+                                                 (long long)off);
+                       }
+
+                       /* A used record must be at least as large as a free
+                        * record header. */
+                       if (len < sizeof(rec.f)) {
+                               return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+                                                 NTDB_LOG_ERROR,
+                                                 "ntdb_check: too short record"
+                                                 " %llu at %llu",
+                                                 (long long)len,
+                                                 (long long)off);
+                       }
+
+                       /* Check that records have correct 0 at end (but may
+                        * not in future). */
+                       if (extra && !features
+                           && rec_magic(&rec.u) != NTDB_CAP_MAGIC) {
+                               const char *p;
+                               char c;
+                               /* Only the first padding byte is examined. */
+                               p = ntdb_access_read(ntdb, off + sizeof(rec.u)
+                                                   + klen + dlen, 1, false);
+                               if (NTDB_PTR_IS_ERR(p))
+                                       return NTDB_PTR_ERR(p);
+                               c = *p;
+                               ntdb_access_release(ntdb, p);
+
+                               if (c != '\0') {
+                                       return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+                                                         NTDB_LOG_ERROR,
+                                                         "ntdb_check:"
+                                                         " non-zero extra"
+                                                         " at %llu",
+                                                         (long long)off);
+                               }
+                       }
+               } else {
+                       return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+                                         NTDB_LOG_ERROR,
+                                         "ntdb_check: Bad magic 0x%llx"
+                                         " at offset %zu",
+                                         (long long)rec_magic(&rec.u),
+                                         (size_t)off);
+               }
+       }
+
+       /* We must have found recovery area if there was one. */
+       if (recovery != 0 && !found_recovery) {
+               return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+                                 "ntdb_check: expected a recovery area at %zu",
+                                 (size_t)recovery);
+       }
+
+       return NTDB_SUCCESS;
+}
+
+/* Full consistency check of the database under read locks.
+ *
+ * Order of work: header, linear scan of all records, each free table in
+ * turn, then the hash.  Finally the number of records found on free lists
+ * must equal the number of free records collected by the linear scan.
+ *
+ * check/data: optional per-record callback, handed on to check_hash().
+ *
+ * Returns the outcome and, on every path that took the locks, also stores
+ * it in ntdb->last_error.  A database flagged NTDB_CANT_CHECK is not
+ * examined: a warning is logged and its result returned directly. */
+_PUBLIC_ enum NTDB_ERROR ntdb_check_(struct ntdb_context *ntdb,
+                         enum NTDB_ERROR (*check)(NTDB_DATA, NTDB_DATA, void *),
+                         void *data)
+{
+       ntdb_off_t *free_offs = NULL, *used_offs = NULL;
+       ntdb_off_t table, recovery;
+       size_t n_free = 0, n_used = 0, n_listed = 0;
+       size_t n_tables = 0, n_caps = 0;
+       uint64_t features;
+       enum NTDB_ERROR err;
+
+       if (ntdb->flags & NTDB_CANT_CHECK) {
+               return ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING,
+                                 "ntdb_check: database has unknown capability,"
+                                 " cannot check.");
+       }
+
+       err = ntdb_allrecord_lock(ntdb, F_RDLCK, NTDB_LOCK_WAIT, false);
+       if (err != NTDB_SUCCESS) {
+               ntdb->last_error = err;
+               return err;
+       }
+
+       err = ntdb_lock_expand(ntdb, F_RDLCK);
+       if (err != NTDB_SUCCESS) {
+               ntdb_allrecord_unlock(ntdb, F_RDLCK);
+               ntdb->last_error = err;
+               return err;
+       }
+
+       err = check_header(ntdb, &recovery, &features, &n_caps);
+       if (err != NTDB_SUCCESS) {
+               goto unlock;
+       }
+
+       /* First we do a linear scan, checking all records. */
+       err = check_linear(ntdb, &used_offs, &n_used, &free_offs, &n_free,
+                          features, recovery);
+       if (err != NTDB_SUCCESS) {
+               goto unlock;
+       }
+
+       /* Then each free table, numbered in chain order. */
+       table = first_ftable(ntdb);
+       while (table) {
+               if (NTDB_OFF_IS_ERR(table)) {
+                       err = NTDB_OFF_TO_ERR(table);
+                       goto unlock;
+               }
+               err = check_free_table(ntdb, table, n_tables,
+                                      free_offs, n_free, &n_listed);
+               if (err != NTDB_SUCCESS) {
+                       goto unlock;
+               }
+               n_tables++;
+               table = next_ftable(ntdb, table);
+       }
+
+       /* FIXME: Check key uniqueness? */
+       err = check_hash(ntdb, used_offs, n_used, n_tables + n_caps,
+                        check, data);
+       if (err != NTDB_SUCCESS) {
+               goto unlock;
+       }
+
+       if (n_listed != n_free) {
+               err = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+                                 "ntdb_check: Not all entries are in"
+                                 " free table");
+       }
+
+unlock:
+       ntdb_allrecord_unlock(ntdb, F_RDLCK);
+       ntdb_unlock_expand(ntdb, F_RDLCK);
+       free(free_offs);
+       free(used_offs);
+       ntdb->last_error = err;
+       return err;
+}
similarity index 100%
rename from lib/tdb2/configure
rename to lib/ntdb/configure
diff --git a/lib/ntdb/doc/TDB_porting.txt b/lib/ntdb/doc/TDB_porting.txt
new file mode 100644 (file)
index 0000000..8df1374
--- /dev/null
@@ -0,0 +1,65 @@
+Interface differences between TDB and NTDB.
+
+- ntdb shares 'struct TDB_DATA' with tdb, but TDB defines the TDB_DATA
+  typedef, whereas ntdb defines NTDB_DATA (ie. both are compatible).
+  If you include both ntdb.h and tdb.h, #include tdb.h first,
+  otherwise you'll get a compile error when tdb.h re-defines struct
+  TDB_DATA.
+
+- ntdb functions return NTDB_SUCCESS (ie 0) on success, and a negative
+  error on failure, whereas tdb functions returned 0 on success, and
+  -1 on failure.  tdb then used tdb_error() to determine the error;
+  this is also supported in ntdb to ease backwards compatibility,
+  though the other form is preferred.
+
+- ntdb's ntdb_fetch() returns an error, tdb's returned the data directly
+  (or tdb_null, and you were supposed to check tdb_error() to find out why).
+
+- ntdb's ntdb_nextkey() frees the old key's dptr, in tdb you needed to do
+  this manually.
+
+- tdb's tdb_open/tdb_open_ex took an explicit hash size.  ntdb's hash table
+  resizes as required.
+
+- ntdb uses a linked list of attribute structures to implement logging and
+  alternate hashes.  tdb used tdb_open_ex, which was not extensible.
+
+- ntdb does locking on read-only databases (ie. O_RDONLY passed to ntdb_open).
+  tdb did not: use the NTDB_NOLOCK flag if you want to suppress locking.
+
+- ntdb's log function is simpler than tdb's log function.  The string is
+  already formatted, and it takes an enum ntdb_log_level (not a
+  tdb_debug_level), which has only three values: NTDB_LOG_ERROR,
+  NTDB_LOG_USE_ERROR and NTDB_LOG_WARNING.
+
+- ntdb provides ntdb_deq() for comparing two NTDB_DATA, and ntdb_mkdata() for
+  creating an NTDB_DATA.
+
+- ntdb's ntdb_name() returns a copy of the name even for NTDB_INTERNAL dbs.
+
+- ntdb does not need tdb_reopen() or tdb_reopen_all().  If you call
+  fork() during certain operations, the child should close the
+  tdb, or complete the operations before continuing to use the tdb:
+
+       ntdb_transaction_start(): child must ntdb_transaction_cancel()
+       ntdb_lockall(): child must call ntdb_unlockall()
+       ntdb_lockall_read(): child must call ntdb_unlockall_read()
+       ntdb_chainlock(): child must call ntdb_chainunlock()
+       ntdb_parse() callback: child must return from ntdb_parse()
+
+- ntdb will not open a non-tdb file, even if O_CREAT is specified.
+
+- There is no ntdb_traverse_read.  For operating on TDB files, you can
+  simulate it by ntdb_add_flag(tdb, NTDB_RDONLY); ntdb_traverse();
+  ntdb_remove_flag(tdb, NTDB_RDONLY).  This may be desirable because
+  traverse on TDB files uses a write lock on the entire database
+  unless it's read-only.
+
+- Failure inside a transaction (such as a lock function failing) does
+  not implicitly cancel the transaction; you still need to call
+  ntdb_transaction_cancel().
+
+- There is no NTDB_CLEAR_IF_FIRST flag; it has severe scalability and
+  API problems.  If necessary, you can emulate this by using the open
+  hook and placing a 1-byte lock at offset 4.  If your program forks,
+  you will need to place this lock again in the child.
diff --git a/lib/ntdb/free.c b/lib/ntdb/free.c
new file mode 100644 (file)
index 0000000..0fe6c73
--- /dev/null
@@ -0,0 +1,976 @@
+ /*
+   Trivial Database 2: free list/block handling
+   Copyright (C) Rusty Russell 2010
+
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 3 of the License, or (at your option) any later version.
+
+   This library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include "private.h"
+#include <ccan/likely/likely.h>
+#include <ccan/ilog/ilog.h>
+#include <time.h>
+#include <assert.h>
+#include <limits.h>
+
+/* Local name for ilog64(); the result is returned as unsigned. */
+static unsigned fls64(uint64_t val)
+{
+       unsigned bits = ilog64(val);
+
+       return bits;
+}
+
+/* Map a record's data length (header not counted) to the index of the free
+ * bucket that holds records of that size.  The result is clamped to the
+ * last bucket, NTDB_FREE_BUCKETS - 1. */
+unsigned int size_to_bucket(ntdb_len_t data_len)
+{
+       ntdb_len_t excess;
+       unsigned int idx;
+
+       /* Records below the minimum size cannot exist. */
+       assert(data_len >= NTDB_MIN_DATA_LEN);
+
+       /* Everything is measured relative to the minimum length. */
+       excess = data_len - NTDB_MIN_DATA_LEN;
+
+       if (excess > 64) {
+               /* Beyond the linear range, buckets go by power of 2. */
+               idx = fls64(excess) + 2;
+       } else {
+               /* 0 in bucket 0, 8 in bucket 1... 64 in bucket 8. */
+               idx = excess / 8;
+       }
+
+       if (unlikely(idx >= NTDB_FREE_BUCKETS))
+               idx = NTDB_FREE_BUCKETS - 1;
+       return idx;
+}
+
+/* Read the header's free_table field.  The value is whatever
+ * ntdb_read_off() returns; callers test it with NTDB_OFF_IS_ERR(). */
+ntdb_off_t first_ftable(struct ntdb_context *ntdb)
+{
+       const size_t field = offsetof(struct ntdb_header, free_table);
+
+       return ntdb_read_off(ntdb, field);
+}
+
+/* Read the 'next' field of the free table at offset ftable.  The value is
+ * whatever ntdb_read_off() returns; callers test it with NTDB_OFF_IS_ERR(). */
+ntdb_off_t next_ftable(struct ntdb_context *ntdb, ntdb_off_t ftable)
+{
+       ntdb_off_t next_field;
+
+       next_field = ftable + offsetof(struct ntdb_freetable, next);
+       return ntdb_read_off(ntdb, next_field);
+}
+
+/* Choose the free table this context will use and record its offset and
+ * index in ntdb->ftable_off / ntdb->ftable.
+ *
+ * Every table in the chain gets a random() draw; the table whose draw is
+ * the largest so far (ties go to the later table) is kept.
+ *
+ * Returns NTDB_SUCCESS, or the decoded error if reading a table offset
+ * fails. */
+enum NTDB_ERROR ntdb_ftable_init(struct ntdb_context *ntdb)
+{
+       unsigned int draw, best = 0, idx;
+       ntdb_off_t cur;
+
+       /* Start out pointing at the first table (index 0). */
+       cur = first_ftable(ntdb);
+       ntdb->ftable_off = cur;
+       ntdb->ftable = 0;
+
+       for (idx = 0; cur != 0; cur = next_ftable(ntdb, cur), idx++) {
+               if (NTDB_OFF_IS_ERR(cur)) {
+                       return NTDB_OFF_TO_ERR(cur);
+               }
+
+               draw = random();
+               if (draw < best) {
+                       continue;
+               }
+               ntdb->ftable_off = cur;
+               ntdb->ftable = idx;
+               best = draw;
+       }
+       return NTDB_SUCCESS;
+}
+
+/* File offset of bucket number 'bucket' inside the free table that starts
+ * at ftable_off. */
+ntdb_off_t bucket_off(ntdb_off_t ftable_off, unsigned bucket)
+{
+       ntdb_off_t buckets_start;
+
+       buckets_start = ftable_off + offsetof(struct ntdb_freetable, buckets);
+       return buckets_start + bucket * sizeof(ntdb_off_t);
+}
+
+/* Returns free_buckets + 1, or list number to search, or -ve error. */
+static ntdb_off_t find_free_head(struct ntdb_context *ntdb,
+                               ntdb_off_t ftable_off,
+                               ntdb_off_t bucket)
+{
+       /* Speculatively search for a non-zero bucket. */
+       return ntdb_find_nonzero_off(ntdb, bucket_off(ftable_off, 0),
+                                   bucket, NTDB_FREE_BUCKETS);
+}
+
+/* Debug-only walk of the free list whose head is stored at b_off; the body
+ * is compiled only when CCAN_NTDB_DEBUG is defined.  Calls abort() if a
+ * record has the wrong magic, if a record's back pointer is not the record
+ * walked just before it, or if the head's back pointer is not the last
+ * record walked. */
+static void check_list(struct ntdb_context *ntdb, ntdb_off_t b_off)
+{
+#ifdef CCAN_NTDB_DEBUG
+       struct ntdb_free_record rec;
+       ntdb_off_t head, cur, last = 0;
+
+       head = ntdb_read_off(ntdb, b_off) & NTDB_OFF_MASK;
+       for (cur = head; cur != 0; cur = rec.next) {
+               ntdb_read_convert(ntdb, cur, &rec, sizeof(rec));
+               if (frec_magic(&rec) != NTDB_FREE_MAGIC)
+                       abort();
+               if (last && frec_prev(&rec) != last)
+                       abort();
+               last = cur;
+       }
+
+       if (head) {
+               ntdb_read_convert(ntdb, head, &rec, sizeof(rec));
+               if (frec_prev(&rec) != last)
+                       abort();
+       }
+#endif
+}
+
+/* Remove from free bucket.
+ *
+ * b_off: offset of the bucket head pointer.
+ * r_off: offset of the record being removed.
+ * r:     that record's header, already read by the caller.
+ *
+ * As the code below relies on: the head's prev field refers to the tail,
+ * the tail's next is 0, and a lone record's prev is itself.
+ *
+ * Returns NTDB_SUCCESS, a read/write error, or the result of logging
+ * NTDB_ERR_CORRUPT when the list is inconsistent with r. */
+static enum NTDB_ERROR remove_from_list(struct ntdb_context *ntdb,
+                                      ntdb_off_t b_off, ntdb_off_t r_off,
+                                      const struct ntdb_free_record *r)
+{
+       ntdb_off_t off, prev_next, head;
+       enum NTDB_ERROR ecode;
+
+       /* Is this only element in list?  Zero out bucket, and we're done. */
+       if (frec_prev(r) == r_off)
+               return ntdb_write_off(ntdb, b_off, 0);
+
+       /* off = &r->prev->next */
+       off = frec_prev(r) + offsetof(struct ntdb_free_record, next);
+
+       /* Get prev->next */
+       prev_next = ntdb_read_off(ntdb, off);
+       if (NTDB_OFF_IS_ERR(prev_next))
+               return NTDB_OFF_TO_ERR(prev_next);
+
+       /* If prev->next == 0, we were head: update bucket to point to next. */
+       if (prev_next == 0) {
+               /* We must preserve upper bits. */
+               head = ntdb_read_off(ntdb, b_off);
+               if (NTDB_OFF_IS_ERR(head))
+                       return NTDB_OFF_TO_ERR(head);
+
+               /* The bucket must really point at us before we rewrite it. */
+               if ((head & NTDB_OFF_MASK) != r_off) {
+                       return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+                                         "remove_from_list:"
+                                         " %llu head %llu on list %llu",
+                                         (long long)r_off,
+                                         (long long)head,
+                                         (long long)b_off);
+               }
+               head = ((head & ~NTDB_OFF_MASK) | r->next);
+               ecode = ntdb_write_off(ntdb, b_off, head);
+               if (ecode != NTDB_SUCCESS)
+                       return ecode;
+       } else {
+               /* r->prev->next = r->next */
+               ecode = ntdb_write_off(ntdb, off, r->next);
+               if (ecode != NTDB_SUCCESS)
+                       return ecode;
+       }
+
+       /* If we were the tail, off = &head->prev. */
+       if (r->next == 0) {
+               /* Re-read the bucket: it may have been rewritten above. */
+               head = ntdb_read_off(ntdb, b_off);
+               if (NTDB_OFF_IS_ERR(head))
+                       return NTDB_OFF_TO_ERR(head);
+               head &= NTDB_OFF_MASK;
+               off = head + offsetof(struct ntdb_free_record, magic_and_prev);
+       } else {
+               /* off = &r->next->prev */
+               off = r->next + offsetof(struct ntdb_free_record,
+                                        magic_and_prev);
+       }
+
+#ifdef CCAN_NTDB_DEBUG
+       /* *off == r */
+       if ((ntdb_read_off(ntdb, off) & NTDB_OFF_MASK) != r_off) {
+               return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+                                 "remove_from_list:"
+                                 " %llu bad prev in list %llu",
+                                 (long long)r_off, (long long)b_off);
+       }
+#endif
+       /* r->next->prev = r->prev */
+       /* The whole magic_and_prev word is written, magic bits included. */
+       return ntdb_write_off(ntdb, off, r->magic_and_prev);
+}
+
+/* Enqueue in this free bucket: sets coalesce if we've added 128
+ * entries to it.
+ *
+ * The record at off becomes the new list head stored in the word at b_off.
+ * *coalesce is in/out: when true on entry, the enqueue counter kept in the
+ * bits of the head word above NTDB_OFF_MASK is bumped, and on return it
+ * tells whether that counter wrapped to zero; when false on entry the
+ * counter is not bumped and *coalesce is left as it was.
+ *
+ * Returns NTDB_SUCCESS, the error from a failed head read or a failed
+ * write, or NTDB_ERR_CORRUPT if the old head's prev field lacks the free
+ * magic.  add_free_record() takes the bucket lock before calling this. */
+static enum NTDB_ERROR enqueue_in_free(struct ntdb_context *ntdb,
+                                     ntdb_off_t b_off,
+                                     ntdb_off_t off,
+                                     ntdb_len_t len,
+                                     bool *coalesce)
+{
+       struct ntdb_free_record new;
+       enum NTDB_ERROR ecode;
+       ntdb_off_t prev, head;
+       uint64_t magic = (NTDB_FREE_MAGIC << (64 - NTDB_OFF_UPPER_STEAL));
+
+       head = ntdb_read_off(ntdb, b_off);
+       if (NTDB_OFF_IS_ERR(head))
+               return NTDB_OFF_TO_ERR(head);
+
+       /* Set ftable_and_len first (free table number in the stolen upper
+        * bits, length below); next and magic_and_prev are set below. */
+       new.ftable_and_len = ((uint64_t)ntdb->ftable
+                             << (64 - NTDB_OFF_UPPER_STEAL))
+               | len;
+
+       /* new->next = head. */
+       new.next = (head & NTDB_OFF_MASK);
+
+       /* First element?  Prev points to ourselves. */
+       if (!new.next) {
+               new.magic_and_prev = (magic | off);
+       } else {
+               /* new->prev = next->prev
+                * NOTE(review): the result of this read is not tested with
+                * NTDB_OFF_IS_ERR; a failed read would presumably trip the
+                * magic check below and be logged as corruption -- confirm. */
+               prev = ntdb_read_off(ntdb,
+                                   new.next + offsetof(struct ntdb_free_record,
+                                                       magic_and_prev));
+               new.magic_and_prev = prev;
+               if (frec_magic(&new) != NTDB_FREE_MAGIC) {
+                       return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+                                         "enqueue_in_free: %llu bad head"
+                                         " prev %llu",
+                                         (long long)new.next,
+                                         (long long)prev);
+               }
+               /* next->prev = new. */
+               ecode = ntdb_write_off(ntdb, new.next
+                                     + offsetof(struct ntdb_free_record,
+                                                magic_and_prev),
+                                     off | magic);
+               if (ecode != NTDB_SUCCESS) {
+                       return ecode;
+               }
+
+#ifdef CCAN_NTDB_DEBUG
+               prev = ntdb_read_off(ntdb, frec_prev(&new)
+                                   + offsetof(struct ntdb_free_record, next));
+               if (prev != 0) {
+                       return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+                                         "enqueue_in_free:"
+                                         " %llu bad tail next ptr %llu",
+                                         (long long)frec_prev(&new)
+                                         + offsetof(struct ntdb_free_record,
+                                                    next),
+                                         (long long)prev);
+               }
+#endif
+       }
+
+       /* Update enqueue count, but don't set high bit: see NTDB_OFF_IS_ERR.
+        * The old head offset is masked out and replaced by off, so the
+        * bucket now points at the new record. */
+       if (*coalesce)
+               head += (1ULL << (64 - NTDB_OFF_UPPER_STEAL));
+       head &= ~(NTDB_OFF_MASK | (1ULL << 63));
+       head |= off;
+
+       ecode = ntdb_write_off(ntdb, b_off, head);
+       if (ecode != NTDB_SUCCESS) {
+               return ecode;
+       }
+
+       /* It's time to coalesce if counter wrapped. */
+       if (*coalesce)
+               *coalesce = ((head & ~NTDB_OFF_MASK) == 0);
+
+       return ntdb_write_convert(ntdb, off, &new, sizeof(new));
+}
+
+/* Return the file offset of free table number ftable.
+ *
+ * The table currently in use is answered from the context; any other one
+ * is found by walking the chain from first_ftable().  If the walk hits an
+ * error offset, that error offset is returned as-is. */
+static ntdb_off_t ftable_offset(struct ntdb_context *ntdb, unsigned int ftable)
+{
+       ntdb_off_t cur;
+       unsigned int remaining;
+
+       /* Fast path: they asked for the table we are already using. */
+       if (likely(ftable == ntdb->ftable))
+               return ntdb->ftable_off;
+
+       /* Step forward ftable times, stopping early on an error offset. */
+       cur = first_ftable(ntdb);
+       remaining = ftable;
+       while (remaining != 0 && !NTDB_OFF_IS_ERR(cur)) {
+               cur = next_ftable(ntdb, cur);
+               remaining--;
+       }
+       return cur;
+}
+
+/* Try to merge the free record at off (data length data_len, on the list
+ * b_off) with the free records which directly follow it in the file.
+ *
+ * Returns 0 if nothing adjacent was merged, the new usable length if the
+ * record was expanded, or an error offset (see NTDB_ERR_TO_OFF) on failure.
+ *
+ * Note: we unlock the current bucket if fail (-ve), or coalesce (+ve) and
+ * need to blatt the *protect record (which is set to an error). */
+static ntdb_len_t coalesce(struct ntdb_context *ntdb,
+                         ntdb_off_t off, ntdb_off_t b_off,
+                         ntdb_len_t data_len,
+                         ntdb_off_t *protect)
+{
+       ntdb_off_t end;
+       struct ntdb_free_record rec;
+       enum NTDB_ERROR ecode;
+
+       ntdb->stats.alloc_coalesce_tried++;
+       end = off + sizeof(struct ntdb_used_record) + data_len;
+
+       while (end < ntdb->file->map_size) {
+               const struct ntdb_free_record *r;
+               ntdb_off_t nb_off;
+               unsigned ftable, bucket;
+
+               r = ntdb_access_read(ntdb, end, sizeof(*r), true);
+               if (NTDB_PTR_IS_ERR(r)) {
+                       ecode = NTDB_PTR_ERR(r);
+                       goto err;
+               }
+
+               if (frec_magic(r) != NTDB_FREE_MAGIC
+                   || frec_ftable(r) == NTDB_FTABLE_NONE) {
+                       ntdb_access_release(ntdb, r);
+                       break;
+               }
+
+               ftable = frec_ftable(r);
+               bucket = size_to_bucket(frec_len(r));
+               nb_off = ftable_offset(ntdb, ftable);
+               if (NTDB_OFF_IS_ERR(nb_off)) {
+                       ntdb_access_release(ntdb, r);
+                       ecode = NTDB_OFF_TO_ERR(nb_off);
+                       goto err;
+               }
+               nb_off = bucket_off(nb_off, bucket);
+               ntdb_access_release(ntdb, r);
+
+               /* We may be violating lock order here, so best effort: with
+                * NTDB_LOCK_NOWAIT we simply stop merging if the neighbour's
+                * bucket cannot be locked right now. */
+               if (ntdb_lock_free_bucket(ntdb, nb_off, NTDB_LOCK_NOWAIT)
+                   != NTDB_SUCCESS) {
+                       ntdb->stats.alloc_coalesce_lockfail++;
+                       break;
+               }
+
+               /* Now we have lock, re-check: the record was examined above
+                * before its bucket was locked, so read it again and make
+                * sure it is still free and still in the same bucket. */
+               ecode = ntdb_read_convert(ntdb, end, &rec, sizeof(rec));
+               if (ecode != NTDB_SUCCESS) {
+                       ntdb_unlock_free_bucket(ntdb, nb_off);
+                       goto err;
+               }
+
+               if (unlikely(frec_magic(&rec) != NTDB_FREE_MAGIC)) {
+                       ntdb->stats.alloc_coalesce_race++;
+                       ntdb_unlock_free_bucket(ntdb, nb_off);
+                       break;
+               }
+
+               if (unlikely(frec_ftable(&rec) != ftable)
+                   || unlikely(size_to_bucket(frec_len(&rec)) != bucket)) {
+                       ntdb->stats.alloc_coalesce_race++;
+                       ntdb_unlock_free_bucket(ntdb, nb_off);
+                       break;
+               }
+
+               /* Did we just mess up a record you were hoping to use?
+                * If so, tell the caller by turning *protect into an error. */
+               if (end == *protect) {
+                       ntdb->stats.alloc_coalesce_iterate_clash++;
+                       *protect = NTDB_ERR_TO_OFF(NTDB_ERR_NOEXIST);
+               }
+
+               ecode = remove_from_list(ntdb, nb_off, end, &rec);
+               check_list(ntdb, nb_off);
+               if (ecode != NTDB_SUCCESS) {
+                       ntdb_unlock_free_bucket(ntdb, nb_off);
+                       goto err;
+               }
+
+               end += sizeof(struct ntdb_used_record) + frec_len(&rec);
+               ntdb_unlock_free_bucket(ntdb, nb_off);
+               ntdb->stats.alloc_coalesce_num_merged++;
+       }
+
+       /* Didn't find any adjacent free?  Then end never moved, and the
+        * caller's bucket stays locked. */
+       if (end == off + sizeof(struct ntdb_used_record) + data_len)
+               return 0;
+
+       /* Before we expand, check this isn't one you wanted protected? */
+       if (off == *protect) {
+               *protect = NTDB_ERR_TO_OFF(NTDB_ERR_EXISTS);
+               ntdb->stats.alloc_coalesce_iterate_clash++;
+       }
+
+       /* OK, expand initial record */
+       ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec));
+       if (ecode != NTDB_SUCCESS) {
+               goto err;
+       }
+
+       if (frec_len(&rec) != data_len) {
+               ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+                                  "coalesce: expected data len %zu not %zu",
+                                  (size_t)data_len, (size_t)frec_len(&rec));
+               goto err;
+       }
+
+       ecode = remove_from_list(ntdb, b_off, off, &rec);
+       check_list(ntdb, b_off);
+       if (ecode != NTDB_SUCCESS) {
+               goto err;
+       }
+
+       /* Try locking violation first.  We don't allow coalesce recursion!
+        * (Hence NTDB_LOCK_NOWAIT and coalesce_ok == false.) */
+       ecode = add_free_record(ntdb, off, end - off, NTDB_LOCK_NOWAIT, false);
+       if (ecode != NTDB_SUCCESS) {
+               /* Need to drop lock.  Can't rely on anything stable. */
+               ntdb->stats.alloc_coalesce_lockfail++;
+               *protect = NTDB_ERR_TO_OFF(NTDB_ERR_CORRUPT);
+
+               /* We have to drop this to avoid deadlocks, so make sure record
+                * doesn't get coalesced by someone else!  The loop above
+                * refuses to merge records marked NTDB_FTABLE_NONE. */
+               rec.ftable_and_len = (NTDB_FTABLE_NONE
+                                     << (64 - NTDB_OFF_UPPER_STEAL))
+                       | (end - off - sizeof(struct ntdb_used_record));
+               ecode = ntdb_write_off(ntdb,
+                                     off + offsetof(struct ntdb_free_record,
+                                                    ftable_and_len),
+                                     rec.ftable_and_len);
+               if (ecode != NTDB_SUCCESS) {
+                       goto err;
+               }
+
+               ntdb_unlock_free_bucket(ntdb, b_off);
+
+               ecode = add_free_record(ntdb, off, end - off, NTDB_LOCK_WAIT,
+                                       false);
+               if (ecode != NTDB_SUCCESS) {
+                       return NTDB_ERR_TO_OFF(ecode);
+               }
+       } else if (NTDB_OFF_IS_ERR(*protect)) {
+               /* For simplicity, we always drop lock if they can't continue */
+               ntdb_unlock_free_bucket(ntdb, b_off);
+       }
+       ntdb->stats.alloc_coalesce_succeeded++;
+
+       /* Return usable length (merged span minus one used-record header). */
+       return end - off - sizeof(struct ntdb_used_record);
+
+err:
+       /* To unify error paths, we *always* unlock bucket on error. */
+       ntdb_unlock_free_bucket(ntdb, b_off);
+       return NTDB_ERR_TO_OFF(ecode);
+}
+
+/* List is locked: we unlock it.
+ *
+ * Walks the free list whose head word is at b_off, calling coalesce() on
+ * each record.  At most limit records are visited, but limit is topped up
+ * whenever a coalesce gains space.  If the walk stops part-way, the list is
+ * rotated so that the record it stopped at becomes the head and the ones
+ * already visited end up at the tail.
+ *
+ * Returns NTDB_SUCCESS (also when coalesce() had to drop the lock and the
+ * walk was abandoned) or the first error met.  ftable_off is not used in
+ * the body. */
+static enum NTDB_ERROR coalesce_list(struct ntdb_context *ntdb,
+                                   ntdb_off_t ftable_off,
+                                   ntdb_off_t b_off,
+                                   unsigned int limit)
+{
+       enum NTDB_ERROR ecode;
+       ntdb_off_t off;
+
+       off = ntdb_read_off(ntdb, b_off);
+       if (NTDB_OFF_IS_ERR(off)) {
+               ecode = NTDB_OFF_TO_ERR(off);
+               goto unlock_err;
+       }
+       /* A little bit of paranoia: counter should be 0. */
+       off &= NTDB_OFF_MASK;
+
+       while (off && limit--) {
+               struct ntdb_free_record rec;
+               ntdb_len_t coal;
+               ntdb_off_t next;
+
+               ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec));
+               if (ecode != NTDB_SUCCESS)
+                       goto unlock_err;
+
+               next = rec.next;
+               coal = coalesce(ntdb, off, b_off, frec_len(&rec), &next);
+               if (NTDB_OFF_IS_ERR(coal)) {
+                       /* This has already unlocked on error. */
+                       return NTDB_OFF_TO_ERR(coal);
+               }
+               if (NTDB_OFF_IS_ERR(next)) {
+                       /* Coalescing had to unlock, so stop. */
+                       return NTDB_SUCCESS;
+               }
+               /* Keep going if we're doing well: the more we gained, the
+                * more extra records we allow ourselves to look at. */
+               limit += size_to_bucket(coal / 16 + NTDB_MIN_DATA_LEN);
+               off = next;
+       }
+
+       /* Now, move those elements to the tail of the list so we get something
+        * else next time. */
+       if (off) {
+               struct ntdb_free_record oldhrec, newhrec, oldtrec, newtrec;
+               ntdb_off_t oldhoff, oldtoff, newtoff;
+
+               /* The record we were up to is the new head. */
+               ecode = ntdb_read_convert(ntdb, off, &newhrec, sizeof(newhrec));
+               if (ecode != NTDB_SUCCESS)
+                       goto unlock_err;
+
+               /* Get the new tail: the record just before the new head. */
+               newtoff = frec_prev(&newhrec);
+               ecode = ntdb_read_convert(ntdb, newtoff, &newtrec,
+                                        sizeof(newtrec));
+               if (ecode != NTDB_SUCCESS)
+                       goto unlock_err;
+
+               /* Get the old head.
+                * NOTE(review): unlike the read at the top, this value is
+                * not masked with NTDB_OFF_MASK; presumably the counter
+                * bits are known to be 0 here -- confirm. */
+               oldhoff = ntdb_read_off(ntdb, b_off);
+               if (NTDB_OFF_IS_ERR(oldhoff)) {
+                       ecode = NTDB_OFF_TO_ERR(oldhoff);
+                       goto unlock_err;
+               }
+
+               /* This could happen if they all coalesced away. */
+               if (oldhoff == off)
+                       goto out;
+
+               ecode = ntdb_read_convert(ntdb, oldhoff, &oldhrec,
+                                        sizeof(oldhrec));
+               if (ecode != NTDB_SUCCESS)
+                       goto unlock_err;
+
+               /* Get the old tail (the head's prev field holds it). */
+               oldtoff = frec_prev(&oldhrec);
+               ecode = ntdb_read_convert(ntdb, oldtoff, &oldtrec,
+                                        sizeof(oldtrec));
+               if (ecode != NTDB_SUCCESS)
+                       goto unlock_err;
+
+               /* Old tail's next points to old head. */
+               oldtrec.next = oldhoff;
+
+               /* Old head's prev points to old tail. */
+               oldhrec.magic_and_prev
+                       = (NTDB_FREE_MAGIC << (64 - NTDB_OFF_UPPER_STEAL))
+                       | oldtoff;
+
+               /* New tail's next is 0. */
+               newtrec.next = 0;
+
+               /* Write out the modified versions.  The order matters when
+                * two of these offsets are the same record: the copy
+                * written last is the one that sticks. */
+               ecode = ntdb_write_convert(ntdb, oldtoff, &oldtrec,
+                                         sizeof(oldtrec));
+               if (ecode != NTDB_SUCCESS)
+                       goto unlock_err;
+
+               ecode = ntdb_write_convert(ntdb, oldhoff, &oldhrec,
+                                         sizeof(oldhrec));
+               if (ecode != NTDB_SUCCESS)
+                       goto unlock_err;
+
+               ecode = ntdb_write_convert(ntdb, newtoff, &newtrec,
+                                         sizeof(newtrec));
+               if (ecode != NTDB_SUCCESS)
+                       goto unlock_err;
+
+               /* And finally link in new head. */
+               ecode = ntdb_write_off(ntdb, b_off, off);
+               if (ecode != NTDB_SUCCESS)
+                       goto unlock_err;
+       }
+out:
+       ntdb_unlock_free_bucket(ntdb, b_off);
+       return NTDB_SUCCESS;
+
+unlock_err:
+       ntdb_unlock_free_bucket(ntdb, b_off);
+       return ecode;
+}
+
+/* Put the region [off, off + len_with_header) on the current free table.
+ *
+ * The bucket is chosen from the length without the used-record header and
+ * is locked with waitflag for the duration.  If coalesce_ok is set and the
+ * enqueue reports that it is time to coalesce, the bucket is handed to
+ * coalesce_list(), which does the unlocking itself.
+ *
+ * List must not be locked if coalesce_ok is set. */
+enum NTDB_ERROR add_free_record(struct ntdb_context *ntdb,
+                              ntdb_off_t off, ntdb_len_t len_with_header,
+                              enum ntdb_lock_flags waitflag,
+                              bool coalesce_ok)
+{
+       enum NTDB_ERROR ecode;
+       ntdb_len_t payload;
+       ntdb_off_t bucket;
+
+       assert(len_with_header >= sizeof(struct ntdb_free_record));
+
+       payload = len_with_header - sizeof(struct ntdb_used_record);
+       bucket = bucket_off(ntdb->ftable_off, size_to_bucket(payload));
+
+       ecode = ntdb_lock_free_bucket(ntdb, bucket, waitflag);
+       if (ecode != NTDB_SUCCESS) {
+               return ecode;
+       }
+
+       /* enqueue_in_free() may clear coalesce_ok again. */
+       ecode = enqueue_in_free(ntdb, bucket, off, payload, &coalesce_ok);
+       check_list(ntdb, bucket);
+
+       /* Failed, or no coalescing wanted: we do the unlock ourselves. */
+       if (ecode || !coalesce_ok) {
+               ntdb_unlock_free_bucket(ntdb, bucket);
+               return ecode;
+       }
+
+       /* Coalescing unlocks free list. */
+       return coalesce_list(ntdb, ntdb->ftable_off, bucket, 2);
+}
+
+/* Space to reserve for a key of keylen plus data of datalen bytes: never
+ * less than NTDB_MIN_DATA_LEN, and rounded up to a multiple of
+ * sizeof(uint64_t). */
+static size_t adjust_size(size_t keylen, size_t datalen)
+{
+       size_t total = keylen + datalen;
+       size_t padded = (total < NTDB_MIN_DATA_LEN) ? NTDB_MIN_DATA_LEN : total;
+
+       /* Round to next uint64_t boundary. */
+       return (padded + (sizeof(uint64_t) - 1ULL)) & ~(sizeof(uint64_t) - 1ULL);
+}
+
+/* How much of a total_len byte record can be split off again once our key
+ * and data (plus 50% extra data if want_extra) are in it.  Anything too
+ * small to hold a struct ntdb_free_record is not worth splitting: 0. */
+static size_t record_leftover(size_t keylen, size_t datalen,
+                             bool want_extra, size_t total_len)
+{
+       size_t wanted_data = want_extra ? datalen + datalen / 2 : datalen;
+       /* Signed on purpose: the record may be smaller than what we want. */
+       ssize_t spare = total_len - adjust_size(keylen, wanted_data);
+
+       return (spare >= (ssize_t)sizeof(struct ntdb_free_record)) ? spare : 0;
+}
+
+/* We need size bytes to put our key and data in.
+ *
+ * Locks the given bucket of the free table at ftable_off and searches its
+ * list for a free record of at least adjust_size(keylen, datalen) bytes.
+ * If one is found it is taken off the list and turned into a used record
+ * (header built by set_header() from magic, keylen, datalen and hashlow);
+ * any remainder worth keeping goes back via add_free_record().  The bucket
+ * is unlocked again on every path out of here.
+ *
+ * Returns the offset of the new record, 0 if this bucket had nothing
+ * suitable, or an error offset (see NTDB_ERR_TO_OFF) on failure. */
+static ntdb_off_t lock_and_alloc(struct ntdb_context *ntdb,
+                               ntdb_off_t ftable_off,
+                               ntdb_off_t bucket,
+                               size_t keylen, size_t datalen,
+                               bool want_extra,
+                               unsigned magic,
+                               unsigned hashlow)
+{
+       ntdb_off_t off, b_off, best_off;
+       struct ntdb_free_record best = { 0 };
+       double multiplier;
+       size_t size = adjust_size(keylen, datalen);
+       enum NTDB_ERROR ecode;
+
+       ntdb->stats.allocs++;
+       b_off = bucket_off(ftable_off, bucket);
+
+       /* FIXME: Try non-blocking wait first, to measure contention. */
+       /* Lock this bucket. */
+       ecode = ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT);
+       if (ecode != NTDB_SUCCESS) {
+               return NTDB_ERR_TO_OFF(ecode);
+       }
+
+       /* Start with an all-ones length field so the first candidate which
+        * is big enough compares as smaller; best_off == 0 means "none". */
+       best.ftable_and_len = -1ULL;
+       best_off = 0;
+
+       /* Get slack if we're after extra. */
+       if (want_extra)
+               multiplier = 1.5;
+       else
+               multiplier = 1.0;
+
+       /* Walk the list to see if any are large enough, getting less fussy
+        * as we go. */
+       off = ntdb_read_off(ntdb, b_off);
+       if (NTDB_OFF_IS_ERR(off)) {
+               ecode = NTDB_OFF_TO_ERR(off);
+               goto unlock_err;
+       }
+       off &= NTDB_OFF_MASK;
+
+       while (off) {
+               const struct ntdb_free_record *r;
+               ntdb_off_t next;
+
+               r = ntdb_access_read(ntdb, off, sizeof(*r), true);
+               if (NTDB_PTR_IS_ERR(r)) {
+                       ecode = NTDB_PTR_ERR(r);
+                       goto unlock_err;
+               }
+
+               if (frec_magic(r) != NTDB_FREE_MAGIC) {
+                       ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+                                          "lock_and_alloc:"
+                                          " %llu non-free 0x%llx",
+                                          (long long)off,
+                                          (long long)r->magic_and_prev);
+                       ntdb_access_release(ntdb, r);
+                       goto unlock_err;
+               }
+
+               /* Best fit so far: big enough, and smaller than the last. */
+               if (frec_len(r) >= size && frec_len(r) < frec_len(&best)) {
+                       best_off = off;
+                       best = *r;
+               }
+
+               /* Good enough already?  Then stop looking. */
+               if (frec_len(&best) <= size * multiplier && best_off) {
+                       ntdb_access_release(ntdb, r);
+                       break;
+               }
+
+               /* Each record we pass makes us 1% more tolerant. */
+               multiplier *= 1.01;
+
+               /* Grab next before we release our view of the record. */
+               next = r->next;
+               ntdb_access_release(ntdb, r);
+               off = next;
+       }
+
+       /* If we found anything at all, use it. */
+       if (best_off) {
+               struct ntdb_used_record rec;
+               size_t leftover;
+
+               /* We're happy with this size: take it. */
+               ecode = remove_from_list(ntdb, b_off, best_off, &best);
+               check_list(ntdb, b_off);
+               if (ecode != NTDB_SUCCESS) {
+                       goto unlock_err;
+               }
+
+               leftover = record_leftover(keylen, datalen, want_extra,
+                                          frec_len(&best));
+
+               assert(keylen + datalen + leftover <= frec_len(&best));
+               /* We need to mark non-free before we drop lock, otherwise
+                * coalesce() could try to merge it! */
+               ecode = set_header(ntdb, &rec, magic, keylen, datalen,
+                                  frec_len(&best) - leftover, hashlow);
+               if (ecode != NTDB_SUCCESS) {
+                       goto unlock_err;
+               }
+
+               ecode = ntdb_write_convert(ntdb, best_off, &rec, sizeof(rec));
+               if (ecode != NTDB_SUCCESS) {
+                       goto unlock_err;
+               }
+
+               /* For futureproofing, we put a 0 in any unused space. */
+               if (rec_extra_padding(&rec)) {
+                       ecode = ntdb->io->twrite(ntdb, best_off + sizeof(rec)
+                                               + keylen + datalen, "", 1);
+                       if (ecode != NTDB_SUCCESS) {
+                               goto unlock_err;
+                       }
+               }
+
+               /* Bucket of leftover will be <= current bucket, so nested
+                * locking is allowed. */
+               if (leftover) {
+                       ntdb->stats.alloc_leftover++;
+                       ecode = add_free_record(ntdb,
+                                               best_off + sizeof(rec)
+                                               + frec_len(&best) - leftover,
+                                               leftover, NTDB_LOCK_WAIT, false);
+                       if (ecode != NTDB_SUCCESS) {
+                               best_off = NTDB_ERR_TO_OFF(ecode);
+                       }
+               }
+               ntdb_unlock_free_bucket(ntdb, b_off);
+
+               return best_off;
+       }
+
+       /* Nothing in this bucket was big enough. */
+       ntdb_unlock_free_bucket(ntdb, b_off);
+       return 0;
+
+unlock_err:
+       ntdb_unlock_free_bucket(ntdb, b_off);
+       return NTDB_ERR_TO_OFF(ecode);
+}
+
+/* Get a free block from current free list, or 0 if none, -ve on error.
+ *
+ * Starts with the free table the context is currently using and, if that
+ * yields nothing, moves on through the following tables, wrapping round to
+ * the first one, until it is back where it started.  Within a table the
+ * buckets are tried from the one matching the wanted size upwards.  On
+ * success the context is switched to the table which delivered.
+ *
+ * keylen, datalen, want_extra, magic and hashlow are handed on to
+ * lock_and_alloc() unchanged. */
+static ntdb_off_t get_free(struct ntdb_context *ntdb,
+                         size_t keylen, size_t datalen, bool want_extra,
+                         unsigned magic, unsigned hashlow)
+{
+       ntdb_off_t off, ftable_off;
+       ntdb_off_t start_b, b, ftable;
+       bool wrapped = false;
+
+       /* If they are growing, add 50% to get to higher bucket. */
+       if (want_extra)
+               start_b = size_to_bucket(adjust_size(keylen,
+                                                    datalen + datalen / 2));
+       else
+               start_b = size_to_bucket(adjust_size(keylen, datalen));
+
+       ftable_off = ntdb->ftable_off;
+       ftable = ntdb->ftable;
+       while (!wrapped || ftable_off != ntdb->ftable_off) {
+               /* Start at exact size bucket, and search up...
+                * NOTE(review): find_free_head() presumably returns the
+                * next non-empty bucket at or after the one given, or an
+                * error offset, which also ends this loop -- confirm. */
+               for (b = find_free_head(ntdb, ftable_off, start_b);
+                    b < NTDB_FREE_BUCKETS;
+                    b = find_free_head(ntdb, ftable_off, b + 1)) {
+                       /* Try getting one from list. */
+                       off = lock_and_alloc(ntdb, ftable_off,
+                                            b, keylen, datalen, want_extra,
+                                            magic, hashlow);
+                       if (NTDB_OFF_IS_ERR(off))
+                               return off;
+                       if (off != 0) {
+                               if (b == start_b)
+                                       ntdb->stats.alloc_bucket_exact++;
+                               if (b == NTDB_FREE_BUCKETS - 1)
+                                       ntdb->stats.alloc_bucket_max++;
+                               /* Worked?  Stay using this list. */
+                               ntdb->ftable_off = ftable_off;
+                               ntdb->ftable = ftable;
+                               return off;
+                       }
+                       /* Didn't work.  Try next bucket. */
+               }
+
+               if (NTDB_OFF_IS_ERR(b)) {
+                       return b;
+               }
+
+               /* Hmm, try next table.  An offset of 0 means we ran off the
+                * end of the chain and start again from the first table. */
+               ftable_off = next_ftable(ntdb, ftable_off);
+               if (NTDB_OFF_IS_ERR(ftable_off)) {
+                       return ftable_off;
+               }
+               ftable++;
+
+               if (ftable_off == 0) {
+                       wrapped = true;
+                       ftable_off = first_ftable(ntdb);
+                       if (NTDB_OFF_IS_ERR(ftable_off)) {
+                               return ftable_off;
+                       }
+                       ftable = 0;
+               }
+       }
+
+       return 0;
+}
+
+/* Fill in the two header words of a used record.
+ *
+ * rec receives the encoding of magic, keylen, datalen, the padding
+ * (actuallen minus key and data) and the low 11 bits of hashlow.
+ *
+ * Returns NTDB_SUCCESS, or the result of logging NTDB_ERR_IO if the values
+ * do not survive a round trip through the encoding. */
+enum NTDB_ERROR set_header(struct ntdb_context *ntdb,
+                         struct ntdb_used_record *rec,
+                         unsigned magic, uint64_t keylen, uint64_t datalen,
+                         uint64_t actuallen, unsigned hashlow)
+{
+       const uint64_t padding = actuallen - (keylen + datalen);
+       const uint64_t keybits = (fls64(keylen) + 1) / 2;
+
+       /* From the top: magic at bit 48, keybits at bit 43, padding at bit
+        * 11, and the bottom bits of the hash (so it's independent of hash
+        * table size) in the low 11 bits. */
+       rec->magic_and_meta = ((uint64_t)magic << 48)
+               | (keybits << 43)
+               | (padding << 11)
+               | (hashlow & ((1 << 11)-1));
+       rec->key_and_data_len = ((datalen << (keybits*2)) | keylen);
+
+       /* Encoding can fail on big values: only accept it if every field
+        * decodes back to what we were given. */
+       if (rec_key_length(rec) == keylen
+           && rec_data_length(rec) == datalen
+           && rec_extra_padding(rec) == padding) {
+               return NTDB_SUCCESS;
+       }
+
+       return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+                         "Could not encode k=%llu,d=%llu,a=%llu",
+                         (long long)keylen, (long long)datalen,
+                         (long long)actuallen);
+}
+
+/* You need 'size', this tells you how much you should expand by.
+ *
+ * map_size is the current size.  The result is the larger of two growth
+ * targets, expressed as the number of bytes to add to map_size. */
+ntdb_off_t ntdb_expand_adjust(ntdb_off_t map_size, ntdb_off_t size)
+{
+       ntdb_off_t floor_size, grown_size;
+
+       /* Room for 100 records of this size -- but only 2 once size is
+        * over 100KiB, to avoid using up huge amounts of memory for in
+        * memory tdbs if an oddball huge record creeps in. */
+       floor_size = map_size + size * ((size > 100 * 1024) ? 2 : 100);
+
+       /* Proportional growth: 25% while the DB is no bigger than 100MiB,
+        * 10% beyond that. */
+       grown_size = map_size * ((map_size > 100 * 1024 * 1024) ? 1.10 : 1.25);
+
+       /* Whichever target is bigger wins; hand back the increase. */
+       return ((grown_size < floor_size) ? floor_size : grown_size) - map_size;
+}
+
+/* Expand the database.
+ *
+ * Grows the file so that at least size more bytes fit (over-allocating as
+ * ntdb_expand_adjust() advises) and adds the new space as one free record.
+ *
+ * Returns NTDB_SUCCESS -- also when the file turned out to have been
+ * expanded by someone else, in which case nothing is added -- or the error
+ * from locking, expanding the file, or adding the free record. */
+static enum NTDB_ERROR ntdb_expand(struct ntdb_context *ntdb, ntdb_len_t size)
+{
+       uint64_t old_size;
+       ntdb_len_t wanted;
+       enum NTDB_ERROR ecode;
+
+       /* Need to hold a hash lock to expand DB: transactions rely on it.
+        * (Not enforced when the database was opened with NTDB_NOLOCK; an
+        * allrecord lock counts as well.) */
+       if (!(ntdb->flags & NTDB_NOLOCK)
+           && !ntdb->file->allrecord_lock.count && !ntdb_has_hash_locks(ntdb)) {
+               return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+                                 "ntdb_expand: must hold lock during expand");
+       }
+
+       /* Only one person can expand file at a time. */
+       ecode = ntdb_lock_expand(ntdb, F_WRLCK);
+       if (ecode != NTDB_SUCCESS) {
+               return ecode;
+       }
+
+       /* Someone else may have expanded the file, so retry.
+        * NOTE(review): the oob() result is ignored; the call is presumably
+        * only made so that map_size gets refreshed -- confirm. */
+       old_size = ntdb->file->map_size;
+       ntdb->io->oob(ntdb, ntdb->file->map_size, 1, true);
+       if (ntdb->file->map_size != old_size) {
+               ntdb_unlock_expand(ntdb, F_WRLCK);
+               return NTDB_SUCCESS;
+       }
+
+       /* Overallocate. */
+       wanted = ntdb_expand_adjust(old_size, size);
+       /* We need room for the record header too. */
+       wanted = adjust_size(0, sizeof(struct ntdb_used_record) + wanted);
+
+       ecode = ntdb->io->expand_file(ntdb, wanted);
+       if (ecode != NTDB_SUCCESS) {
+               ntdb_unlock_expand(ntdb, F_WRLCK);
+               return ecode;
+       }
+
+       /* We need to drop this lock before adding free record. */
+       ntdb_unlock_expand(ntdb, F_WRLCK);
+
+       ntdb->stats.expands++;
+       return add_free_record(ntdb, old_size, wanted, NTDB_LOCK_WAIT, true);
+}
+
+/* Allocate room for a record with the given key and data lengths.
+ *
+ * An empty free list is not a failure: the database is expanded and the
+ * search repeated until get_free() comes back with something.  growing is
+ * passed on as get_free()'s want_extra, hash as its hashlow.
+ *
+ * Returns whatever non-zero offset get_free() produced (which may itself
+ * be an error offset), or the expand error converted by NTDB_ERR_TO_OFF. */
+ntdb_off_t alloc(struct ntdb_context *ntdb, size_t keylen, size_t datalen,
+               uint64_t hash, unsigned magic, bool growing)
+{
+       ntdb_off_t found;
+       enum NTDB_ERROR ecode;
+
+       /* We can't hold pointers during this: we could unmap! */
+       assert(!ntdb->direct_access);
+
+       do {
+               found = get_free(ntdb, keylen, datalen, growing, magic, hash);
+               if (likely(found != 0))
+                       return found;
+
+               /* Nothing free: make the file bigger and go round again. */
+               ecode = ntdb_expand(ntdb, adjust_size(keylen, datalen));
+       } while (ecode == NTDB_SUCCESS);
+
+       return NTDB_ERR_TO_OFF(ecode);
+}
diff --git a/lib/ntdb/hash.c b/lib/ntdb/hash.c
new file mode 100644 (file)
index 0000000..95b98c0
--- /dev/null
@@ -0,0 +1,894 @@
+ /*
+   Trivial Database 2: hash handling
+   Copyright (C) Rusty Russell 2010
+
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 3 of the License, or (at your option) any later version.
+
+   This library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include "private.h"
+#include <ccan/hash/hash.h>
+#include <assert.h>
+
+/* Default hash function: hash64_stable() with its two 32-bit halves
+ * exchanged.  The final argument is ignored. */
+uint64_t ntdb_jenkins_hash(const void *key, size_t length, uint64_t seed,
+                         void *unused)
+{
+       /* hash64_stable assumes lower bits are more important; they are a
+        * slightly better hash.  We use the upper bits first, so the low
+        * half is moved to the top and the high half to the bottom. */
+       const uint64_t raw = hash64_stable((const unsigned char *)key,
+                                          length, seed);
+       const uint64_t new_upper = raw << 32;
+       const uint64_t new_lower = raw >> 32;
+
+       return new_lower | new_upper;
+}
+
+/* Hash len bytes at ptr using the hash function, seed and private data
+ * stored in this ntdb context. */
+uint64_t ntdb_hash(struct ntdb_context *ntdb, const void *ptr, size_t len)
+{
+       return ntdb->hash_fn(ptr, len, ntdb->hash_seed, ntdb->hash_data);
+}
+
+/* Compute the hash of the key of the record whose header is at off.
+ *
+ * The header is read first to learn the key length, then the key bytes
+ * (which directly follow the header) are read and hashed with ntdb_hash().
+ *
+ * Returns 0 if either read fails; callers cannot tell that apart from a
+ * key which genuinely hashes to 0 (see the FIXME below). */
+uint64_t hash_record(struct ntdb_context *ntdb, ntdb_off_t off)
+{
+       const struct ntdb_used_record *r;
+       const void *key;
+       uint64_t klen, hash;
+
+       r = ntdb_access_read(ntdb, off, sizeof(*r), true);
+       if (NTDB_PTR_IS_ERR(r)) {
+               /* FIXME */
+               return 0;
+       }
+
+       klen = rec_key_length(r);
+       /* Header no longer needed once the key length is known. */
+       ntdb_access_release(ntdb, r);
+
+       key = ntdb_access_read(ntdb, off + sizeof(*r), klen, false);
+       if (NTDB_PTR_IS_ERR(key)) {
+               /* Same unreported-error limitation as above. */
+               return 0;
+       }
+
+       hash = ntdb_hash(ntdb, key, klen);
+       ntdb_access_release(ntdb, key);
+       return hash;
+}
+
+/* Get bits from a value: returns the num bits of val starting at bit
+ * position start (bit 0 is the least significant).
+ *
+ * num may be anything from 0 to 32 inclusive.  The mask is built in 64-bit
+ * arithmetic because shifting a 32-bit 1U left by 32 (the num == 32 case
+ * the assert permits) is undefined behaviour in C. */
+static uint32_t bits_from(uint64_t val, unsigned start, unsigned num)
+{
+       assert(num <= 32);
+       return (val >> start) & ((1ULL << num) - 1);
+}
+
+/* We take bits from the top: that way we can lock whole sections of the hash
+ * by using lock ranges.
+ *
+ * Consumes the next num bits of h->h, working downwards from bit 63:
+ * h->hash_used is advanced by num and the newly consumed bits are
+ * returned. */
+static uint32_t use_bits(struct hash_info *h, unsigned num)
+{
+       h->hash_used += num;
+       return bits_from(h->h, 64 - h->hash_used, num);
+}
+
+/* Compare the key stored in the record at off (header already in *rec)
+ * against *key.
+ *
+ * Returns true on a match, false on a length or content mismatch (each
+ * mismatch kind bumps its own statistics counter), or the error from
+ * reading the stored key, cast to ntdb_bool_err. */
+static ntdb_bool_err key_matches(struct ntdb_context *ntdb,
+                               const struct ntdb_used_record *rec,
+                               ntdb_off_t off,
+                               const NTDB_DATA *key)
+{
+       ntdb_bool_err ret = false;
+       const char *rkey;
+
+       /* Cheap rejection: different lengths can never match. */
+       if (rec_key_length(rec) != key->dsize) {
+               ntdb->stats.compare_wrong_keylen++;
+               return ret;
+       }
+
+       /* The key bytes directly follow the record header. */
+       rkey = ntdb_access_read(ntdb, off + sizeof(*rec), key->dsize, false);
+       if (NTDB_PTR_IS_ERR(rkey)) {
+               return (ntdb_bool_err)NTDB_PTR_ERR(rkey);
+       }
+       if (memcmp(rkey, key->dptr, key->dsize) == 0)
+               ret = true;
+       else
+               ntdb->stats.compare_wrong_keycmp++;
+       ntdb_access_release(ntdb, rkey);
+       return ret;
+}
+
+/* Does entry match?
+ *
+ * val is a raw hash-bucket value.  Three progressively more expensive
+ * filters are applied before the key itself is compared: the home bucket
+ * encoded in val, the extra hash bits encoded in val, and the hash bits
+ * stored in the record header.  Each rejection bumps its own statistics
+ * counter.
+ *
+ * On return *rec holds the record header if it got as far as being read.
+ * Returns true/false, or a read error cast to ntdb_bool_err. */
+static ntdb_bool_err match(struct ntdb_context *ntdb,
+                         struct hash_info *h,
+                         const NTDB_DATA *key,
+                         ntdb_off_t val,
+                         struct ntdb_used_record *rec)
+{
+       ntdb_off_t off;
+       enum NTDB_ERROR ecode;
+
+       ntdb->stats.compares++;
+       /* Desired bucket must match. */
+       if (h->home_bucket != (val & NTDB_OFF_HASH_GROUP_MASK)) {
+               ntdb->stats.compare_wrong_bucket++;
+               return false;
+       }
+
+       /* Top bits of offset == next bits of hash. */
+       if (bits_from(val, NTDB_OFF_HASH_EXTRA_BIT, NTDB_OFF_UPPER_STEAL_EXTRA)
+           != bits_from(h->h, 64 - h->hash_used - NTDB_OFF_UPPER_STEAL_EXTRA,
+                   NTDB_OFF_UPPER_STEAL_EXTRA)) {
+               ntdb->stats.compare_wrong_offsetbits++;
+               return false;
+       }
+
+       /* Strip the stolen bits to get the real record offset. */
+       off = val & NTDB_OFF_MASK;
+       ecode = ntdb_read_convert(ntdb, off, rec, sizeof(*rec));
+       if (ecode != NTDB_SUCCESS) {
+               return (ntdb_bool_err)ecode;
+       }
+
+       /* The low 11 bits of the hash must equal rec_hash() of the header. */
+       if ((h->h & ((1 << 11)-1)) != rec_hash(rec)) {
+               ntdb->stats.compare_wrong_rechash++;
+               return false;
+       }
+
+       return key_matches(ntdb, rec, off, key);
+}
+
+/* File offset of the slot for bucket within the hash group which starts at
+ * group_start.  The bucket number wraps modulo the buckets-per-group. */
+static ntdb_off_t hbucket_off(ntdb_off_t group_start, unsigned bucket)
+{
+       const unsigned slot = bucket % (1 << NTDB_HASH_GROUP_BITS);
+
+       return group_start + slot * sizeof(ntdb_off_t);
+}
+
+/* True if the bucket value val has the NTDB_OFF_UPPER_STEAL_SUBHASH_BIT
+ * bit set, i.e. it refers to a sub-hashtable rather than a record. */
+bool is_subhash(ntdb_off_t val)
+{
+       return (val >> NTDB_OFF_UPPER_STEAL_SUBHASH_BIT) & 1;
+}
+
+/* Work out the hash range to lock for a top-level group: stores the length
+ * of the range in *size and returns where the range starts.
+ *
+ * FIXME: Guess the depth, don't over-lock! */
+static ntdb_off_t hlock_range(ntdb_off_t group, ntdb_off_t *size)
+{
+       /* Number of hash bits below those which select the group. */
+       const unsigned shift
+               = 64 - (NTDB_TOPLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS);
+
+       *size = 1ULL << shift;
+       return group << shift;
+}
+
+/* Linear search for key through a chain of groups beginning at chain.
+ *
+ * Each link is read into h->group (with h->group_start set to it) and
+ * every occupied bucket's record has its key compared directly.
+ *
+ * Returns the record offset if found (with *rec holding its header, h's
+ * home/found bucket set to its slot and, if tinfo is given, one more
+ * traverse level appended), 0 if not found, or an error encoded with
+ * NTDB_ERR_TO_OFF().  When not found, h's home/found bucket is the last
+ * empty slot seen in the last link read, or 0 if there was none. */
+static ntdb_off_t COLD find_in_chain(struct ntdb_context *ntdb,
+                                   NTDB_DATA key,
+                                   ntdb_off_t chain,
+                                   struct hash_info *h,
+                                   struct ntdb_used_record *rec,
+                                   struct traverse_info *tinfo)
+{
+       ntdb_off_t off, next;
+       enum NTDB_ERROR ecode;
+
+       /* In case nothing is free, we set these to zero. */
+       h->home_bucket = h->found_bucket = 0;
+
+       for (off = chain; off; off = next) {
+               unsigned int i;
+
+               h->group_start = off;
+               ecode = ntdb_read_convert(ntdb, off, h->group, sizeof(h->group));
+               if (ecode != NTDB_SUCCESS) {
+                       return NTDB_ERR_TO_OFF(ecode);
+               }
+
+               for (i = 0; i < (1 << NTDB_HASH_GROUP_BITS); i++) {
+                       ntdb_off_t recoff;
+                       if (!h->group[i]) {
+                               /* Remember this empty bucket. */
+                               h->home_bucket = h->found_bucket = i;
+                               continue;
+                       }
+
+                       /* We can insert extra bits via add_to_hash
+                        * empty bucket logic. */
+                       recoff = h->group[i] & NTDB_OFF_MASK;
+                       ecode = ntdb_read_convert(ntdb, recoff, rec,
+                                                sizeof(*rec));
+                       if (ecode != NTDB_SUCCESS) {
+                               return NTDB_ERR_TO_OFF(ecode);
+                       }
+
+                       /* key_matches() yields a negative error, 0 (no
+                        * match) or 1 (match); ecode is reused to hold
+                        * that tri-state. */
+                       ecode = NTDB_OFF_TO_ERR(key_matches(ntdb, rec, recoff,
+                                                          &key));
+                       if (ecode < 0) {
+                               return NTDB_ERR_TO_OFF(ecode);
+                       }
+                       if (ecode == (enum NTDB_ERROR)1) {
+                               h->home_bucket = h->found_bucket = i;
+
+                               if (tinfo) {
+                                       tinfo->levels[tinfo->num_levels]
+                                               .hashtable = off;
+                                       tinfo->levels[tinfo->num_levels]
+                                               .total_buckets
+                                               = 1 << NTDB_HASH_GROUP_BITS;
+                                       tinfo->levels[tinfo->num_levels].entry
+                                               = i;
+                                       tinfo->num_levels++;
+                               }
+                               return recoff;
+                       }
+               }
+               next = ntdb_read_off(ntdb, off
+                                   + offsetof(struct ntdb_chain, next));
+               if (NTDB_OFF_IS_ERR(next)) {
+                       return next;
+               }
+               /* The stored link is skipped past a record header's worth
+                * of bytes to reach the next link's buckets. */
+               if (next)
+                       next += sizeof(struct ntdb_used_record);
+       }
+       return 0;
+}
+
+/* This is the core routine which searches the hashtable for an entry.
+ * On error, no locks are held and -ve is returned.
+ * Otherwise, hinfo is filled in (and the optional tinfo).
+ * If not found, the return value is 0.
+ * If found, the return value is the offset, and *rec is the record.
+ *
+ * The hash range for the key's top-level group is locked with ltype
+ * before searching, and is still held on every non-error return. */
+ntdb_off_t find_and_lock(struct ntdb_context *ntdb,
+                       NTDB_DATA key,
+                       int ltype,
+                       struct hash_info *h,
+                       struct ntdb_used_record *rec,
+                       struct traverse_info *tinfo)
+{
+       uint32_t i, group;
+       ntdb_off_t hashtable, off;
+       enum NTDB_ERROR ecode;
+
+       /* The top hash bits pick the top-level group, the next ones the
+        * home bucket inside that group. */
+       h->h = ntdb_hash(ntdb, key.dptr, key.dsize);
+       h->hash_used = 0;
+       group = use_bits(h, NTDB_TOPLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS);
+       h->home_bucket = use_bits(h, NTDB_HASH_GROUP_BITS);
+
+       h->hlock_start = hlock_range(group, &h->hlock_range);
+       ecode = ntdb_lock_hashes(ntdb, h->hlock_start, h->hlock_range, ltype,
+                               NTDB_LOCK_WAIT);
+       if (ecode != NTDB_SUCCESS) {
+               /* Locking itself failed, so there is nothing to undo. */
+               return NTDB_ERR_TO_OFF(ecode);
+       }
+
+       hashtable = offsetof(struct ntdb_header, hashtable);
+       if (tinfo) {
+               tinfo->toplevel_group = group;
+               tinfo->num_levels = 1;
+               tinfo->levels[0].entry = 0;
+               tinfo->levels[0].hashtable = hashtable
+                       + (group << NTDB_HASH_GROUP_BITS) * sizeof(ntdb_off_t);
+               tinfo->levels[0].total_buckets = 1 << NTDB_HASH_GROUP_BITS;
+       }
+
+       while (h->hash_used <= 64) {
+               /* Read in the hash group. */
+               h->group_start = hashtable
+                       + group * (sizeof(ntdb_off_t) << NTDB_HASH_GROUP_BITS);
+
+               ecode = ntdb_read_convert(ntdb, h->group_start, &h->group,
+                                        sizeof(h->group));
+               if (ecode != NTDB_SUCCESS) {
+                       goto fail;
+               }
+
+               /* Pointer to another hash table?  Go down... */
+               if (is_subhash(h->group[h->home_bucket])) {
+                       hashtable = (h->group[h->home_bucket] & NTDB_OFF_MASK)
+                               + sizeof(struct ntdb_used_record);
+                       if (tinfo) {
+                               /* When we come back, use *next* bucket */
+                               tinfo->levels[tinfo->num_levels-1].entry
+                                       += h->home_bucket + 1;
+                       }
+                       group = use_bits(h, NTDB_SUBLEVEL_HASH_BITS
+                                        - NTDB_HASH_GROUP_BITS);
+                       h->home_bucket = use_bits(h, NTDB_HASH_GROUP_BITS);
+                       if (tinfo) {
+                               tinfo->levels[tinfo->num_levels].hashtable
+                                       = hashtable;
+                               tinfo->levels[tinfo->num_levels].total_buckets
+                                       = 1 << NTDB_SUBLEVEL_HASH_BITS;
+                               tinfo->levels[tinfo->num_levels].entry
+                                       = group << NTDB_HASH_GROUP_BITS;
+                               tinfo->num_levels++;
+                       }
+                       continue;
+               }
+
+               /* It's in this group: search (until 0 or all searched) */
+               for (i = 0, h->found_bucket = h->home_bucket;
+                    i < (1 << NTDB_HASH_GROUP_BITS);
+                    i++, h->found_bucket = ((h->found_bucket+1)
+                                            % (1 << NTDB_HASH_GROUP_BITS))) {
+                       ntdb_bool_err berr;
+                       if (is_subhash(h->group[h->found_bucket]))
+                               continue;
+
+                       if (!h->group[h->found_bucket])
+                               break;
+
+                       berr = match(ntdb, h, &key, h->group[h->found_bucket],
+                                    rec);
+                       if (berr < 0) {
+                               ecode = NTDB_OFF_TO_ERR(berr);
+                               goto fail;
+                       }
+                       if (berr) {
+                               if (tinfo) {
+                                       tinfo->levels[tinfo->num_levels-1].entry
+                                               += h->found_bucket;
+                               }
+                               return h->group[h->found_bucket] & NTDB_OFF_MASK;
+                       }
+               }
+               /* Didn't find it: h indicates where it would go. */
+               return 0;
+       }
+
+       /* hash_used has gone past 64: what remains is a chain, searched
+        * linearly.  A chain error must go through the unlock path as well,
+        * otherwise the hash lock would stay held on an error return. */
+       off = find_in_chain(ntdb, key, hashtable, h, rec, tinfo);
+       if (NTDB_OFF_IS_ERR(off)) {
+               ecode = NTDB_OFF_TO_ERR(off);
+               goto fail;
+       }
+       return off;
+
+fail:
+       ntdb_unlock_hashes(ntdb, h->hlock_start, h->hlock_range, ltype);
+       return NTDB_ERR_TO_OFF(ecode);
+}
+
+/* I wrote a simple test, expanding a hash to 2GB, for the following
+ * cases:
+ * 1) Expanding all the buckets at once,
+ * 2) Expanding the bucket we wanted to place the new entry into.
+ * 3) Expanding the most-populated bucket,
+ *
+ * I measured the worst/average/best density during this process.
+ * 1) 3%/16%/30%
+ * 2) 4%/20%/38%
+ * 3) 6%/22%/41%
+ *
+ * So we figure out the busiest bucket for the moment.
+ *
+ * Returns the home bucket shared by the most entries in group, counting
+ * the entry about to be added (whose home bucket is new_bucket).  Subhash
+ * pointers are not counted.  Ties keep the earlier winner, starting with
+ * new_bucket.  The ntdb argument is not used.
+ */
+static unsigned fullest_bucket(struct ntdb_context *ntdb,
+                              const ntdb_off_t *group,
+                              unsigned new_bucket)
+{
+       unsigned counts[1 << NTDB_HASH_GROUP_BITS] = { 0 };
+       unsigned int i, best_bucket;
+
+       /* Count the new entry. */
+       counts[new_bucket]++;
+       best_bucket = new_bucket;
+
+       for (i = 0; i < (1 << NTDB_HASH_GROUP_BITS); i++) {
+               unsigned this_bucket;
+
+               if (is_subhash(group[i]))
+                       continue;
+               /* NOTE(review): an empty slot (value 0) is not skipped and
+                * so counts towards bucket 0; presumably callers only get
+                * here with a full group - confirm. */
+               this_bucket = group[i] & NTDB_OFF_HASH_GROUP_MASK;
+               if (++counts[this_bucket] > counts[best_bucket])
+                       best_bucket = this_bucket;
+       }
+
+       return best_bucket;
+}
+
+/* Store encoded in the first empty slot of group, probing forward from
+ * bucket and wrapping around the end of the group.
+ * Returns false, leaving group untouched, when no slot is empty. */
+static bool put_into_group(ntdb_off_t *group,
+                          unsigned bucket, ntdb_off_t encoded)
+{
+       const unsigned nbuckets = 1 << NTDB_HASH_GROUP_BITS;
+       unsigned int step;
+
+       for (step = 0; step < nbuckets; step++) {
+               ntdb_off_t *slot = &group[(bucket + step) % nbuckets];
+
+               if (*slot != 0)
+                       continue;
+
+               *slot = encoded;
+               return true;
+       }
+       return false;
+}
+
+/* Like put_into_group(), for callers which know there must be room:
+ * aborts the process if the group turns out to be full. */
+static void force_into_group(ntdb_off_t *group,
+                            unsigned bucket, ntdb_off_t encoded)
+{
+       if (!put_into_group(group, bucket, encoded))
+               abort();
+}
+
+/* Build the value to store in a hash bucket for the record at new_off:
+ * the offset itself, ORed with h->home_bucket and with the next
+ * NTDB_OFF_UPPER_STEAL_EXTRA not-yet-used hash bits placed at
+ * NTDB_OFF_HASH_EXTRA_BIT.  match() tests these same fields to reject
+ * candidates without reading the record. */
+static ntdb_off_t encode_offset(ntdb_off_t new_off, struct hash_info *h)
+{
+       return h->home_bucket
+               | new_off
+               | ((uint64_t)bits_from(h->h,
+                                 64 - h->hash_used - NTDB_OFF_UPPER_STEAL_EXTRA,
+                                 NTDB_OFF_UPPER_STEAL_EXTRA)
+                  << NTDB_OFF_HASH_EXTRA_BIT);
+}
+
+/* Simply overwrite the hash entry we found before.
+ *
+ * Writes the encoded form of new_off into the slot identified by
+ * h->group_start and h->found_bucket; returns ntdb_write_off()'s result. */
+enum NTDB_ERROR replace_in_hash(struct ntdb_context *ntdb,
+                              struct hash_info *h,
+                              ntdb_off_t new_off)
+{
+       return ntdb_write_off(ntdb, hbucket_off(h->group_start, h->found_bucket),
+                            encode_offset(new_off, h));
+}
+
+/* We slot in anywhere that's empty in the chain.
+ *
+ * subhash is the offset of a chain link's bucket array (i.e. just past
+ * that link's record header).  new_off is written into the first zero
+ * bucket of the link; if the link is full, the next link is used,
+ * allocating, zeroing and linking in a fresh one when there is none yet.
+ *
+ * Returns NTDB_SUCCESS or the first error encountered. */
+static enum NTDB_ERROR COLD add_to_chain(struct ntdb_context *ntdb,
+                                       ntdb_off_t subhash,
+                                       ntdb_off_t new_off)
+{
+       ntdb_off_t entry;
+       enum NTDB_ERROR ecode;
+
+       entry = ntdb_find_zero_off(ntdb, subhash, 1<<NTDB_HASH_GROUP_BITS);
+       if (NTDB_OFF_IS_ERR(entry)) {
+               return NTDB_OFF_TO_ERR(entry);
+       }
+
+       /* No zero bucket in this link: move on to the next one. */
+       if (entry == 1 << NTDB_HASH_GROUP_BITS) {
+               ntdb_off_t next;
+
+               next = ntdb_read_off(ntdb, subhash
+                                   + offsetof(struct ntdb_chain, next));
+               if (NTDB_OFF_IS_ERR(next)) {
+                       return NTDB_OFF_TO_ERR(next);
+               }
+
+               if (!next) {
+                       next = alloc(ntdb, 0, sizeof(struct ntdb_chain), 0,
+                                    NTDB_CHAIN_MAGIC, false);
+                       if (NTDB_OFF_IS_ERR(next))
+                               return NTDB_OFF_TO_ERR(next);
+                       ecode = zero_out(ntdb,
+                                        next+sizeof(struct ntdb_used_record),
+                                        sizeof(struct ntdb_chain));
+                       if (ecode != NTDB_SUCCESS) {
+                               return ecode;
+                       }
+                       ecode = ntdb_write_off(ntdb, subhash
+                                             + offsetof(struct ntdb_chain,
+                                                        next),
+                                             next);
+                       if (ecode != NTDB_SUCCESS) {
+                               return ecode;
+                       }
+               }
+               /* The stored link is the offset of the record header; the
+                * buckets start after it.  Skip the header, as every other
+                * walker of the chain does, so that we neither scan the
+                * header as if it were buckets nor mistake a bucket for the
+                * next pointer. */
+               return add_to_chain(ntdb,
+                                   next + sizeof(struct ntdb_used_record),
+                                   new_off);
+       }
+
+       return ntdb_write_off(ntdb, subhash + entry * sizeof(ntdb_off_t),
+                            new_off);
+}
+
+/* Add into a newly created subhash.
+ *
+ * val is a bucket value being moved down from the parent group; subhash
+ * is the offset of the new table's contents and hash_used the number of
+ * hash bits already consumed above it.  If there are not enough hash bits
+ * left for another sublevel, the entry is added to a chain instead.
+ * Otherwise the record's hash is recomputed from its key to pick the
+ * group and home bucket, and the re-encoded entry is placed there. */
+static enum NTDB_ERROR add_to_subhash(struct ntdb_context *ntdb, ntdb_off_t subhash,
+                                    unsigned hash_used, ntdb_off_t val)
+{
+       ntdb_off_t off = (val & NTDB_OFF_MASK), *group;
+       struct hash_info h;
+       unsigned int gnum;
+
+       h.hash_used = hash_used;
+
+       if (hash_used + NTDB_SUBLEVEL_HASH_BITS > 64)
+               return add_to_chain(ntdb, subhash, off);
+
+       /* NOTE(review): hash_record() returns 0 on a read error, which is
+        * not detected here. */
+       h.h = hash_record(ntdb, off);
+       gnum = use_bits(&h, NTDB_SUBLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS);
+       h.group_start = subhash
+               + gnum * (sizeof(ntdb_off_t) << NTDB_HASH_GROUP_BITS);
+       h.home_bucket = use_bits(&h, NTDB_HASH_GROUP_BITS);
+
+       group = ntdb_access_write(ntdb, h.group_start,
+                                sizeof(*group) << NTDB_HASH_GROUP_BITS, true);
+       if (NTDB_PTR_IS_ERR(group)) {
+               return NTDB_PTR_ERR(group);
+       }
+       /* Aborts if the target group has no empty slot. */
+       force_into_group(group, h.home_bucket, encode_offset(off, &h));
+       return ntdb_access_commit(ntdb, group);
+}
+
+/* Make room in the full group held in h->group by hanging a new, empty
+ * subhash (or a chain, once all 64 hash bits are used) under its fullest
+ * bucket.
+ *
+ * Entries whose home is that bucket move down into the new table.  Entries
+ * which were sitting outside their home bucket are taken out and put back,
+ * so they can settle closer to home.  Only the in-memory copy h->group is
+ * modified here: nothing in this function writes the group back. */
+static enum NTDB_ERROR expand_group(struct ntdb_context *ntdb, struct hash_info *h)
+{
+       unsigned bucket, num_vals, i, magic;
+       size_t subsize;
+       ntdb_off_t subhash;
+       ntdb_off_t vals[1 << NTDB_HASH_GROUP_BITS];
+       enum NTDB_ERROR ecode;
+
+       /* Attach new empty subhash under fullest bucket. */
+       bucket = fullest_bucket(ntdb, h->group, h->home_bucket);
+
+       if (h->hash_used == 64) {
+               ntdb->stats.alloc_chain++;
+               subsize = sizeof(struct ntdb_chain);
+               magic = NTDB_CHAIN_MAGIC;
+       } else {
+               ntdb->stats.alloc_subhash++;
+               subsize = (sizeof(ntdb_off_t) << NTDB_SUBLEVEL_HASH_BITS);
+               magic = NTDB_HTABLE_MAGIC;
+       }
+
+       subhash = alloc(ntdb, 0, subsize, 0, magic, false);
+       if (NTDB_OFF_IS_ERR(subhash)) {
+               return NTDB_OFF_TO_ERR(subhash);
+       }
+
+       /* The new table must start out all-empty. */
+       ecode = zero_out(ntdb, subhash + sizeof(struct ntdb_used_record),
+                        subsize);
+       if (ecode != NTDB_SUCCESS) {
+               return ecode;
+       }
+
+       /* Remove any which are destined for bucket or are in wrong place. */
+       num_vals = 0;
+       for (i = 0; i < (1 << NTDB_HASH_GROUP_BITS); i++) {
+               unsigned home_bucket = h->group[i] & NTDB_OFF_HASH_GROUP_MASK;
+               if (!h->group[i] || is_subhash(h->group[i]))
+                       continue;
+               if (home_bucket == bucket || home_bucket != i) {
+                       vals[num_vals++] = h->group[i];
+                       h->group[i] = 0;
+               }
+       }
+       /* FIXME: This assert is valid, but we do this during unit test :( */
+       /* assert(num_vals); */
+
+       /* Overwrite expanded bucket with subhash pointer. */
+       h->group[bucket] = subhash | (1ULL << NTDB_OFF_UPPER_STEAL_SUBHASH_BIT);
+
+       /* Point to actual contents of record. */
+       subhash += sizeof(struct ntdb_used_record);
+
+       /* Put values back. */
+       for (i = 0; i < num_vals; i++) {
+               unsigned this_bucket = vals[i] & NTDB_OFF_HASH_GROUP_MASK;
+
+               if (this_bucket == bucket) {
+                       ecode = add_to_subhash(ntdb, subhash, h->hash_used,
+                                              vals[i]);
+                       if (ecode != NTDB_SUCCESS)
+                               return ecode;
+               } else {
+                       /* There should be room to put this back. */
+                       force_into_group(h->group, this_bucket, vals[i]);
+               }
+       }
+       return NTDB_SUCCESS;
+}
+
+/* Remove the entry at h->found_bucket from the group held in h->group and
+ * write the group back.
+ *
+ * Lookups stop probing at the first empty slot, so simply zeroing the slot
+ * could hide later entries which had been displaced past it.  The run of
+ * occupied slots following the deleted one is therefore scanned and every
+ * displaced entry in it is reinserted starting from its home bucket. */
+enum NTDB_ERROR delete_from_hash(struct ntdb_context *ntdb, struct hash_info *h)
+{
+       unsigned int i, num_movers = 0;
+       ntdb_off_t movers[1 << NTDB_HASH_GROUP_BITS];
+
+       h->group[h->found_bucket] = 0;
+       /* i starts at 1: look at the slots after the one just cleared. */
+       for (i = 1; i < (1 << NTDB_HASH_GROUP_BITS); i++) {
+               unsigned this_bucket;
+
+               this_bucket = (h->found_bucket+i) % (1 << NTDB_HASH_GROUP_BITS);
+               /* Empty bucket?  We're done. */
+               if (!h->group[this_bucket])
+                       break;
+
+               /* Ignore subhashes. */
+               if (is_subhash(h->group[this_bucket]))
+                       continue;
+
+               /* If this one is not happy where it is, we'll move it. */
+               if ((h->group[this_bucket] & NTDB_OFF_HASH_GROUP_MASK)
+                   != this_bucket) {
+                       movers[num_movers++] = h->group[this_bucket];
+                       h->group[this_bucket] = 0;
+               }
+       }
+
+       /* Put back the ones we erased. */
+       for (i = 0; i < num_movers; i++) {
+               force_into_group(h->group, movers[i] & NTDB_OFF_HASH_GROUP_MASK,
+                                movers[i]);
+       }
+
+       /* Now we write back the hash group */
+       return ntdb_write_convert(ntdb, h->group_start,
+                                h->group, sizeof(h->group));
+}
+
+/* Insert the record at new_off into the hash, at the position described
+ * by h.
+ *
+ * If the slot h->found_bucket is empty the entry goes straight in.  If all
+ * hash bits are already used up, it is appended to the chain.  Otherwise
+ * the group is expanded and insertion retried, descending into the new
+ * subhash when it was this entry's own home bucket that got expanded.
+ * h is updated in place as the search position moves down. */
+enum NTDB_ERROR add_to_hash(struct ntdb_context *ntdb, struct hash_info *h,
+                          ntdb_off_t new_off)
+{
+       enum NTDB_ERROR ecode;
+
+       /* We hit an empty bucket during search?  That's where it goes. */
+       if (!h->group[h->found_bucket]) {
+               h->group[h->found_bucket] = encode_offset(new_off, h);
+               /* Write back the modified group. */
+               return ntdb_write_convert(ntdb, h->group_start,
+                                        h->group, sizeof(h->group));
+       }
+
+       if (h->hash_used > 64)
+               return add_to_chain(ntdb, h->group_start, new_off);
+
+       /* We're full.  Expand. */
+       ecode = expand_group(ntdb, h);
+       if (ecode != NTDB_SUCCESS) {
+               return ecode;
+       }
+
+       if (is_subhash(h->group[h->home_bucket])) {
+               /* We were expanded! */
+               ntdb_off_t hashtable;
+               unsigned int gnum;
+
+               /* Write back the modified group. */
+               ecode = ntdb_write_convert(ntdb, h->group_start, h->group,
+                                         sizeof(h->group));
+               if (ecode != NTDB_SUCCESS) {
+                       return ecode;
+               }
+
+               /* Move hashinfo down a level. */
+               hashtable = (h->group[h->home_bucket] & NTDB_OFF_MASK)
+                       + sizeof(struct ntdb_used_record);
+               gnum = use_bits(h,NTDB_SUBLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS);
+               h->home_bucket = use_bits(h, NTDB_HASH_GROUP_BITS);
+               h->group_start = hashtable
+                       + gnum * (sizeof(ntdb_off_t) << NTDB_HASH_GROUP_BITS);
+               ecode = ntdb_read_convert(ntdb, h->group_start, &h->group,
+                                        sizeof(h->group));
+               if (ecode != NTDB_SUCCESS) {
+                       return ecode;
+               }
+       }
+
+       /* Expanding the group must have made room if it didn't choose this
+        * bucket. */
+       if (put_into_group(h->group, h->home_bucket, encode_offset(new_off,h))){
+               return ntdb_write_convert(ntdb, h->group_start,
+                                        h->group, sizeof(h->group));
+       }
+
+       /* This can happen if all hashes in group (and us) dropped into same
+        * group in subhash. */
+       return add_to_hash(ntdb, h, new_off);
+}
+
+/* Traverse support: returns offset of record, or 0 or -ve error.
+ *
+ * Resumes a depth-first walk from the position saved in tinfo.  At the
+ * current level it scans forward for non-zero buckets: a record is
+ * returned (and remembered in tinfo->prev); a subhash pointer pushes a new
+ * level and the scan restarts inside it.  When a level is exhausted the
+ * walk follows the chain's next link if it is on the chain level, else
+ * pops back up a level.  Returns 0 once level 1 is exhausted. */
+static ntdb_off_t iterate_hash(struct ntdb_context *ntdb,
+                             struct traverse_info *tinfo)
+{
+       ntdb_off_t off, val, i;
+       struct traverse_level *tlevel;
+
+       tlevel = &tinfo->levels[tinfo->num_levels-1];
+
+again:
+       for (i = ntdb_find_nonzero_off(ntdb, tlevel->hashtable,
+                                     tlevel->entry, tlevel->total_buckets);
+            i != tlevel->total_buckets;
+            i = ntdb_find_nonzero_off(ntdb, tlevel->hashtable,
+                                     i+1, tlevel->total_buckets)) {
+               if (NTDB_OFF_IS_ERR(i)) {
+                       return i;
+               }
+
+               val = ntdb_read_off(ntdb, tlevel->hashtable+sizeof(ntdb_off_t)*i);
+               if (NTDB_OFF_IS_ERR(val)) {
+                       return val;
+               }
+
+               off = val & NTDB_OFF_MASK;
+
+               /* This makes the delete-all-in-traverse case work
+                * (and simplifies our logic a little). */
+               if (off == tinfo->prev)
+                       continue;
+
+               tlevel->entry = i;
+
+               if (!is_subhash(val)) {
+                       /* Found one. */
+                       tinfo->prev = off;
+                       return off;
+               }
+
+               /* When we come back, we want the next one */
+               tlevel->entry++;
+               tinfo->num_levels++;
+               tlevel++;
+               tlevel->hashtable = off + sizeof(struct ntdb_used_record);
+               tlevel->entry = 0;
+               /* Next level is a chain? */
+               if (unlikely(tinfo->num_levels == NTDB_MAX_LEVELS + 1))
+                       tlevel->total_buckets = (1 << NTDB_HASH_GROUP_BITS);
+               else
+                       tlevel->total_buckets = (1 << NTDB_SUBLEVEL_HASH_BITS);
+               goto again;
+       }
+
+       /* Nothing there? */
+       if (tinfo->num_levels == 1)
+               return 0;
+
+       /* Handle chained entries. */
+       if (unlikely(tinfo->num_levels == NTDB_MAX_LEVELS + 1)) {
+               tlevel->hashtable = ntdb_read_off(ntdb, tlevel->hashtable
+                                                + offsetof(struct ntdb_chain,
+                                                           next));
+               if (NTDB_OFF_IS_ERR(tlevel->hashtable)) {
+                       return tlevel->hashtable;
+               }
+               if (tlevel->hashtable) {
+                       /* Skip the link's record header to reach its
+                        * buckets, then scan it from the start. */
+                       tlevel->hashtable += sizeof(struct ntdb_used_record);
+                       tlevel->entry = 0;
+                       goto again;
+               }
+       }
+
+       /* Go back up and keep searching. */
+       tinfo->num_levels--;
+       tlevel--;
+       goto again;
+}
+
+/* Return success if we find something, NTDB_ERR_NOEXIST if none.
+ *
+ * Advances the traversal in tinfo by one record, visiting the top-level
+ * groups in order and read-locking each group's hash range while it is
+ * examined.  On success kbuf->dsize is the key length and kbuf->dptr a
+ * buffer from ntdb_alloc_read() holding the key; if dlen is non-NULL,
+ * *dlen is the data length and the buffer holds the key followed by the
+ * data.  No hash lock is held on return. */
+enum NTDB_ERROR next_in_hash(struct ntdb_context *ntdb,
+                           struct traverse_info *tinfo,
+                           NTDB_DATA *kbuf, size_t *dlen)
+{
+       const unsigned group_bits = NTDB_TOPLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS;
+       ntdb_off_t hl_start, hl_range, off;
+       enum NTDB_ERROR ecode;
+
+       while (tinfo->toplevel_group < (1 << group_bits)) {
+               hl_start = (ntdb_off_t)tinfo->toplevel_group
+                       << (64 - group_bits);
+               /* NOTE(review): hlock_range() uses 1ULL << (64 - group_bits)
+                * as the range for the same group; confirm which value
+                * ntdb_lock_hashes() expects. */
+               hl_range = 1ULL << group_bits;
+               ecode = ntdb_lock_hashes(ntdb, hl_start, hl_range, F_RDLCK,
+                                       NTDB_LOCK_WAIT);
+               if (ecode != NTDB_SUCCESS) {
+                       return ecode;
+               }
+
+               off = iterate_hash(ntdb, tinfo);
+               if (off) {
+                       struct ntdb_used_record rec;
+
+                       if (NTDB_OFF_IS_ERR(off)) {
+                               ecode = NTDB_OFF_TO_ERR(off);
+                               goto fail;
+                       }
+
+                       ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec));
+                       if (ecode != NTDB_SUCCESS) {
+                               goto fail;
+                       }
+                       if (rec_magic(&rec) != NTDB_USED_MAGIC) {
+                               ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+                                                  NTDB_LOG_ERROR,
+                                                  "next_in_hash:"
+                                                  " corrupt record at %llu",
+                                                  (long long)off);
+                               goto fail;
+                       }
+
+                       kbuf->dsize = rec_key_length(&rec);
+
+                       /* They want data as well? */
+                       if (dlen) {
+                               *dlen = rec_data_length(&rec);
+                               kbuf->dptr = ntdb_alloc_read(ntdb,
+                                                           off + sizeof(rec),
+                                                           kbuf->dsize
+                                                           + *dlen);
+                       } else {
+                               kbuf->dptr = ntdb_alloc_read(ntdb,
+                                                           off + sizeof(rec),
+                                                           kbuf->dsize);
+                       }
+                       /* Unlock before checking the read, so the lock is
+                        * released on both outcomes. */
+                       ntdb_unlock_hashes(ntdb, hl_start, hl_range, F_RDLCK);
+                       if (NTDB_PTR_IS_ERR(kbuf->dptr)) {
+                               return NTDB_PTR_ERR(kbuf->dptr);
+                       }
+                       return NTDB_SUCCESS;
+               }
+
+               ntdb_unlock_hashes(ntdb, hl_start, hl_range, F_RDLCK);
+
+               /* This group is exhausted: step to the start of the next. */
+               tinfo->toplevel_group++;
+               tinfo->levels[0].hashtable
+                       += (sizeof(ntdb_off_t) << NTDB_HASH_GROUP_BITS);
+               tinfo->levels[0].entry = 0;
+       }
+       return NTDB_ERR_NOEXIST;
+
+fail:
+       ntdb_unlock_hashes(ntdb, hl_start, hl_range, F_RDLCK);
+       return ecode;
+
+}
+
+/* Begin a traversal: reset tinfo to the first bucket of the first
+ * top-level group (with no previous record), then fetch the first record
+ * exactly as next_in_hash() does. */
+enum NTDB_ERROR first_in_hash(struct ntdb_context *ntdb,
+                            struct traverse_info *tinfo,
+                            NTDB_DATA *kbuf, size_t *dlen)
+{
+       tinfo->prev = 0;
+       tinfo->toplevel_group = 0;
+       tinfo->num_levels = 1;
+       tinfo->levels[0].hashtable = offsetof(struct ntdb_header, hashtable);
+       tinfo->levels[0].entry = 0;
+       tinfo->levels[0].total_buckets = (1 << NTDB_HASH_GROUP_BITS);
+
+       return next_in_hash(ntdb, tinfo, kbuf, dlen);
+}
+
+/* Even if the entry isn't in this hash bucket, you'd have to lock this
+ * bucket to find it.
+ *
+ * Locks the hash range of the top-level group which key hashes into, with
+ * lock type ltype and wait behaviour waitflag.  func is the name of the
+ * public entry point, used for the trace record (which is emitted whether
+ * or not the lock was obtained).  Returns ntdb_lock_hashes()'s result. */
+static enum NTDB_ERROR chainlock(struct ntdb_context *ntdb, const NTDB_DATA *key,
+                               int ltype, enum ntdb_lock_flags waitflag,
+                               const char *func)
+{
+       enum NTDB_ERROR ecode;
+       uint64_t h = ntdb_hash(ntdb, key->dptr, key->dsize);
+       ntdb_off_t lockstart, locksize;
+       unsigned int group, gbits;
+
+       /* The top gbits bits of the hash select the top-level group. */
+       gbits = NTDB_TOPLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS;
+       group = bits_from(h, 64 - gbits, gbits);
+
+       lockstart = hlock_range(group, &locksize);
+
+       ecode = ntdb_lock_hashes(ntdb, lockstart, locksize, ltype, waitflag);
+       ntdb_trace_1rec(ntdb, func, *key);
+       return ecode;
+}
+
+/* lock/unlock one hash chain. This is meant to be used to reduce
+   contention - it cannot guarantee how many records will be locked.
+
+   Takes a write lock (F_WRLCK), waiting for it if necessary, on the hash
+   range that key falls into.  The result is also stored in
+   ntdb->last_error. */
+_PUBLIC_ enum NTDB_ERROR ntdb_chainlock(struct ntdb_context *ntdb, NTDB_DATA key)
+{
+       return ntdb->last_error = chainlock(ntdb, &key, F_WRLCK, NTDB_LOCK_WAIT,
+                                          "ntdb_chainlock");
+}
+
+/* Release the write lock taken by ntdb_chainlock() for key.  The lock
+ * range is recomputed from the key's hash in the same way chainlock()
+ * computes it, so the same key must be passed. */
+_PUBLIC_ void ntdb_chainunlock(struct ntdb_context *ntdb, NTDB_DATA key)
+{
+       uint64_t h = ntdb_hash(ntdb, key.dptr, key.dsize);
+       ntdb_off_t lockstart, locksize;
+       unsigned int group, gbits;
+
+       /* The top gbits bits of the hash select the top-level group. */
+       gbits = NTDB_TOPLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS;
+       group = bits_from(h, 64 - gbits, gbits);
+
+       lockstart = hlock_range(group, &locksize);
+
+       ntdb_trace_1rec(ntdb, "ntdb_chainunlock", key);
+       ntdb_unlock_hashes(ntdb, lockstart, locksize, F_WRLCK);
+}
+
+/* Read-lock variant of ntdb_chainlock(): takes a read lock (F_RDLCK),
+ * waiting for it if necessary, on the hash range that key falls into.
+ * The result is also stored in ntdb->last_error. */
+_PUBLIC_ enum NTDB_ERROR ntdb_chainlock_read(struct ntdb_context *ntdb, NTDB_DATA key)
+{
+       return ntdb->last_error = chainlock(ntdb, &key, F_RDLCK, NTDB_LOCK_WAIT,
+                                          "ntdb_chainlock_read");
+}
+
+/* Release the read lock taken by ntdb_chainlock_read() for key.  The lock
+ * range is recomputed from the key's hash in the same way chainlock()
+ * computes it, so the same key must be passed. */
+_PUBLIC_ void ntdb_chainunlock_read(struct ntdb_context *ntdb, NTDB_DATA key)
+{
+       uint64_t h = ntdb_hash(ntdb, key.dptr, key.dsize);
+       ntdb_off_t lockstart, locksize;
+       unsigned int group, gbits;
+
+       /* The top gbits bits of the hash select the top-level group. */
+       gbits = NTDB_TOPLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS;
+       group = bits_from(h, 64 - gbits, gbits);
+
+       lockstart = hlock_range(group, &locksize);
+
+       ntdb_trace_1rec(ntdb, "ntdb_chainunlock_read", key);
+       ntdb_unlock_hashes(ntdb, lockstart, locksize, F_RDLCK);
+}
diff --git a/lib/ntdb/io.c b/lib/ntdb/io.c
new file mode 100644 (file)
index 0000000..4580520
--- /dev/null
@@ -0,0 +1,650 @@
+ /*
+   Unix SMB/CIFS implementation.
+
+   trivial database library
+
+   Copyright (C) Andrew Tridgell              1999-2005
+   Copyright (C) Paul `Rusty' Russell             2000
+   Copyright (C) Jeremy Allison                           2000-2003
+   Copyright (C) Rusty Russell                    2010
+
+     ** NOTE! The following LGPL license applies to the ntdb
+     ** library. This does NOT imply that all of Samba is released
+     ** under the LGPL
+
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 3 of the License, or (at your option) any later version.
+
+   This library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include "private.h"
+#include <assert.h>
+#include <ccan/likely/likely.h>
+
+/* Remove the memory mapping of "file", if there is one, and clear
+   map_ptr.  Files whose fd is -1 are left untouched. */
+void ntdb_munmap(struct ntdb_file *file)
+{
+       if (file->fd != -1 && file->map_ptr) {
+               munmap(file->map_ptr, file->map_size);
+               file->map_ptr = NULL;
+       }
+}
+
+/* Map the whole database file (map_size bytes from offset 0, shared)
+   and record the mapping in ntdb->file->map_ptr; map_ptr is NULL when
+   the mapping attempt fails.
+
+   Nothing is done for NTDB_INTERNAL databases.  Without
+   HAVE_INCOHERENT_MMAP, NTDB_NOMMAP also skips the mapping and a
+   failed mmap is only logged as a warning: NTDB_SUCCESS is still
+   returned.  With HAVE_INCOHERENT_MMAP a failed mmap is logged as
+   NTDB_ERR_IO and the result of that ntdb_logerr call is returned. */
+enum NTDB_ERROR ntdb_mmap(struct ntdb_context *ntdb)
+{
+       /* Despite the name, this holds the PROT_* protection argument. */
+       int mmap_flags;
+
+       /* Internal databases use malloced memory for map_ptr (see
+        * ntdb_expand_file), so there is nothing to map. */
+       if (ntdb->flags & NTDB_INTERNAL)
+               return NTDB_SUCCESS;
+
+#ifndef HAVE_INCOHERENT_MMAP
+       if (ntdb->flags & NTDB_NOMMAP)
+               return NTDB_SUCCESS;
+#endif
+
+       /* Only ask for write access if the file was opened writable. */
+       if ((ntdb->open_flags & O_ACCMODE) == O_RDONLY)
+               mmap_flags = PROT_READ;
+       else
+               mmap_flags = PROT_READ | PROT_WRITE;
+
+       /* size_t can be smaller than off_t. */
+       if ((size_t)ntdb->file->map_size == ntdb->file->map_size) {
+               ntdb->file->map_ptr = mmap(NULL, ntdb->file->map_size,
+                                         mmap_flags,
+                                         MAP_SHARED, ntdb->file->fd, 0);
+       } else
+               /* Too big to express as a size_t: treat as mmap failure. */
+               ntdb->file->map_ptr = MAP_FAILED;
+
+       /*
+        * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
+        */
+       if (ntdb->file->map_ptr == MAP_FAILED) {
+               /* The rest of this file tests map_ptr against NULL. */
+               ntdb->file->map_ptr = NULL;
+#ifdef HAVE_INCOHERENT_MMAP
+               /* Incoherent mmap means everyone must mmap! */
+               return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+                                 "ntdb_mmap failed for size %lld (%s)",
+                                 (long long)ntdb->file->map_size,
+                                 strerror(errno));
+#else
+               ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING,
+                          "ntdb_mmap failed for size %lld (%s)",
+                          (long long)ntdb->file->map_size, strerror(errno));
+#endif
+       }
+       return NTDB_SUCCESS;
+}
+
+/* check for an out of bounds access - if it is out of bounds then
+   see if the database has been expanded by someone else and expand
+   if necessary
+   note that "len" is the minimum length needed for the db.
+
+   If probe is true, len being too large isn't a failure.
+
+   Returns NTDB_SUCCESS when off + len lies within the current map_size,
+   or within the file size reported by fstat (in which case the file is
+   unmapped, map_size is updated and the result of ntdb_mmap is
+   returned).  Otherwise an error is logged and returned, unless probe
+   is set.
+*/
+static enum NTDB_ERROR ntdb_oob(struct ntdb_context *ntdb,
+                             ntdb_off_t off, ntdb_len_t len, bool probe)
+{
+       struct stat st;
+       enum NTDB_ERROR ecode;
+
+       /* We can't hold pointers during this: we could unmap! */
+       assert(!ntdb->direct_access
+              || (ntdb->flags & NTDB_NOLOCK)
+              || ntdb_has_expansion_lock(ntdb));
+
+       /* Unsigned wrap-around of off + len. */
+       if (len + off < len) {
+               if (probe)
+                       return NTDB_SUCCESS;
+
+               return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+                                 "ntdb_oob off %llu len %llu wrap\n",
+                                 (long long)off, (long long)len);
+       }
+
+       /* Fast path: already within the size we know about. */
+       if (len + off <= ntdb->file->map_size)
+               return NTDB_SUCCESS;
+       /* No fstat/remap is attempted for an internal database. */
+       if (ntdb->flags & NTDB_INTERNAL) {
+               if (probe)
+                       return NTDB_SUCCESS;
+
+               ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+                          "ntdb_oob len %lld beyond internal"
+                          " malloc size %lld",
+                          (long long)(off + len),
+                          (long long)ntdb->file->map_size);
+               return NTDB_ERR_IO;
+       }
+
+       /* Read the file size under a read lock on the expansion lock. */
+       ecode = ntdb_lock_expand(ntdb, F_RDLCK);
+       if (ecode != NTDB_SUCCESS) {
+               return ecode;
+       }
+
+       if (fstat(ntdb->file->fd, &st) != 0) {
+               ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+                          "Failed to fstat file: %s", strerror(errno));
+               ntdb_unlock_expand(ntdb, F_RDLCK);
+               return NTDB_ERR_IO;
+       }
+
+       ntdb_unlock_expand(ntdb, F_RDLCK);
+
+       /* Still too short: the access really is out of bounds. */
+       if (st.st_size < off + len) {
+               if (probe)
+                       return NTDB_SUCCESS;
+
+               ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+                          "ntdb_oob len %llu beyond eof at %llu",
+                          (long long)(off + len), (long long)st.st_size);
+               return NTDB_ERR_IO;
+       }
+
+       /* Unmap, update size, remap */
+       ntdb_munmap(ntdb->file);
+
+       ntdb->file->map_size = st.st_size;
+       return ntdb_mmap(ntdb);
+}
+
+/* Endian conversion: byte-swap "buf" in place, one 8 byte quantity at
+   a time, when the database has NTDB_CONVERT set.  "size" must be a
+   multiple of 8.  Always returns "buf". */
+void *ntdb_convert(const struct ntdb_context *ntdb, void *buf, ntdb_len_t size)
+{
+       uint64_t *word, *end;
+
+       assert(size % 8 == 0);
+
+       /* Common case: no conversion wanted, or nothing to convert. */
+       if (likely(!(ntdb->flags & NTDB_CONVERT)) || !buf)
+               return buf;
+
+       word = (uint64_t *)buf;
+       end = word + size / 8;
+       for (; word != end; word++)
+               *word = bswap_64(*word);
+
+       return buf;
+}
+
+/* Scan entries [start, end) of the offset array at "base" and return
+   the index of the first non-zero one, or "end" if all are zero, or an
+   error offset if the array could not be accessed. */
+/* FIXME: Return the off? */
+uint64_t ntdb_find_nonzero_off(struct ntdb_context *ntdb,
+                             ntdb_off_t base, uint64_t start, uint64_t end)
+{
+       const uint64_t count = end - start;
+       const uint64_t *offs;
+       uint64_t idx = 0;
+
+       /* Zero vs non-zero is the same unconverted: minor optimization. */
+       offs = ntdb_access_read(ntdb, base + start * sizeof(ntdb_off_t),
+                               count * sizeof(ntdb_off_t), false);
+       if (NTDB_PTR_IS_ERR(offs))
+               return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(offs));
+
+       while (idx < count && !offs[idx])
+               idx++;
+
+       ntdb_access_release(ntdb, offs);
+       return start + idx;
+}
+
+/* Scan the "num" entry offset array at "off" and return the index of
+   the first zero entry, or "num" if none is zero, or an error offset
+   if the array could not be accessed. */
+uint64_t ntdb_find_zero_off(struct ntdb_context *ntdb, ntdb_off_t off,
+                          uint64_t num)
+{
+       const uint64_t *offs;
+       uint64_t idx = 0;
+
+       /* Zero vs non-zero is the same unconverted: minor optimization. */
+       offs = ntdb_access_read(ntdb, off, num * sizeof(ntdb_off_t), false);
+       if (NTDB_PTR_IS_ERR(offs))
+               return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(offs));
+
+       while (idx < num && offs[idx])
+               idx++;
+
+       ntdb_access_release(ntdb, offs);
+       return idx;
+}
+
+/* Zero "len" bytes of the database starting at "off".  A direct
+   pointer is used when io->direct provides one; otherwise zeroes are
+   written through io->twrite in chunks of at most 8192 bytes. */
+enum NTDB_ERROR zero_out(struct ntdb_context *ntdb, ntdb_off_t off, ntdb_len_t len)
+{
+       char zeroes[8192] = { 0 };
+       void *direct = ntdb->io->direct(ntdb, off, len, true);
+       enum NTDB_ERROR ecode = NTDB_SUCCESS;
+
+       assert(!(ntdb->flags & NTDB_RDONLY));
+       if (NTDB_PTR_IS_ERR(direct))
+               return NTDB_PTR_ERR(direct);
+
+       if (direct) {
+               memset(direct, 0, len);
+               return ecode;
+       }
+
+       /* No direct access: write the zero buffer out piece by piece. */
+       while (len) {
+               unsigned chunk = sizeof(zeroes);
+
+               if (len < chunk)
+                       chunk = len;
+               ecode = ntdb->io->twrite(ntdb, off, zeroes, chunk);
+               if (ecode != NTDB_SUCCESS)
+                       return ecode;
+               off += chunk;
+               len -= chunk;
+       }
+       return ecode;
+}
+
+/* Read the offset stored at "off".  When no conversion is needed the
+   value is read through a direct pointer if io->direct supplies one;
+   otherwise it goes through ntdb_read_convert.  Failures come back as
+   an error offset (NTDB_ERR_TO_OFF). */
+ntdb_off_t ntdb_read_off(struct ntdb_context *ntdb, ntdb_off_t off)
+{
+       ntdb_off_t val;
+       enum NTDB_ERROR ecode;
+
+       if (likely(!(ntdb->flags & NTDB_CONVERT))) {
+               ntdb_off_t *direct;
+
+               direct = ntdb->io->direct(ntdb, off, sizeof(*direct), false);
+               if (NTDB_PTR_IS_ERR(direct))
+                       return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(direct));
+               if (direct != NULL)
+                       return *direct;
+       }
+
+       ecode = ntdb_read_convert(ntdb, off, &val, sizeof(val));
+       if (ecode == NTDB_SUCCESS)
+               return val;
+       return NTDB_ERR_TO_OFF(ecode);
+}
+
+/* write a lump of data at a specified offset */
+static enum NTDB_ERROR ntdb_write(struct ntdb_context *ntdb, ntdb_off_t off,
+                               const void *buf, ntdb_len_t len)
+{
+       enum NTDB_ERROR ecode;
+
+       if (ntdb->flags & NTDB_RDONLY) {
+               return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR,
+                                 "Write to read-only database");
+       }
+
+       ecode = ntdb->io->oob(ntdb, off, len, false);
+       if (ecode != NTDB_SUCCESS) {
+               return ecode;
+       }
+
+       if (ntdb->file->map_ptr) {
+               memcpy(off + (char *)ntdb->file->map_ptr, buf, len);
+       } else {
+#ifdef HAVE_INCOHERENT_MMAP
+               return NTDB_ERR_IO;
+#else
+               ssize_t ret;
+               ret = pwrite(ntdb->file->fd, buf, len, off);
+               if (ret != len) {
+                       /* This shouldn't happen: we avoid sparse files. */
+                       if (ret >= 0)
+                               errno = ENOSPC;
+
+                       return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+                                         "ntdb_write: %zi at %zu len=%zu (%s)",
+                                         ret, (size_t)off, (size_t)len,
+                                         strerror(errno));
+               }
+#endif
+       }
+       return NTDB_SUCCESS;
+}
+
+/* read a lump of data at a specified offset */
+static enum NTDB_ERROR ntdb_read(struct ntdb_context *ntdb, ntdb_off_t off,
+                              void *buf, ntdb_len_t len)
+{
+       enum NTDB_ERROR ecode;
+
+       ecode = ntdb->io->oob(ntdb, off, len, false);
+       if (ecode != NTDB_SUCCESS) {
+               return ecode;
+       }
+
+       if (ntdb->file->map_ptr) {
+               memcpy(buf, off + (char *)ntdb->file->map_ptr, len);
+       } else {
+#ifdef HAVE_INCOHERENT_MMAP
+               return NTDB_ERR_IO;
+#else
+               ssize_t r = pread(ntdb->file->fd, buf, len, off);
+               if (r != len) {
+                       return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+                                         "ntdb_read failed with %zi at %zu "
+                                         "len=%zu (%s) map_size=%zu",
+                                         r, (size_t)off, (size_t)len,
+                                         strerror(errno),
+                                         (size_t)ntdb->file->map_size);
+               }
+#endif
+       }
+       return NTDB_SUCCESS;
+}
+
+/* Write "len" bytes of "rec" at "off" through io->twrite.  For
+   NTDB_CONVERT databases a byte-swapped temporary copy is written
+   instead, so the caller's "rec" is never modified. */
+enum NTDB_ERROR ntdb_write_convert(struct ntdb_context *ntdb, ntdb_off_t off,
+                                const void *rec, size_t len)
+{
+       void *swapped;
+       enum NTDB_ERROR ecode;
+
+       /* Usual case: native byte order, write the record as it is. */
+       if (likely(!(ntdb->flags & NTDB_CONVERT)))
+               return ntdb->io->twrite(ntdb, off, rec, len);
+
+       swapped = malloc(len);
+       if (swapped == NULL) {
+               return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+                                 "ntdb_write: no memory converting"
+                                 " %zu bytes", len);
+       }
+
+       memcpy(swapped, rec, len);
+       ntdb_convert(ntdb, swapped, len);
+       ecode = ntdb->io->twrite(ntdb, off, swapped, len);
+       free(swapped);
+       return ecode;
+}
+
+/* Read "len" bytes at "off" into "rec" through io->tread and, for
+   NTDB_CONVERT databases, byte-swap them in place (ntdb_convert
+   requires "len" to be a multiple of 8).
+
+   Returns the result of io->tread.  On failure "rec" is left
+   unconverted: its contents are not meaningful and must not be used. */
+enum NTDB_ERROR ntdb_read_convert(struct ntdb_context *ntdb, ntdb_off_t off,
+                               void *rec, size_t len)
+{
+       enum NTDB_ERROR ecode = ntdb->io->tread(ntdb, off, rec, len);
+
+       /* Only swap bytes that were actually read: after a failed read
+        * the buffer may not have been filled in at all. */
+       if (ecode == NTDB_SUCCESS)
+               ntdb_convert(ntdb, rec, len);
+       return ecode;
+}
+
+/* Store the offset "val" at "off".  Read-only databases are refused.
+   When no conversion is needed the value is stored through a direct
+   pointer if io->direct supplies one; otherwise it goes through
+   ntdb_write_convert. */
+enum NTDB_ERROR ntdb_write_off(struct ntdb_context *ntdb,
+                            ntdb_off_t off, ntdb_off_t val)
+{
+       ntdb_off_t *direct = NULL;
+
+       if (ntdb->flags & NTDB_RDONLY) {
+               return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR,
+                                 "Write to read-only database");
+       }
+
+       if (likely(!(ntdb->flags & NTDB_CONVERT))) {
+               direct = ntdb->io->direct(ntdb, off, sizeof(*direct), true);
+               if (NTDB_PTR_IS_ERR(direct))
+                       return NTDB_PTR_ERR(direct);
+       }
+
+       if (direct == NULL)
+               return ntdb_write_convert(ntdb, off, &val, sizeof(val));
+
+       *direct = val;
+       return NTDB_SUCCESS;
+}
+
+/* Allocate a buffer of prefix + len bytes and read "len" bytes from
+   "offset" into it, starting "prefix" bytes in; the first "prefix"
+   bytes are left uninitialised for the caller.
+
+   Returns the malloced buffer (the caller frees it), or an error
+   pointer: NTDB_ERR_OOM if the buffer cannot be allocated (including
+   when prefix + len cannot be represented), else the error from
+   io->tread. */
+static void *_ntdb_alloc_read(struct ntdb_context *ntdb, ntdb_off_t offset,
+                            ntdb_len_t len, unsigned int prefix)
+{
+       unsigned char *buf = NULL;
+       enum NTDB_ERROR ecode;
+       ntdb_len_t total = prefix + len;
+       size_t alloc = (size_t)total;
+
+       /* Refuse a size that wrapped around or does not fit in size_t:
+        * malloc would hand back a buffer shorter than the read below. */
+       if (total >= len && alloc == total) {
+               /* some systems don't like zero length malloc */
+               buf = malloc(alloc ? alloc : 1);
+       }
+       if (!buf) {
+               ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_USE_ERROR,
+                          "ntdb_alloc_read malloc failed len=%zu",
+                          alloc);
+               return NTDB_ERR_PTR(NTDB_ERR_OOM);
+       }
+
+       ecode = ntdb->io->tread(ntdb, offset, buf+prefix, len);
+       if (unlikely(ecode != NTDB_SUCCESS)) {
+               free(buf);
+               return NTDB_ERR_PTR(ecode);
+       }
+       return buf;
+}
+
+/* Read "len" bytes at "offset" into a freshly malloced buffer, with no
+   prefix space reserved.  Returns the buffer or an error pointer. */
+void *ntdb_alloc_read(struct ntdb_context *ntdb, ntdb_off_t offset, ntdb_len_t len)
+{
+       void *buf = _ntdb_alloc_read(ntdb, offset, len, 0);
+
+       return buf;
+}
+
+/* Write "len" bytes to the file starting at "off" by repeating the
+   "size" byte pattern in "buf" with pwrite.  A short write is reported
+   as ENOSPC.  Returns NTDB_SUCCESS or the logged NTDB_ERR_IO. */
+static enum NTDB_ERROR fill(struct ntdb_context *ntdb,
+                          const void *buf, size_t size,
+                          ntdb_off_t off, ntdb_len_t len)
+{
+       size_t chunk;
+       ssize_t written;
+
+       for (; len != 0; len -= chunk, off += chunk) {
+               chunk = (len < size) ? len : size;
+               written = pwrite(ntdb->file->fd, buf, chunk, off);
+               if (written == chunk)
+                       continue;
+
+               if (written >= 0)
+                       errno = ENOSPC;
+
+               return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+                                 "fill failed:"
+                                 " %zi at %zu len=%zu (%s)",
+                                 written, (size_t)off, (size_t)len,
+                                 strerror(errno));
+       }
+       return NTDB_SUCCESS;
+}
+
+/* expand a file.  we prefer to use ftruncate, as that is what posix
+  says to use for mmap expansion
+
+  Grows the database by "addition" bytes.  Read-only databases are
+  refused.  An NTDB_INTERNAL database is grown with realloc; a file is
+  unmapped, extended, filled with a non-zero pattern so it is not
+  sparse, and mapped again (the result of ntdb_mmap is returned). */
+static enum NTDB_ERROR ntdb_expand_file(struct ntdb_context *ntdb,
+                                     ntdb_len_t addition)
+{
+       char buf[8192];
+       enum NTDB_ERROR ecode;
+
+       if (ntdb->flags & NTDB_RDONLY) {
+               return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR,
+                                 "Expand on read-only database");
+       }
+
+       if (ntdb->flags & NTDB_INTERNAL) {
+               /* Here map_ptr is malloced memory, not a mapping.  The new
+                * tail is not initialised by this function. */
+               char *new = realloc(ntdb->file->map_ptr,
+                                   ntdb->file->map_size + addition);
+               if (!new) {
+                       return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+                                         "No memory to expand database");
+               }
+               ntdb->file->map_ptr = new;
+               ntdb->file->map_size += addition;
+               return NTDB_SUCCESS;
+       } else {
+               /* Unmap before trying to write; old TDB claimed OpenBSD had
+                * problem with this otherwise. */
+               ntdb_munmap(ntdb->file);
+
+               /* If this fails, we try to fill anyway. */
+               if (ftruncate(ntdb->file->fd, ntdb->file->map_size + addition))
+                       ;
+
+               /* now fill the file with something. This ensures that the
+                  file isn't sparse, which would be very bad if we ran out of
+                  disk. This must be done with write, not via mmap */
+               memset(buf, 0x43, sizeof(buf));
+               ecode = fill(ntdb, buf, sizeof(buf), ntdb->file->map_size,
+                            addition);
+               /* NOTE(review): on this failure path the file stays
+                * unmapped and map_size is not updated. */
+               if (ecode != NTDB_SUCCESS)
+                       return ecode;
+               ntdb->file->map_size += addition;
+               return ntdb_mmap(ntdb);
+       }
+}
+
+/* Get read access to "len" bytes at "off".  If io->direct supplies a
+   pointer (only tried when no conversion is needed) it is returned and
+   direct_access is incremented.  Otherwise the data is read into a
+   malloced buffer, which is linked onto ntdb->access and byte-swapped
+   if "convert" is set.  Returns the data pointer or an error pointer;
+   pair with ntdb_access_release. */
+const void *ntdb_access_read(struct ntdb_context *ntdb,
+                           ntdb_off_t off, ntdb_len_t len, bool convert)
+{
+       struct ntdb_access_hdr *hdr;
+       void *direct = NULL;
+       void *data;
+
+       if (likely(!(ntdb->flags & NTDB_CONVERT))) {
+               direct = ntdb->io->direct(ntdb, off, len, false);
+               if (NTDB_PTR_IS_ERR(direct))
+                       return direct;
+       }
+
+       if (direct) {
+               ntdb->direct_access++;
+               return direct;
+       }
+
+       /* No direct pointer: fall back to a private copy with a header
+        * in front of the data. */
+       hdr = _ntdb_alloc_read(ntdb, off, len, sizeof(*hdr));
+       if (NTDB_PTR_IS_ERR(hdr))
+               return hdr;
+
+       hdr->next = ntdb->access;
+       ntdb->access = hdr;
+
+       data = hdr + 1;
+       if (convert)
+               ntdb_convert(ntdb, data, len);
+       return data;
+}
+
+/* Get write access to "len" bytes at "off".  Read-only databases are
+   refused.  If io->direct supplies a pointer (only tried when no
+   conversion is needed) it is returned and direct_access is
+   incremented.  Otherwise the data is read into a malloced buffer
+   whose header records off, len and convert, linked onto ntdb->access
+   and byte-swapped if "convert" is set.  Returns the data pointer or
+   an error pointer. */
+void *ntdb_access_write(struct ntdb_context *ntdb,
+                      ntdb_off_t off, ntdb_len_t len, bool convert)
+{
+       struct ntdb_access_hdr *hdr;
+       void *direct = NULL;
+       void *data;
+
+       if (ntdb->flags & NTDB_RDONLY) {
+               ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR,
+                          "Write to read-only database");
+               return NTDB_ERR_PTR(NTDB_ERR_RDONLY);
+       }
+
+       if (likely(!(ntdb->flags & NTDB_CONVERT))) {
+               direct = ntdb->io->direct(ntdb, off, len, true);
+               if (NTDB_PTR_IS_ERR(direct))
+                       return direct;
+       }
+
+       if (direct) {
+               ntdb->direct_access++;
+               return direct;
+       }
+
+       /* No direct pointer: work on a private copy and remember where
+        * it belongs. */
+       hdr = _ntdb_alloc_read(ntdb, off, len, sizeof(*hdr));
+       if (NTDB_PTR_IS_ERR(hdr))
+               return hdr;
+
+       hdr->next = ntdb->access;
+       ntdb->access = hdr;
+       hdr->off = off;
+       hdr->len = len;
+       hdr->convert = convert;
+
+       data = hdr + 1;
+       if (convert)
+               ntdb_convert(ntdb, data, len);
+       return data;
+}
+
+/* Search the ntdb->access list for the header whose data area (the
+   memory right after the header) is "p".  Returns the address of the
+   link pointing at that header, or NULL if "p" is not on the list. */
+static struct ntdb_access_hdr **find_hdr(struct ntdb_context *ntdb, const void *p)
+{
+       struct ntdb_access_hdr **link = &ntdb->access;
+
+       while (*link) {
+               if (*link + 1 == p)
+                       return link;
+               link = &(*link)->next;
+       }
+       return NULL;
+}
+
+/* Finish with an access pointer "p".  If it is a buffer on the
+   ntdb->access list it is unlinked and freed; otherwise it is counted
+   as a direct access and direct_access is decremented. */
+void ntdb_access_release(struct ntdb_context *ntdb, const void *p)
+{
+       struct ntdb_access_hdr **link = find_hdr(ntdb, p);
+       struct ntdb_access_hdr *hdr;
+
+       if (!link) {
+               ntdb->direct_access--;
+               return;
+       }
+
+       hdr = *link;
+       *link = hdr->next;
+       free(hdr);
+}
+
+/* Finish with a write access pointer "p".  If it is a buffer on the
+   ntdb->access list its contents are written back to the recorded
+   offset (converted if the header says so), then it is unlinked and
+   freed, and the write's result is returned.  Otherwise it is counted
+   as a direct access: direct_access is decremented and NTDB_SUCCESS
+   returned. */
+enum NTDB_ERROR ntdb_access_commit(struct ntdb_context *ntdb, void *p)
+{
+       struct ntdb_access_hdr **link = find_hdr(ntdb, p);
+       struct ntdb_access_hdr *hdr;
+       enum NTDB_ERROR ecode;
+
+       if (!link) {
+               ntdb->direct_access--;
+               return NTDB_SUCCESS;
+       }
+
+       hdr = *link;
+       ecode = hdr->convert
+               ? ntdb_write_convert(ntdb, hdr->off, p, hdr->len)
+               : ntdb_write(ntdb, hdr->off, p, hdr->len);
+
+       /* The buffer is dropped whether or not the write succeeded. */
+       *link = hdr->next;
+       free(hdr);
+       return ecode;
+}
+
+/* Return a pointer straight into the mapping for "len" bytes at "off".
+   Returns NULL when there is no mapping, or an error pointer when the
+   bounds check fails.  "write_mode" is not used here. */
+static void *ntdb_direct(struct ntdb_context *ntdb, ntdb_off_t off, size_t len,
+                       bool write_mode)
+{
+       enum NTDB_ERROR ecode;
+
+       if (unlikely(!ntdb->file->map_ptr))
+               return NULL;
+
+       ecode = ntdb_oob(ntdb, off, len, false);
+       if (likely(ecode == NTDB_SUCCESS))
+               return (char *)ntdb->file->map_ptr + off;
+
+       return NTDB_ERR_PTR(ecode);
+}
+
+/* Increment the sequence number stored in the database header.  The
+   number is kept non-negative when read as an int64_t: it restarts
+   instead of wrapping to a negative value.  Failures are not
+   reported: the function simply leaves the sequence number alone. */
+void ntdb_inc_seqnum(struct ntdb_context *ntdb)
+{
+       ntdb_off_t seq;
+
+       if (likely(!(ntdb->flags & NTDB_CONVERT))) {
+               int64_t *direct;
+
+               direct = ntdb->io->direct(ntdb,
+                                        offsetof(struct ntdb_header, seqnum),
+                                        sizeof(*direct), true);
+               /* direct() reports failure as an error pointer, which
+                * must not be dereferenced. */
+               if (NTDB_PTR_IS_ERR(direct))
+                       return;
+               if (likely(direct)) {
+                       /* Don't let it go negative, even briefly.  The
+                        * whole comparison belongs inside unlikely(): the
+                        * macro's own result is never below zero.  The
+                        * addition is done unsigned so it cannot overflow. */
+                       if (unlikely((int64_t)((uint64_t)*direct + 1) < 0))
+                               *direct = 0;
+                       (*direct)++;
+                       return;
+               }
+       }
+
+       /* No direct pointer: read, bump and write back the slow way. */
+       seq = ntdb_read_off(ntdb, offsetof(struct ntdb_header, seqnum));
+       if (!NTDB_OFF_IS_ERR(seq)) {
+               seq++;
+               if (unlikely((int64_t)seq < 0))
+                       seq = 0;
+               ntdb_write_off(ntdb, offsetof(struct ntdb_header, seqnum), seq);
+       }
+}
+
+/* The default I/O method table, installed by ntdb_io_init().  The
+   initializers are positional, so their order has to match the field
+   order of struct ntdb_methods (declared outside this file). */
+static const struct ntdb_methods io_methods = {
+       ntdb_read,
+       ntdb_write,
+       ntdb_oob,
+       ntdb_expand_file,
+       ntdb_direct,
+};
+
+/*
+  initialise the default methods table: point ntdb->io at this file's
+  io_methods.
+*/
+void ntdb_io_init(struct ntdb_context *ntdb)
+{
+       ntdb->io = &io_methods;
+}
diff --git a/lib/ntdb/lock.c b/lib/ntdb/lock.c
new file mode 100644 (file)
index 0000000..167770d
--- /dev/null
@@ -0,0 +1,883 @@
+ /*
+   Unix SMB/CIFS implementation.
+
+   trivial database library
+
+   Copyright (C) Andrew Tridgell              1999-2005
+   Copyright (C) Paul `Rusty' Russell             2000
+   Copyright (C) Jeremy Allison                           2000-2003
+
+     ** NOTE! The following LGPL license applies to the ntdb
+     ** library. This does NOT imply that all of Samba is released
+     ** under the LGPL
+
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 3 of the License, or (at your option) any later version.
+
+   This library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "private.h"
+#include <assert.h>
+#include <ccan/build_assert/build_assert.h>
+
+/* Log that "call" hit a lock held by another ntdb context in this
+   process, and return the NTDB_ERR_LOCK logging result.  If we were
+   threaded, we could wait for unlock, but we're not, so fail. */
+enum NTDB_ERROR owner_conflict(struct ntdb_context *ntdb, const char *call)
+{
+       enum NTDB_ERROR ecode;
+
+       ecode = ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
+                           "%s: lock owned by another ntdb in this process.",
+                           call);
+       return ecode;
+}
+
+/* If we fork, we no longer really own locks.  Returns true when no
+   locks are held or the recorded locker is the current process;
+   otherwise returns false, logging the mismatch if "log" is set. */
+bool check_lock_pid(struct ntdb_context *ntdb, const char *call, bool log)
+{
+       bool holds_locks = ntdb->file->allrecord_lock.count != 0
+               || ntdb->file->num_lockrecs != 0;
+
+       /* No locks, or no fork?  No problem! */
+       if (!holds_locks || ntdb->file->locker == getpid())
+               return true;
+
+       if (log) {
+               ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
+                          "%s: fork() detected after lock acquisition!"
+                          " (%u vs %u)", call, ntdb->file->locker, getpid());
+       }
+       return false;
+}
+
+/* fcntl-based lock function: lock "len" bytes at "off" of "fd" with
+   lock type "rw", using F_SETLKW if "waitflag" is set and F_SETLK
+   otherwise.  The call is retried while it fails with EINTR.  Returns
+   the fcntl result; "unused" is ignored. */
+int ntdb_fcntl_lock(int fd, int rw, off_t off, off_t len, bool waitflag,
+                  void *unused)
+{
+       const int cmd = waitflag ? F_SETLKW : F_SETLK;
+       struct flock fl;
+       int ret;
+
+       for (;;) {
+               fl.l_type = rw;
+               fl.l_whence = SEEK_SET;
+               fl.l_start = off;
+               fl.l_len = len;
+
+               ret = fcntl(fd, cmd, &fl);
+               if (ret == 0 || errno != EINTR)
+                       break;
+       }
+       return ret;
+}
+
+/* fcntl-based unlock function: release the lock on "len" bytes at
+   "off" of "fd" with F_SETLKW/F_UNLCK, retrying while the call fails
+   with EINTR.  Returns the fcntl result; "rw" and "unused" are
+   ignored. */
+int ntdb_fcntl_unlock(int fd, int rw, off_t off, off_t len, void *unused)
+{
+       struct flock fl;
+       int ret;
+
+       for (;;) {
+               fl.l_type = F_UNLCK;
+               fl.l_whence = SEEK_SET;
+               fl.l_start = off;
+               fl.l_len = len;
+
+               ret = fcntl(fd, F_SETLKW, &fl);
+               if (ret == 0 || errno != EINTR)
+                       break;
+       }
+       return ret;
+}
+
+/* Low-level lock: call the context's lock_fn for the given range and
+   keep the lock statistics up to date.  Returns lock_fn's result. */
+static int lock(struct ntdb_context *ntdb,
+                     int rw, off_t off, off_t len, bool waitflag)
+{
+       int ret;
+
+       /* No locks currently held: record our pid as the locker. */
+       if (ntdb->file->num_lockrecs == 0
+           && ntdb->file->allrecord_lock.count == 0)
+               ntdb->file->locker = getpid();
+
+       ntdb->stats.lock_lowlevel++;
+       ret = ntdb->lock_fn(ntdb->file->fd, rw, off, len, waitflag,
+                          ntdb->lock_data);
+       if (waitflag)
+               return ret;
+
+       /* Non-blocking attempts are counted separately. */
+       ntdb->stats.lock_nonblock++;
+       if (ret != 0)
+               ntdb->stats.lock_nonblock_fail++;
+       return ret;
+}
+
+/* Low-level unlock: call the context's unlock_fn for the given range
+   and return its result.  The "#if 0" section is disabled debugging
+   code that looks the lock up in /proc/locks and aborts if the range
+   or type being unlocked does not match a lock held by this process. */
+static int unlock(struct ntdb_context *ntdb, int rw, off_t off, off_t len)
+{
+#if 0 /* Check they matched up locks and unlocks correctly. */
+       char line[80];
+       FILE *locks;
+       bool found = false;
+
+       locks = fopen("/proc/locks", "r");
+
+       while (fgets(line, 80, locks)) {
+               char *p;
+               int type, start, l;
+
+               /* eg. 1: FLOCK  ADVISORY  WRITE 2440 08:01:2180826 0 EOF */
+               p = strchr(line, ':') + 1;
+               if (strncmp(p, " POSIX  ADVISORY  ", strlen(" POSIX  ADVISORY  ")))
+                       continue;
+               p += strlen(" FLOCK  ADVISORY  ");
+               if (strncmp(p, "READ  ", strlen("READ  ")) == 0)
+                       type = F_RDLCK;
+               else if (strncmp(p, "WRITE ", strlen("WRITE ")) == 0)
+                       type = F_WRLCK;
+               else
+                       abort();
+               p += 6;
+               /* Only locks owned by this process are of interest. */
+               if (atoi(p) != getpid())
+                       continue;
+               p = strchr(strchr(p, ' ') + 1, ' ') + 1;
+               start = atoi(p);
+               p = strchr(p, ' ') + 1;
+               /* "EOF" as the end is mapped to length 0. */
+               if (strncmp(p, "EOF", 3) == 0)
+                       l = 0;
+               else
+                       l = atoi(p) - start + 1;
+
+               if (off == start) {
+                       if (len != l) {
+                               fprintf(stderr, "Len %u should be %u: %s",
+                                       (int)len, l, line);
+                               abort();
+                       }
+                       if (type != rw) {
+                               fprintf(stderr, "Type %s wrong: %s",
+                                       rw == F_RDLCK ? "READ" : "WRITE", line);
+                               abort();
+                       }
+                       found = true;
+                       break;
+               }
+       }
+
+       if (!found) {
+               fprintf(stderr, "Unlock on %u@%u not found!",
+                       (int)off, (int)len);
+               abort();
+       }
+
+       fclose(locks);
+#endif
+
+       return ntdb->unlock_fn(ntdb->file->fd, rw, off, len, ntdb->lock_data);
+}
+
+/* Byte range lock: lock "len" bytes at "offset" with lock type
+   "rw_type", returning NTDB_SUCCESS on success.
+
+   note that a len of zero means lock to end of file
+
+   Nothing is done for NTDB_NOLOCK databases, and write locks on
+   read-only databases are refused.  A failed lock returns
+   NTDB_ERR_LOCK; it is only logged when NTDB_LOCK_PROBE is not set and
+   errno is neither EAGAIN nor EINTR.
+*/
+static enum NTDB_ERROR ntdb_brlock(struct ntdb_context *ntdb,
+                                int rw_type, ntdb_off_t offset, ntdb_off_t len,
+                                enum ntdb_lock_flags flags)
+{
+       bool quiet;
+
+       if (ntdb->flags & NTDB_NOLOCK)
+               return NTDB_SUCCESS;
+
+       if (rw_type == F_WRLCK && (ntdb->flags & NTDB_RDONLY)) {
+               return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR,
+                                 "Write lock attempted on read-only database");
+       }
+
+       /* A 32 bit system cannot open a 64-bit file, but it could have
+        * expanded since then: check here. */
+       if ((size_t)(offset + len) != offset + len) {
+               return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+                                 "ntdb_brlock: lock on giant offset %llu",
+                                 (long long)(offset + len));
+       }
+
+       if (lock(ntdb, rw_type, offset, len, flags & NTDB_LOCK_WAIT) == 0)
+               return NTDB_SUCCESS;
+
+       /* Generic lock error. errno set by fcntl.
+        * EAGAIN is an expected return from non-blocking
+        * locks. */
+       quiet = (flags & NTDB_LOCK_PROBE)
+               || errno == EAGAIN || errno == EINTR;
+       if (!quiet) {
+               ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+                          "ntdb_brlock failed (fd=%d) at"
+                          " offset %zu rw_type=%d flags=%d len=%zu:"
+                          " %s",
+                          ntdb->file->fd, (size_t)offset, rw_type,
+                          flags, (size_t)len, strerror(errno));
+       }
+       return NTDB_ERR_LOCK;
+}
+
+/* Byte range unlock: release "len" bytes at "offset".  Nothing is done
+   for NTDB_NOLOCK databases.  Fails with NTDB_ERR_LOCK if
+   check_lock_pid reports a fork since locking, or if the low-level
+   unlock returns -1 (which is logged). */
+static enum NTDB_ERROR ntdb_brunlock(struct ntdb_context *ntdb,
+                                  int rw_type, ntdb_off_t offset, size_t len)
+{
+       if (ntdb->flags & NTDB_NOLOCK)
+               return NTDB_SUCCESS;
+
+       if (!check_lock_pid(ntdb, "ntdb_brunlock", true))
+               return NTDB_ERR_LOCK;
+
+       if (unlock(ntdb, rw_type, offset, len) != -1)
+               return NTDB_SUCCESS;
+
+       return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+                         "ntdb_brunlock failed (fd=%d) at offset %zu"
+                         " rw_type=%d len=%zu: %s",
+                         ntdb->file->fd, (size_t)offset, rw_type,
+                         (size_t)len, strerror(errno));
+}
+
+/*
+  upgrade a read lock to a write lock. This needs to be handled in a
+  special way as some OSes (such as solaris) have too conservative
+  deadlock detection and claim a deadlock when progress can be
+  made. For those OSes we may loop for a while.
+
+  The allrecord lock must be held exactly once by this context and not
+  yet upgraded (allrecord_lock.off == 1).  On success the recorded lock
+  type becomes F_WRLCK and allrecord_lock.off is set to 0.  Returns
+  NTDB_SUCCESS or NTDB_ERR_LOCK.
+*/
+enum NTDB_ERROR ntdb_allrecord_upgrade(struct ntdb_context *ntdb, off_t start)
+{
+       /* Upper bound on retries after EDEADLK. */
+       int count = 1000;
+
+       /* NOTE(review): the name logged here is
+        * "ntdb_transaction_prepare_commit", presumably the public call
+        * that leads here - confirm against the callers. */
+       if (!check_lock_pid(ntdb, "ntdb_transaction_prepare_commit", true))
+               return NTDB_ERR_LOCK;
+
+       if (ntdb->file->allrecord_lock.count != 1) {
+               return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+                                 "ntdb_allrecord_upgrade failed:"
+                                 " count %u too high",
+                                 ntdb->file->allrecord_lock.count);
+       }
+
+       if (ntdb->file->allrecord_lock.off != 1) {
+               return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+                                 "ntdb_allrecord_upgrade failed:"
+                                 " already upgraded?");
+       }
+
+       if (ntdb->file->allrecord_lock.owner != ntdb) {
+               return owner_conflict(ntdb, "ntdb_allrecord_upgrade");
+       }
+
+       while (count--) {
+               struct timeval tv;
+               /* Length 0 locks to end of file; NTDB_LOCK_PROBE keeps
+                * ntdb_brlock from logging each failed attempt. */
+               if (ntdb_brlock(ntdb, F_WRLCK, start, 0,
+                              NTDB_LOCK_WAIT|NTDB_LOCK_PROBE) == NTDB_SUCCESS) {
+                       ntdb->file->allrecord_lock.ltype = F_WRLCK;
+                       ntdb->file->allrecord_lock.off = 0;
+                       return NTDB_SUCCESS;
+               }
+               /* Only a claimed deadlock is worth retrying. */
+               if (errno != EDEADLK) {
+                       break;
+               }
+               /* sleep for as short a time as we can - more portable than usleep() */
+               tv.tv_sec = 0;
+               tv.tv_usec = 1;
+               select(0, NULL, NULL, NULL, &tv);
+       }
+
+       /* EAGAIN and EINTR failures are not logged. */
+       if (errno != EAGAIN && errno != EINTR)
+               ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+                          "ntdb_allrecord_upgrade failed");
+       return NTDB_ERR_LOCK;
+}
+
+/* Look up the nested-lock record stored for "offset".
+ *
+ * Only the first record with a matching offset is considered.  When "owner"
+ * is non-NULL and that record belongs to a different context, the result is
+ * NULL; with a NULL "owner" the record is returned whoever holds it.
+ * Returns NULL when no record matches. */
+static struct ntdb_lock *find_nestlock(struct ntdb_context *ntdb, ntdb_off_t offset,
+                                     const struct ntdb_context *owner)
+{
+       unsigned int idx = 0;
+
+       while (idx < ntdb->file->num_lockrecs) {
+               struct ntdb_lock *rec = &ntdb->file->lockrecs[idx++];
+
+               if (rec->off != offset)
+                       continue;
+
+               /* The first hit decides the outcome; later records are
+                * never inspected. */
+               return (owner && rec->owner != owner) ? NULL : rec;
+       }
+       return NULL;
+}
+
+/* Take every lock needed for recovery, run it, then release the locks.
+ *
+ * Sequence: write allrecord lock, then write open lock (both with
+ * NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK so that taking them does not itself
+ * trigger another recovery check), then ntdb_transaction_recover().
+ * Both locks are dropped again before returning.
+ *
+ * Returns the first lock failure, otherwise the result of
+ * ntdb_transaction_recover(). */
+enum NTDB_ERROR ntdb_lock_and_recover(struct ntdb_context *ntdb)
+{
+       enum NTDB_ERROR ecode;
+
+       /* Name this function in the diagnostic: it is reached from the
+        * generic locking paths, not only from a transaction commit. */
+       if (!check_lock_pid(ntdb, "ntdb_lock_and_recover", true))
+               return NTDB_ERR_LOCK;
+
+       ecode = ntdb_allrecord_lock(ntdb, F_WRLCK, NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK,
+                                  false);
+       if (ecode != NTDB_SUCCESS) {
+               return ecode;
+       }
+
+       ecode = ntdb_lock_open(ntdb, F_WRLCK, NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK);
+       if (ecode != NTDB_SUCCESS) {
+               /* Undo the allrecord lock taken above. */
+               ntdb_allrecord_unlock(ntdb, F_WRLCK);
+               return ecode;
+       }
+       ecode = ntdb_transaction_recover(ntdb);
+       /* Release in reverse order of acquisition. */
+       ntdb_unlock_open(ntdb, F_WRLCK);
+       ntdb_allrecord_unlock(ntdb, F_WRLCK);
+
+       return ecode;
+}
+
+/* lock an offset in the database.
+ *
+ * Locks are tracked per offset in ntdb->file->lockrecs: the first request
+ * for an offset takes a one-byte lock through ntdb_brlock(), later requests
+ * for the same offset only bump the record's count.
+ *
+ * offset: lock offset; rejected if beyond the hash-lock range plus
+ *         map_size / 8.
+ * ltype:  F_RDLCK or F_WRLCK.
+ * flags:  passed to ntdb_brlock(); NTDB_LOCK_NOCHECK suppresses the
+ *         needs-recovery check done when this is the first lock held.
+ *
+ * Returns NTDB_SUCCESS or an error code. */
+static enum NTDB_ERROR ntdb_nest_lock(struct ntdb_context *ntdb,
+                                   ntdb_off_t offset, int ltype,
+                                   enum ntdb_lock_flags flags)
+{
+       struct ntdb_lock *new_lck;
+       enum NTDB_ERROR ecode;
+
+       if (offset > (NTDB_HASH_LOCK_START + NTDB_HASH_LOCK_RANGE
+                     + ntdb->file->map_size / 8)) {
+               return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+                                 "ntdb_nest_lock: invalid offset %zu ltype=%d",
+                                 (size_t)offset, ltype);
+       }
+
+       if (ntdb->flags & NTDB_NOLOCK)
+               return NTDB_SUCCESS;
+
+       if (!check_lock_pid(ntdb, "ntdb_nest_lock", true)) {
+               return NTDB_ERR_LOCK;
+       }
+
+       /* Counted for every request that gets this far, nested or not. */
+       ntdb->stats.locks++;
+
+       /* Search without an owner filter so that a record held by another
+        * context is detected rather than silently duplicated. */
+       new_lck = find_nestlock(ntdb, offset, NULL);
+       if (new_lck) {
+               if (new_lck->owner != ntdb) {
+                       return owner_conflict(ntdb, "ntdb_nest_lock");
+               }
+
+               /* A held read lock is not upgraded by nesting a write. */
+               if (new_lck->ltype == F_RDLCK && ltype == F_WRLCK) {
+                       return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+                                         "ntdb_nest_lock:"
+                                         " offset %zu has read lock",
+                                         (size_t)offset);
+               }
+               /* Just increment the struct, posix locks don't stack. */
+               new_lck->count++;
+               return NTDB_SUCCESS;
+       }
+
+#if 0
+       if (ntdb->file->num_lockrecs
+           && offset >= NTDB_HASH_LOCK_START
+           && offset < NTDB_HASH_LOCK_START + NTDB_HASH_LOCK_RANGE) {
+               return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+                                 "ntdb_nest_lock: already have a hash lock?");
+       }
+#endif
+
+       /* Grow the record array by one slot before taking the lock, so the
+        * bookkeeping below cannot fail once the lock is held. */
+       new_lck = (struct ntdb_lock *)realloc(
+               ntdb->file->lockrecs,
+               sizeof(*ntdb->file->lockrecs) * (ntdb->file->num_lockrecs+1));
+       if (new_lck == NULL) {
+               return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+                                 "ntdb_nest_lock:"
+                                 " unable to allocate %zu lock struct",
+                                 ntdb->file->num_lockrecs + 1);
+       }
+       ntdb->file->lockrecs = new_lck;
+
+       /* Since fcntl locks don't nest, we do a lock for the first one,
+          and simply bump the count for future ones */
+       ecode = ntdb_brlock(ntdb, ltype, offset, 1, flags);
+       if (ecode != NTDB_SUCCESS) {
+               /* The enlarged array stays; num_lockrecs is unchanged. */
+               return ecode;
+       }
+
+       /* First time we grab a lock, perhaps someone died in commit? */
+       if (!(flags & NTDB_LOCK_NOCHECK)
+           && ntdb->file->num_lockrecs == 0) {
+               ntdb_bool_err berr = ntdb_needs_recovery(ntdb);
+               if (berr != false) {
+                       /* Drop our lock before recovering (or failing). */
+                       ntdb_brunlock(ntdb, ltype, offset, 1);
+
+                       /* Negative values encode an error, not "true". */
+                       if (berr < 0)
+                               return NTDB_OFF_TO_ERR(berr);
+                       ecode = ntdb_lock_and_recover(ntdb);
+                       if (ecode == NTDB_SUCCESS) {
+                               /* Recovery done: take the lock again. */
+                               ecode = ntdb_brlock(ntdb, ltype, offset, 1,
+                                                  flags);
+                       }
+                       if (ecode != NTDB_SUCCESS) {
+                               return ecode;
+                       }
+               }
+       }
+
+       /* Fill in the slot reserved by the realloc() above. */
+       ntdb->file->lockrecs[ntdb->file->num_lockrecs].owner = ntdb;
+       ntdb->file->lockrecs[ntdb->file->num_lockrecs].off = offset;
+       ntdb->file->lockrecs[ntdb->file->num_lockrecs].count = 1;
+       ntdb->file->lockrecs[ntdb->file->num_lockrecs].ltype = ltype;
+       ntdb->file->num_lockrecs++;
+
+       return NTDB_SUCCESS;
+}
+
+/* Undo one ntdb_nest_lock() on "off" held by this context.
+ *
+ * A nested hold only has its count decremented.  The final hold is released
+ * through ntdb_brunlock() and its record is removed from the array.
+ * Returns NTDB_SUCCESS for NTDB_NOLOCK databases, the ntdb_logerr() result
+ * when this context holds no such lock, else the ntdb_brunlock() result. */
+static enum NTDB_ERROR ntdb_nest_unlock(struct ntdb_context *ntdb,
+                                     ntdb_off_t off, int ltype)
+{
+       struct ntdb_lock *held;
+       enum NTDB_ERROR result;
+
+       if (ntdb->flags & NTDB_NOLOCK)
+               return NTDB_SUCCESS;
+
+       held = find_nestlock(ntdb, off, ntdb);
+       if (!held || !held->count) {
+               return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+                                 "ntdb_nest_unlock: no lock for %zu",
+                                 (size_t)off);
+       }
+
+       if (held->count > 1) {
+               /* Still nested: give back one reference only. */
+               held->count--;
+               return NTDB_SUCCESS;
+       }
+
+       /* Last reference: the lock itself has to go.  The record's count is
+        * left alone because the slot is overwritten just below. */
+       result = ntdb_brunlock(ntdb, ltype, off, 1);
+
+       /* Compact the array by moving the final record into the freed slot. */
+       ntdb->file->num_lockrecs--;
+       *held = ntdb->file->lockrecs[ntdb->file->num_lockrecs];
+
+       return result;
+}
+
+/* Acquire the transaction lock (offset NTDB_TRANSACTION_LOCK) with the given
+ * lock type, using NTDB_LOCK_WAIT.  Returns ntdb_nest_lock()'s result. */
+enum NTDB_ERROR ntdb_transaction_lock(struct ntdb_context *ntdb, int ltype)
+{
+       enum NTDB_ERROR ecode;
+
+       ecode = ntdb_nest_lock(ntdb, NTDB_TRANSACTION_LOCK, ltype,
+                              NTDB_LOCK_WAIT);
+       return ecode;
+}
+
+/*
+  release the transaction lock
+ */
+void ntdb_transaction_unlock(struct ntdb_context *ntdb, int ltype)
+{
+       ntdb_nest_unlock(ntdb, NTDB_TRANSACTION_LOCK, ltype);
+}
+
+/* Lock the range [off, off+len) without ever blocking on more than one byte.
+ *
+ * Individual bytes would suffice, but Linux merges consecutive locks, so
+ * contiguous ranges are used.  The whole range is tried first with
+ * NTDB_LOCK_WAIT stripped; if that yields NTDB_ERR_LOCK the range is split
+ * in two and each half is handled recursively.  A single byte is locked
+ * with the caller's flags. */
+static enum NTDB_ERROR ntdb_lock_gradual(struct ntdb_context *ntdb,
+                                      int ltype, enum ntdb_lock_flags flags,
+                                      ntdb_off_t off, ntdb_off_t len)
+{
+       const ntdb_off_t lower = len / 2;
+       const ntdb_off_t upper = len - lower;
+       enum ntdb_lock_flags probe_flags = (flags & ~NTDB_LOCK_WAIT);
+       enum NTDB_ERROR ecode;
+
+       if (len <= 1) {
+               /* A length of 0 would mean "to end-of-file". */
+               assert(len != 0);
+               /* One hash only: lock it with the caller's flags. */
+               return ntdb_brlock(ntdb, ltype, off, len, flags);
+       }
+
+       /* Whole range, non-blocking attempt. */
+       ecode = ntdb_brlock(ntdb, ltype, off, len, probe_flags);
+       if (ecode != NTDB_ERR_LOCK)
+               return ecode;
+
+       /* Contended: lower half first, then the upper half. */
+       ecode = ntdb_lock_gradual(ntdb, ltype, flags, off, lower);
+       if (ecode == NTDB_SUCCESS) {
+               ecode = ntdb_lock_gradual(ntdb, ltype, flags,
+                                         off + lower, upper);
+               if (ecode != NTDB_SUCCESS) {
+                       /* Upper half failed: release the lower half. */
+                       ntdb_brunlock(ntdb, ltype, off, lower);
+               }
+       }
+       return ecode;
+}
+
+/* lock/unlock entire database.  It can only be upgradable if you have some
+ * other way of guaranteeing exclusivity (ie. transaction write lock).
+ *
+ * ltype:      F_RDLCK or F_WRLCK.
+ * flags:      passed to the underlying lock calls; NTDB_LOCK_NOCHECK skips
+ *             the needs-recovery check after the lock is obtained.
+ * upgradable: only valid together with F_RDLCK.
+ *
+ * Nested calls by the owning context just bump the count, provided the
+ * request is a read lock or the existing lock is already a write lock.
+ * Returns NTDB_SUCCESS or an error code. */
+enum NTDB_ERROR ntdb_allrecord_lock(struct ntdb_context *ntdb, int ltype,
+                                 enum ntdb_lock_flags flags, bool upgradable)
+{
+       enum NTDB_ERROR ecode;
+       ntdb_bool_err berr;
+
+       if (ntdb->flags & NTDB_NOLOCK)
+               return NTDB_SUCCESS;
+
+       if (!check_lock_pid(ntdb, "ntdb_allrecord_lock", true)) {
+               return NTDB_ERR_LOCK;
+       }
+
+       /* Already held: nest if compatible, otherwise refuse. */
+       if (ntdb->file->allrecord_lock.count) {
+               if (ntdb->file->allrecord_lock.owner != ntdb) {
+                       return owner_conflict(ntdb, "ntdb_allrecord_lock");
+               }
+
+               if (ltype == F_RDLCK
+                   || ntdb->file->allrecord_lock.ltype == F_WRLCK) {
+                       ntdb->file->allrecord_lock.count++;
+                       return NTDB_SUCCESS;
+               }
+
+               /* a global lock of a different type exists */
+               return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
+                                 "ntdb_allrecord_lock: already have %s lock",
+                                 ntdb->file->allrecord_lock.ltype == F_RDLCK
+                                 ? "read" : "write");
+       }
+
+       if (ntdb_has_hash_locks(ntdb)) {
+               /* can't combine global and chain locks */
+               return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
+                                 "ntdb_allrecord_lock:"
+                                 " already have chain lock");
+       }
+
+       if (upgradable && ltype != F_RDLCK) {
+               /* ntdb error: you can't upgrade a write lock! */
+               return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+                                 "ntdb_allrecord_lock:"
+                                 " can't upgrade a write lock");
+       }
+
+       /* Counted once per acquisition, not per retry after recovery. */
+       ntdb->stats.locks++;
+again:
+       /* Lock hashes, gradually. */
+       ecode = ntdb_lock_gradual(ntdb, ltype, flags, NTDB_HASH_LOCK_START,
+                                NTDB_HASH_LOCK_RANGE);
+       if (ecode != NTDB_SUCCESS)
+               return ecode;
+
+       /* Lock free tables: there to end of file. */
+       ecode = ntdb_brlock(ntdb, ltype,
+                          NTDB_HASH_LOCK_START + NTDB_HASH_LOCK_RANGE,
+                          0, flags);
+       if (ecode != NTDB_SUCCESS) {
+               /* Roll back the hash-range lock taken just above. */
+               ntdb_brunlock(ntdb, ltype, NTDB_HASH_LOCK_START,
+                            NTDB_HASH_LOCK_RANGE);
+               return ecode;
+       }
+
+       ntdb->file->allrecord_lock.owner = ntdb;
+       ntdb->file->allrecord_lock.count = 1;
+       /* If it's upgradable, it's actually exclusive so we can treat
+        * it as a write lock. */
+       ntdb->file->allrecord_lock.ltype = upgradable ? F_WRLCK : ltype;
+       /* "off" doubles as the "still upgradable" marker (1 or 0). */
+       ntdb->file->allrecord_lock.off = upgradable;
+
+       /* Now check for needing recovery. */
+       if (flags & NTDB_LOCK_NOCHECK)
+               return NTDB_SUCCESS;
+
+       berr = ntdb_needs_recovery(ntdb);
+       if (likely(berr == false))
+               return NTDB_SUCCESS;
+
+       /* Recovery needed (or the check failed): drop the lock first. */
+       ntdb_allrecord_unlock(ntdb, ltype);
+       /* Negative values encode an error rather than "true". */
+       if (berr < 0)
+               return NTDB_OFF_TO_ERR(berr);
+       ecode = ntdb_lock_and_recover(ntdb);
+       if (ecode != NTDB_SUCCESS) {
+               return ecode;
+       }
+       /* Recovered: take the lock again and re-run the check. */
+       goto again;
+}
+
+/* Take the open lock (offset NTDB_OPEN_LOCK) with the caller's lock type
+ * and flags.  Returns ntdb_nest_lock()'s result. */
+enum NTDB_ERROR ntdb_lock_open(struct ntdb_context *ntdb,
+                            int ltype, enum ntdb_lock_flags flags)
+{
+       enum NTDB_ERROR ecode;
+
+       ecode = ntdb_nest_lock(ntdb, NTDB_OPEN_LOCK, ltype, flags);
+       return ecode;
+}
+
+/* Release the open lock; the result of ntdb_nest_unlock() is discarded. */
+void ntdb_unlock_open(struct ntdb_context *ntdb, int ltype)
+{
+       (void)ntdb_nest_unlock(ntdb, NTDB_OPEN_LOCK, ltype);
+}
+
+/* Report whether this context holds the open lock.  Always false for
+ * NTDB_NOLOCK databases. */
+bool ntdb_has_open_lock(struct ntdb_context *ntdb)
+{
+       if (ntdb->flags & NTDB_NOLOCK)
+               return false;
+
+       return find_nestlock(ntdb, NTDB_OPEN_LOCK, ntdb) != NULL;
+}
+
+/* Take the expansion lock (offset NTDB_EXPANSION_LOCK).
+ *
+ * This lock does not protect data, so the recovery check is switched off
+ * with NTDB_LOCK_NOCHECK -- performing it would make us recurse. */
+enum NTDB_ERROR ntdb_lock_expand(struct ntdb_context *ntdb, int ltype)
+{
+       enum NTDB_ERROR ecode;
+
+       ecode = ntdb_nest_lock(ntdb, NTDB_EXPANSION_LOCK, ltype,
+                              NTDB_LOCK_WAIT | NTDB_LOCK_NOCHECK);
+       return ecode;
+}
+
+/* Release the expansion lock; the result of ntdb_nest_unlock() is
+ * discarded. */
+void ntdb_unlock_expand(struct ntdb_context *ntdb, int ltype)
+{
+       (void)ntdb_nest_unlock(ntdb, NTDB_EXPANSION_LOCK, ltype);
+}
+
+/* unlock entire db
+ *
+ * Undoes one ntdb_allrecord_lock().  Misuse (not locked, locked by another
+ * context, wrong lock type) is logged and otherwise ignored, since this
+ * function has no return value.  Only when the count drops from 1 is the
+ * lock actually released through ntdb_brunlock(). */
+void ntdb_allrecord_unlock(struct ntdb_context *ntdb, int ltype)
+{
+       if (ntdb->flags & NTDB_NOLOCK)
+               return;
+
+       if (ntdb->file->allrecord_lock.count == 0) {
+               ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
+                          "ntdb_allrecord_unlock: not locked!");
+               return;
+       }
+
+       if (ntdb->file->allrecord_lock.owner != ntdb) {
+               ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
+                          "ntdb_allrecord_unlock: not locked by us!");
+               return;
+       }
+
+       /* Upgradable locks are marked as write locks. */
+       /* So a type mismatch is tolerated only when the lock is still
+        * flagged upgradable (off != 0) and the caller passes F_RDLCK. */
+       if (ntdb->file->allrecord_lock.ltype != ltype
+           && (!ntdb->file->allrecord_lock.off || ltype != F_RDLCK)) {
+               ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+                          "ntdb_allrecord_unlock: have %s lock",
+                          ntdb->file->allrecord_lock.ltype == F_RDLCK
+                          ? "read" : "write");
+               return;
+       }
+
+       /* Nested hold: drop one reference and keep the lock. */
+       if (ntdb->file->allrecord_lock.count > 1) {
+               ntdb->file->allrecord_lock.count--;
+               return;
+       }
+
+       ntdb->file->allrecord_lock.count = 0;
+       ntdb->file->allrecord_lock.ltype = 0;
+
+       /* Length 0: from NTDB_HASH_LOCK_START to end of file, covering both
+        * ranges locked by ntdb_allrecord_lock(). */
+       ntdb_brunlock(ntdb, ltype, NTDB_HASH_LOCK_START, 0);
+}
+
+/* Report whether this context holds the expansion lock. */
+bool ntdb_has_expansion_lock(struct ntdb_context *ntdb)
+{
+       const struct ntdb_lock *held;
+
+       held = find_nestlock(ntdb, NTDB_EXPANSION_LOCK, ntdb);
+       return held != NULL;
+}
+
+/* Report whether any recorded lock lies inside the hash-lock range
+ * [NTDB_HASH_LOCK_START, NTDB_HASH_LOCK_START + NTDB_HASH_LOCK_RANGE).
+ * The owner of the record is not taken into account. */
+bool ntdb_has_hash_locks(struct ntdb_context *ntdb)
+{
+       unsigned int idx = 0;
+
+       while (idx < ntdb->file->num_lockrecs) {
+               const struct ntdb_lock *rec = &ntdb->file->lockrecs[idx];
+
+               if (rec->off >= NTDB_HASH_LOCK_START
+                   && rec->off < (NTDB_HASH_LOCK_START
+                                  + NTDB_HASH_LOCK_RANGE)) {
+                       return true;
+               }
+               idx++;
+       }
+       return false;
+}
+
+/* Report whether any recorded lock lies above the hash-lock range, i.e. at
+ * an offset greater than NTDB_HASH_LOCK_START + NTDB_HASH_LOCK_RANGE.
+ * Always false for NTDB_NOLOCK databases. */
+static bool ntdb_has_free_lock(struct ntdb_context *ntdb)
+{
+       unsigned int idx;
+
+       if (ntdb->flags & NTDB_NOLOCK)
+               return false;
+
+       idx = 0;
+       while (idx < ntdb->file->num_lockrecs) {
+               const struct ntdb_lock *rec = &ntdb->file->lockrecs[idx];
+
+               if (rec->off > NTDB_HASH_LOCK_START + NTDB_HASH_LOCK_RANGE)
+                       return true;
+               idx++;
+       }
+       return false;
+}
+
+/* Lock the hash chain selected by the top NTDB_HASH_LOCK_RANGE_BITS bits of
+ * "hash_lock".
+ *
+ * When an allrecord lock is held by this context no per-chain lock is taken:
+ * the call succeeds if a read lock is requested or the requested type equals
+ * the allrecord lock type, and fails otherwise.  Without an allrecord lock
+ * the request is refused while a free-bucket or expansion lock is held.
+ * "hash_range" is currently unused. */
+enum NTDB_ERROR ntdb_lock_hashes(struct ntdb_context *ntdb,
+                              ntdb_off_t hash_lock,
+                              ntdb_len_t hash_range,
+                              int ltype, enum ntdb_lock_flags waitflag)
+{
+       /* FIXME: Do this properly, using hlock_range */
+       unsigned chain_off = NTDB_HASH_LOCK_START
+               + (hash_lock >> (64 - NTDB_HASH_LOCK_RANGE_BITS));
+
+       if (!ntdb->file->allrecord_lock.count) {
+               if (ntdb_has_free_lock(ntdb)) {
+                       return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+                                         "ntdb_lock_hashes: already have free lock");
+               }
+
+               if (ntdb_has_expansion_lock(ntdb)) {
+                       return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+                                         "ntdb_lock_hashes:"
+                                         " already have expansion lock");
+               }
+
+               return ntdb_nest_lock(ntdb, chain_off, ltype, waitflag);
+       }
+
+       /* An allrecord lock stands in for the per-chain lock. */
+       if (!check_lock_pid(ntdb, "ntdb_lock_hashes", true))
+               return NTDB_ERR_LOCK;
+
+       if (ntdb->file->allrecord_lock.owner != ntdb)
+               return owner_conflict(ntdb, "ntdb_lock_hashes");
+
+       if (ltype != ntdb->file->allrecord_lock.ltype
+           && ltype != F_RDLCK) {
+               return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
+                                 "ntdb_lock_hashes:"
+                                 " already have %s allrecordlock",
+                                 ntdb->file->allrecord_lock.ltype == F_RDLCK
+                                 ? "read" : "write");
+       }
+       return NTDB_SUCCESS;
+}
+
+/* Release the hash-chain lock taken by ntdb_lock_hashes() for "hash_lock".
+ *
+ * For NTDB_NOLOCK databases this is a no-op.  While an allrecord lock is
+ * held nothing is released: the call only verifies that the requested type
+ * is compatible and that this context owns the allrecord lock.  Otherwise
+ * the per-chain nested lock is dropped.  "hash_range" is currently unused.
+ *
+ * Returns NTDB_SUCCESS or an error code. */
+enum NTDB_ERROR ntdb_unlock_hashes(struct ntdb_context *ntdb,
+                                ntdb_off_t hash_lock,
+                                ntdb_len_t hash_range, int ltype)
+{
+       /* Same offset computation as ntdb_lock_hashes(). */
+       unsigned l = NTDB_HASH_LOCK_START
+               + (hash_lock >> (64 - NTDB_HASH_LOCK_RANGE_BITS));
+
+       if (ntdb->flags & NTDB_NOLOCK)
+               return NTDB_SUCCESS;
+
+       /* a allrecord lock allows us to avoid per chain locks */
+       if (ntdb->file->allrecord_lock.count) {
+               /* A write unlock cannot match a read-only allrecord lock. */
+               if (ntdb->file->allrecord_lock.ltype == F_RDLCK
+                   && ltype == F_WRLCK) {
+                       return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+                                         "ntdb_unlock_hashes RO allrecord!");
+               }
+               if (ntdb->file->allrecord_lock.owner != ntdb) {
+                       return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
+                                         "ntdb_unlock_hashes:"
+                                         " not locked by us!");
+               }
+               return NTDB_SUCCESS;
+       }
+
+       return ntdb_nest_unlock(ntdb, l, ltype);
+}
+
+/* Map a free-bucket offset to the lock offset that guards it.
+ *
+ * Free-bucket locks start right after the hash-lock range
+ * (NTDB_HASH_LOCK_START + NTDB_HASH_LOCK_RANGE).  The bucket offset is
+ * divided by sizeof(ntdb_off_t), the spacing between buckets, so that on
+ * 32 bit systems lock values stay at or below 2^31 for files smaller
+ * than 4GB.
+ */
+static ntdb_off_t free_lock_off(ntdb_off_t b_off)
+{
+       const ntdb_off_t bucket_index = b_off / sizeof(ntdb_off_t);
+
+       return NTDB_HASH_LOCK_START + NTDB_HASH_LOCK_RANGE + bucket_index;
+}
+
+enum NTDB_ERROR ntdb_lock_free_bucket(struct ntdb_context *ntdb, ntdb_off_t b_off,
+                                   enum ntdb_lock_flags waitflag)
+{
+       assert(b_off >= sizeof(struct ntdb_header));
+
+       if (ntdb->flags & NTDB_NOLOCK)
+               return 0;
+
+       /* a allrecord lock allows us to avoid per chain locks */
+       if (ntdb->file->allrecord_lock.count) {
+               if (!check_lock_pid(ntdb, "ntdb_lock_free_bucket", true))
+                       return NTDB_ERR_LOCK;
+
+               if (ntdb->file->allrecord_lock.owner != ntdb) {
+                       return owner_conflict(ntdb, "ntdb_lock_free_bucket");
+               }
+
+               if (ntdb->file->allrecord_lock.ltype == F_WRLCK)
+                       return 0;
+               return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+                                 "ntdb_lock_free_bucket with"
+                                 " read-only allrecordlock!");
+       }
+
+#if 0 /* FIXME */
+       if (ntdb_has_expansion_lock(ntdb)) {
+               return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+                                 "ntdb_lock_free_bucket:"
+                                 " already have expansion lock");
+       }
+#endif
+
+       return ntdb_nest_lock(ntdb, free_lock_off(b_off), F_WRLCK, waitflag);
+}
+
+/* Release the write lock on the free bucket at "b_off".  Nothing is done
+ * while an allrecord lock is held, since no per-bucket lock was taken then.
+ * The result of ntdb_nest_unlock() is discarded. */
+void ntdb_unlock_free_bucket(struct ntdb_context *ntdb, ntdb_off_t b_off)
+{
+       if (!ntdb->file->allrecord_lock.count)
+               (void)ntdb_nest_unlock(ntdb, free_lock_off(b_off), F_WRLCK);
+}
+
+/* Public API: write-lock the whole database, waiting for the lock
+ * (NTDB_LOCK_WAIT); the lock is not upgradable. */
+_PUBLIC_ enum NTDB_ERROR ntdb_lockall(struct ntdb_context *ntdb)
+{
+       enum NTDB_ERROR ecode;
+
+       ecode = ntdb_allrecord_lock(ntdb, F_WRLCK, NTDB_LOCK_WAIT, false);
+       return ecode;
+}
+
+/* Public API: drop one whole-database write lock taken by ntdb_lockall(). */
+_PUBLIC_ void ntdb_unlockall(struct ntdb_context *ntdb)
+{
+       const int ltype = F_WRLCK;
+
+       ntdb_allrecord_unlock(ntdb, ltype);
+}
+
+/* Public API: read-lock the whole database, waiting for the lock
+ * (NTDB_LOCK_WAIT); the lock is not upgradable. */
+_PUBLIC_ enum NTDB_ERROR ntdb_lockall_read(struct ntdb_context *ntdb)
+{
+       enum NTDB_ERROR ecode;
+
+       ecode = ntdb_allrecord_lock(ntdb, F_RDLCK, NTDB_LOCK_WAIT, false);
+       return ecode;
+}
+
+/* Public API: drop one whole-database read lock taken by
+ * ntdb_lockall_read(). */
+_PUBLIC_ void ntdb_unlockall_read(struct ntdb_context *ntdb)
+{
+       const int ltype = F_RDLCK;
+
+       ntdb_allrecord_unlock(ntdb, ltype);
+}
+
+/* Release every lock this context still holds on the shared file:
+ * first all nested holds of the allrecord lock, then every nested-lock
+ * record owned by this context.  Records owned by other contexts are
+ * left untouched. */
+void ntdb_lock_cleanup(struct ntdb_context *ntdb)
+{
+       unsigned int i;
+
+       /* We don't want to warn: they're allowed to close ntdb after fork. */
+       if (!check_lock_pid(ntdb, "ntdb_close", false))
+               return;
+
+       /* Each call drops one level of nesting; loop until it is gone. */
+       while (ntdb->file->allrecord_lock.count
+              && ntdb->file->allrecord_lock.owner == ntdb) {
+               ntdb_allrecord_unlock(ntdb, ntdb->file->allrecord_lock.ltype);
+       }
+
+       for (i=0; i<ntdb->file->num_lockrecs; i++) {
+               if (ntdb->file->lockrecs[i].owner == ntdb) {
+                       ntdb_nest_unlock(ntdb,
+                                       ntdb->file->lockrecs[i].off,
+                                       ntdb->file->lockrecs[i].ltype);
+                       /* Revisit the same slot: the unlock either lowered
+                        * this record's count or moved the last record
+                        * here.  At i == 0 the unsigned wrap-around is
+                        * undone by the loop's i++. */
+                       i--;
+               }
+       }
+}
diff --git a/lib/ntdb/ntdb.c b/lib/ntdb/ntdb.c
new file mode 100644 (file)
index 0000000..9f1e327
--- /dev/null
@@ -0,0 +1,605 @@
+ /*
+   Trivial Database 2: fetch, store and misc routines.
+   Copyright (C) Rusty Russell 2010
+
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 3 of the License, or (at your option) any later version.
+
+   This library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include "private.h"
+#ifndef HAVE_LIBREPLACE
+#include <ccan/asprintf/asprintf.h>
+#include <stdarg.h>
+#endif
+
+/* Rewrite the header of the used record at "off" for new key/data lengths.
+ *
+ * The room available for data (current data length plus extra padding) is
+ * kept as it is; only the lengths stored in the header change.  The header
+ * is rebuilt in "rec" via set_header() and, if that succeeds, written out
+ * with ntdb_write_convert().  Returns the first error encountered. */
+static enum NTDB_ERROR update_rec_hdr(struct ntdb_context *ntdb,
+                                    ntdb_off_t off,
+                                    ntdb_len_t keylen,
+                                    ntdb_len_t datalen,
+                                    struct ntdb_used_record *rec,
+                                    uint64_t h)
+{
+       /* Must be sampled before set_header() overwrites "rec". */
+       const uint64_t room = rec_data_length(rec) + rec_extra_padding(rec);
+       enum NTDB_ERROR ecode;
+
+       ecode = set_header(ntdb, rec, NTDB_USED_MAGIC, keylen, datalen,
+                          keylen + room, h);
+       if (ecode != NTDB_SUCCESS)
+               return ecode;
+
+       return ntdb_write_convert(ntdb, off, rec, sizeof(*rec));
+}
+
+/* Store key/dbuf in a freshly allocated record and hook it into the hash.
+ *
+ * h:        hash location for the key, as filled in by find_and_lock().
+ * old_off:  offset of the record being replaced, or 0 when the key is new.
+ * old_room: data room (length plus padding) of the old record; only used
+ *           when old_off is non-zero.
+ * growing:  passed on to alloc().
+ *
+ * Returns NTDB_SUCCESS or the first error encountered. */
+static enum NTDB_ERROR replace_data(struct ntdb_context *ntdb,
+                                  struct hash_info *h,
+                                  NTDB_DATA key, NTDB_DATA dbuf,
+                                  ntdb_off_t old_off, ntdb_len_t old_room,
+                                  bool growing)
+{
+       ntdb_off_t new_off;
+       enum NTDB_ERROR ecode;
+
+       /* Allocate a new record. */
+       new_off = alloc(ntdb, key.dsize, dbuf.dsize, h->h, NTDB_USED_MAGIC,
+                       growing);
+       if (NTDB_OFF_IS_ERR(new_off)) {
+               return NTDB_OFF_TO_ERR(new_off);
+       }
+
+       /* We didn't like the existing one: remove it. */
+       if (old_off) {
+               ntdb->stats.frees++;
+               /* The freed length spans header, key and data room. */
+               ecode = add_free_record(ntdb, old_off,
+                                       sizeof(struct ntdb_used_record)
+                                       + key.dsize + old_room,
+                                       NTDB_LOCK_WAIT, true);
+               if (ecode == NTDB_SUCCESS)
+                       ecode = replace_in_hash(ntdb, h, new_off);
+       } else {
+               ecode = add_to_hash(ntdb, h, new_off);
+       }
+       if (ecode != NTDB_SUCCESS) {
+               return ecode;
+       }
+
+       /* Key goes directly after the record header ... */
+       new_off += sizeof(struct ntdb_used_record);
+       ecode = ntdb->io->twrite(ntdb, new_off, key.dptr, key.dsize);
+       if (ecode != NTDB_SUCCESS) {
+               return ecode;
+       }
+
+       /* ... and the data directly after the key. */
+       new_off += key.dsize;
+       ecode = ntdb->io->twrite(ntdb, new_off, dbuf.dptr, dbuf.dsize);
+       if (ecode != NTDB_SUCCESS) {
+               return ecode;
+       }
+
+       /* Sequence number is bumped only once everything was written. */
+       if (ntdb->flags & NTDB_SEQNUM)
+               ntdb_inc_seqnum(ntdb);
+
+       return NTDB_SUCCESS;
+}
+
+/* Write "dbuf" at file offset "off", overwriting data in place.
+ *
+ * If "extra" is non-zero and the write succeeded, a single zero byte is
+ * written right after the data (future versions may append other data
+ * there).  The sequence number is bumped for NTDB_SEQNUM databases whether
+ * or not the writes succeeded.  Returns the result of the last write
+ * attempted. */
+static enum NTDB_ERROR update_data(struct ntdb_context *ntdb,
+                                 ntdb_off_t off,
+                                 NTDB_DATA dbuf,
+                                 ntdb_len_t extra)
+{
+       enum NTDB_ERROR ecode = ntdb->io->twrite(ntdb, off,
+                                                dbuf.dptr, dbuf.dsize);
+
+       if (extra != 0 && ecode == NTDB_SUCCESS) {
+               /* Terminate the data with a zero byte. */
+               ecode = ntdb->io->twrite(ntdb, off + dbuf.dsize, "", 1);
+       }
+
+       if (ntdb->flags & NTDB_SEQNUM) {
+               ntdb_inc_seqnum(ntdb);
+       }
+       return ecode;
+}
+
+/* Store "dbuf" under "key".
+ *
+ * flag: NTDB_INSERT fails with NTDB_ERR_EXISTS if the key is present;
+ *       NTDB_MODIFY fails with NTDB_ERR_NOEXIST if it is absent; any other
+ *       value stores unconditionally.
+ *
+ * An existing record is overwritten in place when its data room (data
+ * length plus padding) can hold the new data; otherwise a new record is
+ * allocated through replace_data().
+ *
+ * The result is also recorded in ntdb->last_error. */
+_PUBLIC_ enum NTDB_ERROR ntdb_store(struct ntdb_context *ntdb,
+                        NTDB_DATA key, NTDB_DATA dbuf, int flag)
+{
+       struct hash_info h;
+       ntdb_off_t off;
+       ntdb_len_t old_room = 0;
+       struct ntdb_used_record rec;
+       enum NTDB_ERROR ecode;
+
+       off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL);
+       if (NTDB_OFF_IS_ERR(off)) {
+               return ntdb->last_error = NTDB_OFF_TO_ERR(off);
+       }
+
+       /* Now we have lock on this hash bucket. */
+       if (flag == NTDB_INSERT) {
+               if (off) {
+                       ecode = NTDB_ERR_EXISTS;
+                       goto out;
+               }
+       } else {
+               if (off) {
+                       old_room = rec_data_length(&rec)
+                               + rec_extra_padding(&rec);
+                       if (old_room >= dbuf.dsize) {
+                               /* Can modify in-place.  Easy! */
+                               ecode = update_rec_hdr(ntdb, off,
+                                                      key.dsize, dbuf.dsize,
+                                                      &rec, h.h);
+                               if (ecode != NTDB_SUCCESS) {
+                                       goto out;
+                               }
+                               ecode = update_data(ntdb,
+                                                   off + sizeof(rec)
+                                                   + key.dsize, dbuf,
+                                                   old_room - dbuf.dsize);
+                               /* Success or failure, the common exit
+                                * unlocks and reports ecode. */
+                               goto out;
+                       }
+               } else {
+                       if (flag == NTDB_MODIFY) {
+                               /* if the record doesn't exist and we
+                                  are in NTDB_MODIFY mode then we should fail
+                                  the store */
+                               ecode = NTDB_ERR_NOEXIST;
+                               goto out;
+                       }
+               }
+       }
+
+       /* If we didn't use the old record, this implies we're growing.
+        * Pass an explicit boolean rather than relying on an implicit
+        * conversion of the 64-bit offset. */
+       ecode = replace_data(ntdb, &h, key, dbuf, off, old_room, off != 0);
+out:
+       ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, F_WRLCK);
+       return ntdb->last_error = ecode;
+}
+
+/* Append "dbuf" to the data stored under "key", creating the record if the
+ * key does not exist yet.
+ *
+ * Fast path: the existing record's padding can hold the new bytes, so the
+ * header is updated and the bytes are written after the old data.
+ * Slow path: old data and new data are assembled in a temporary buffer and
+ * stored in a new record through replace_data().
+ *
+ * The result is also recorded in ntdb->last_error. */
+_PUBLIC_ enum NTDB_ERROR ntdb_append(struct ntdb_context *ntdb,
+                         NTDB_DATA key, NTDB_DATA dbuf)
+{
+       struct hash_info h;
+       ntdb_off_t off;
+       struct ntdb_used_record rec;
+       ntdb_len_t old_room = 0, old_dlen;
+       unsigned char *newdata;
+       NTDB_DATA new_dbuf;
+       enum NTDB_ERROR ecode;
+
+       off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL);
+       if (NTDB_OFF_IS_ERR(off)) {
+               return ntdb->last_error = NTDB_OFF_TO_ERR(off);
+       }
+
+       if (off) {
+               old_dlen = rec_data_length(&rec);
+               old_room = old_dlen + rec_extra_padding(&rec);
+
+               /* Fast path: can append in place. */
+               if (rec_extra_padding(&rec) >= dbuf.dsize) {
+                       ecode = update_rec_hdr(ntdb, off, key.dsize,
+                                              old_dlen + dbuf.dsize, &rec,
+                                              h.h);
+                       if (ecode != NTDB_SUCCESS) {
+                               goto out;
+                       }
+
+                       /* Position just past the existing data. */
+                       off += sizeof(rec) + key.dsize + old_dlen;
+                       /* "rec" was rewritten by update_rec_hdr(), so this
+                        * is the padding of the updated header. */
+                       ecode = update_data(ntdb, off, dbuf,
+                                           rec_extra_padding(&rec));
+                       /* newdata was never allocated on this path, hence
+                        * "out" rather than "out_free_newdata". */
+                       goto out;
+               }
+
+               /* Slow path. */
+               /* NOTE(review): only old_dlen + dbuf.dsize bytes of this
+                * buffer are filled below; the key.dsize term looks like
+                * over-allocation -- confirm before changing. */
+               newdata = malloc(key.dsize + old_dlen + dbuf.dsize);
+               if (!newdata) {
+                       ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+                                          "ntdb_append:"
+                                          " failed to allocate %zu bytes",
+                                          (size_t)(key.dsize + old_dlen
+                                                   + dbuf.dsize));
+                       goto out;
+               }
+               /* Old data first ... */
+               ecode = ntdb->io->tread(ntdb, off + sizeof(rec) + key.dsize,
+                                      newdata, old_dlen);
+               if (ecode != NTDB_SUCCESS) {
+                       goto out_free_newdata;
+               }
+               /* ... then the appended bytes right behind it. */
+               memcpy(newdata + old_dlen, dbuf.dptr, dbuf.dsize);
+               new_dbuf.dptr = newdata;
+               new_dbuf.dsize = old_dlen + dbuf.dsize;
+       } else {
+               /* No existing record: store dbuf as is; free(NULL) below is
+                * harmless. */
+               newdata = NULL;
+               new_dbuf = dbuf;
+       }
+
+       /* If they're using ntdb_append(), it implies they're growing record. */
+       ecode = replace_data(ntdb, &h, key, new_dbuf, off, old_room, true);
+
+out_free_newdata:
+       free(newdata);
+out:
+       ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, F_WRLCK);
+       return ntdb->last_error = ecode;
+}
+
+/*
+ * Look up @key and return its data in @data.
+ *
+ * On NTDB_SUCCESS, data->dptr holds the buffer returned by
+ * ntdb_alloc_read() and data->dsize its length.  If the key is absent
+ * the result is NTDB_ERR_NOEXIST.  The result is also stored in
+ * ntdb->last_error.
+ */
+_PUBLIC_ enum NTDB_ERROR ntdb_fetch(struct ntdb_context *ntdb, NTDB_DATA key,
+                        NTDB_DATA *data)
+{
+       struct hash_info h;
+       struct ntdb_used_record rec;
+       /* Assume "not found" until the lookup proves otherwise. */
+       enum NTDB_ERROR ecode = NTDB_ERR_NOEXIST;
+       ntdb_off_t off;
+
+       off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, NULL);
+       if (NTDB_OFF_IS_ERR(off)) {
+               return ntdb->last_error = NTDB_OFF_TO_ERR(off);
+       }
+
+       if (off) {
+               /* The data sits after the record header and the key. */
+               data->dsize = rec_data_length(&rec);
+               data->dptr = ntdb_alloc_read(ntdb,
+                                           off + sizeof(rec) + key.dsize,
+                                           data->dsize);
+               if (!NTDB_PTR_IS_ERR(data->dptr)) {
+                       ecode = NTDB_SUCCESS;
+               } else {
+                       ecode = NTDB_PTR_ERR(data->dptr);
+               }
+       }
+
+       ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, F_RDLCK);
+       return ntdb->last_error = ecode;
+}
+
+/*
+ * Report whether @key is present.  Returns false both when the key is
+ * missing (last_error == NTDB_SUCCESS) and when the lookup itself fails
+ * (last_error holds the failure).
+ */
+_PUBLIC_ bool ntdb_exists(struct ntdb_context *ntdb, NTDB_DATA key)
+{
+       struct hash_info h;
+       struct ntdb_used_record rec;
+       ntdb_off_t off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, NULL);
+
+       if (NTDB_OFF_IS_ERR(off)) {
+               ntdb->last_error = NTDB_OFF_TO_ERR(off);
+               return false;
+       }
+
+       /* Only the offset matters, so drop the lock straight away. */
+       ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, F_RDLCK);
+       ntdb->last_error = NTDB_SUCCESS;
+
+       return off != 0;
+}
+
+/*
+ * Remove @key from the database.
+ *
+ * Returns NTDB_ERR_NOEXIST if the key is not found, otherwise the result
+ * of unhooking it from the hash and freeing its space.  The result is
+ * also stored in ntdb->last_error.
+ */
+_PUBLIC_ enum NTDB_ERROR ntdb_delete(struct ntdb_context *ntdb, NTDB_DATA key)
+{
+       struct hash_info h;
+       struct ntdb_used_record rec;
+       enum NTDB_ERROR ecode;
+       ntdb_off_t off;
+
+       off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL);
+       if (NTDB_OFF_IS_ERR(off)) {
+               return ntdb->last_error = NTDB_OFF_TO_ERR(off);
+       }
+
+       if (!off) {
+               ecode = NTDB_ERR_NOEXIST;
+       } else {
+               ecode = delete_from_hash(ntdb, &h);
+               if (ecode == NTDB_SUCCESS) {
+                       /* Free everything the record occupied: header,
+                        * key, data and trailing padding. */
+                       ntdb->stats.frees++;
+                       ecode = add_free_record(ntdb, off,
+                                               sizeof(struct ntdb_used_record)
+                                               + rec_key_length(&rec)
+                                               + rec_data_length(&rec)
+                                               + rec_extra_padding(&rec),
+                                               NTDB_LOCK_WAIT, true);
+
+                       /* The hash entry is gone whether or not the free
+                        * succeeded, so the sequence number still moves. */
+                       if (ntdb->flags & NTDB_SEQNUM)
+                               ntdb_inc_seqnum(ntdb);
+               }
+       }
+
+       ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, F_WRLCK);
+       return ntdb->last_error = ecode;
+}
+
+/* Return the current NTDB_* flag bits stored in the context. */
+_PUBLIC_ unsigned int ntdb_get_flags(struct ntdb_context *ntdb)
+{
+       return ntdb->flags;
+}
+
+/* True when the context has a transaction attached (non-NULL pointer). */
+static bool inside_transaction(const struct ntdb_context *ntdb)
+{
+       return ntdb->transaction != NULL;
+}
+
+/*
+ * May NTDB_RDONLY be toggled right now?  It may not inside a
+ * transaction: in that case log a usage error naming @caller, record it
+ * in last_error and return false.
+ */
+static bool readonly_changable(struct ntdb_context *ntdb, const char *caller)
+{
+       if (!inside_transaction(ntdb)) {
+               return true;
+       }
+
+       ntdb->last_error = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
+                                    NTDB_LOG_USE_ERROR,
+                                    "%s: can't change"
+                                    " NTDB_RDONLY inside transaction",
+                                    caller);
+       return false;
+}
+
+_PUBLIC_ void ntdb_add_flag(struct ntdb_context *ntdb, unsigned flag)
+{
+       if (ntdb->flags & NTDB_INTERNAL) {
+               ntdb->last_error = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
+                                            NTDB_LOG_USE_ERROR,
+                                            "ntdb_add_flag: internal db");
+               return;
+       }
+       switch (flag) {
+       case NTDB_NOLOCK:
+               ntdb->flags |= NTDB_NOLOCK;
+               break;
+       case NTDB_NOMMAP:
+               ntdb->flags |= NTDB_NOMMAP;
+#ifndef HAVE_INCOHERENT_MMAP
+               ntdb_munmap(ntdb->file);
+#endif
+               break;
+       case NTDB_NOSYNC:
+               ntdb->flags |= NTDB_NOSYNC;
+               break;
+       case NTDB_SEQNUM:
+               ntdb->flags |= NTDB_SEQNUM;
+               break;
+       case NTDB_ALLOW_NESTING:
+               ntdb->flags |= NTDB_ALLOW_NESTING;
+               break;
+       case NTDB_RDONLY:
+               if (readonly_changable(ntdb, "ntdb_add_flag"))
+                       ntdb->flags |= NTDB_RDONLY;
+               break;
+       default:
+               ntdb->last_error = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
+                                            NTDB_LOG_USE_ERROR,
+                                            "ntdb_add_flag: Unknown flag %u",
+                                            flag);
+       }
+}
+
+_PUBLIC_ void ntdb_remove_flag(struct ntdb_context *ntdb, unsigned flag)
+{
+       if (ntdb->flags & NTDB_INTERNAL) {
+               ntdb->last_error = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
+                                            NTDB_LOG_USE_ERROR,
+                                            "ntdb_remove_flag: internal db");
+               return;
+       }
+       switch (flag) {
+       case NTDB_NOLOCK:
+               ntdb->flags &= ~NTDB_NOLOCK;
+               break;
+       case NTDB_NOMMAP:
+               ntdb->flags &= ~NTDB_NOMMAP;
+#ifndef HAVE_INCOHERENT_MMAP
+               /* If mmap incoherent, we were mmaping anyway. */
+               ntdb_mmap(ntdb);
+#endif
+               break;
+       case NTDB_NOSYNC:
+               ntdb->flags &= ~NTDB_NOSYNC;
+               break;
+       case NTDB_SEQNUM:
+               ntdb->flags &= ~NTDB_SEQNUM;
+               break;
+       case NTDB_ALLOW_NESTING:
+               ntdb->flags &= ~NTDB_ALLOW_NESTING;
+               break;
+       case NTDB_RDONLY:
+               if ((ntdb->open_flags & O_ACCMODE) == O_RDONLY) {
+                       ntdb->last_error = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
+                                                    NTDB_LOG_USE_ERROR,
+                                                    "ntdb_remove_flag: can't"
+                                                    " remove NTDB_RDONLY on ntdb"
+                                                    " opened with O_RDONLY");
+                       break;
+               }
+               if (readonly_changable(ntdb, "ntdb_remove_flag"))
+                       ntdb->flags &= ~NTDB_RDONLY;
+               break;
+       default:
+               ntdb->last_error = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
+                                            NTDB_LOG_USE_ERROR,
+                                            "ntdb_remove_flag: Unknown flag %u",
+                                            flag);
+       }
+}
+
+/*
+ * Map an error code to a constant, human-readable string.  Codes that
+ * are not listed yield "Invalid error code".
+ */
+_PUBLIC_ const char *ntdb_errorstr(enum NTDB_ERROR ecode)
+{
+       const char *str = "Invalid error code";
+
+       /* One case per enum NTDB_ERROR value; keep in sync with ntdb.h. */
+       switch (NTDB_ERR_TO_OFF(ecode)) {
+       case NTDB_ERR_TO_OFF(NTDB_SUCCESS):
+               str = "Success";
+               break;
+       case NTDB_ERR_TO_OFF(NTDB_ERR_CORRUPT):
+               str = "Corrupt database";
+               break;
+       case NTDB_ERR_TO_OFF(NTDB_ERR_IO):
+               str = "IO Error";
+               break;
+       case NTDB_ERR_TO_OFF(NTDB_ERR_LOCK):
+               str = "Locking error";
+               break;
+       case NTDB_ERR_TO_OFF(NTDB_ERR_OOM):
+               str = "Out of memory";
+               break;
+       case NTDB_ERR_TO_OFF(NTDB_ERR_EXISTS):
+               str = "Record exists";
+               break;
+       case NTDB_ERR_TO_OFF(NTDB_ERR_EINVAL):
+               str = "Invalid parameter";
+               break;
+       case NTDB_ERR_TO_OFF(NTDB_ERR_NOEXIST):
+               str = "Record does not exist";
+               break;
+       case NTDB_ERR_TO_OFF(NTDB_ERR_RDONLY):
+               str = "write not permitted";
+               break;
+       }
+       return str;
+}
+
+/* Return the error code most recently stored in ntdb->last_error. */
+_PUBLIC_ enum NTDB_ERROR ntdb_error(struct ntdb_context *ntdb)
+{
+       return ntdb->last_error;
+}
+
+/*
+ * Format a message and pass it to the context's log function.
+ *
+ * @ecode is returned unchanged so callers can write
+ * "return ntdb_logerr(...)".  If no log function is set, nothing is
+ * formatted.  errno is preserved across the call.
+ *
+ * If formatting fails, an out-of-memory note is logged followed by the
+ * raw, unexpanded @fmt string.
+ */
+enum NTDB_ERROR COLD ntdb_logerr(struct ntdb_context *ntdb,
+                              enum NTDB_ERROR ecode,
+                              enum ntdb_log_level level,
+                              const char *fmt, ...)
+{
+       char *message;
+       va_list ap;
+       /* vasprintf() returns a (signed) int, negative on failure.  Storing
+        * it in a size_t made the "len < 0" test below always false, so a
+        * failed allocation went on to log and free() an indeterminate
+        * pointer. */
+       int len;
+       /* ntdb_open paths care about errno, so save it. */
+       int saved_errno = errno;
+
+       if (!ntdb->log_fn)
+               return ecode;
+
+       va_start(ap, fmt);
+       len = vasprintf(&message, fmt, ap);
+       va_end(ap);
+
+       if (len < 0) {
+               ntdb->log_fn(ntdb, NTDB_LOG_ERROR, NTDB_ERR_OOM,
+                           "out of memory formatting message:", ntdb->log_data);
+               ntdb->log_fn(ntdb, level, ecode, fmt, ntdb->log_data);
+       } else {
+               ntdb->log_fn(ntdb, level, ecode, message, ntdb->log_data);
+               free(message);
+       }
+       errno = saved_errno;
+       return ecode;
+}
+
+/*
+ * Find @key and call @parse on its data while the read lock is held.
+ *
+ * Returns NTDB_ERR_NOEXIST if the key is absent, the access error if
+ * the data could not be read, and otherwise whatever @parse returns.
+ * The result is also stored in ntdb->last_error.
+ */
+_PUBLIC_ enum NTDB_ERROR ntdb_parse_record_(struct ntdb_context *ntdb,
+                                NTDB_DATA key,
+                                enum NTDB_ERROR (*parse)(NTDB_DATA k,
+                                                        NTDB_DATA d,
+                                                        void *data),
+                                void *data)
+{
+       struct hash_info h;
+       struct ntdb_used_record rec;
+       /* Assume "not found" until the lookup proves otherwise. */
+       enum NTDB_ERROR ecode = NTDB_ERR_NOEXIST;
+       ntdb_off_t off;
+
+       off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, NULL);
+       if (NTDB_OFF_IS_ERR(off)) {
+               return ntdb->last_error = NTDB_OFF_TO_ERR(off);
+       }
+
+       if (off) {
+               /* The data sits after the record header and the key. */
+               const void *dptr = ntdb_access_read(ntdb,
+                                                  off + sizeof(rec)
+                                                  + key.dsize,
+                                                  rec_data_length(&rec),
+                                                  false);
+
+               if (!NTDB_PTR_IS_ERR(dptr)) {
+                       NTDB_DATA d = ntdb_mkdata(dptr, rec_data_length(&rec));
+
+                       ecode = parse(key, d, data);
+                       ntdb_access_release(ntdb, dptr);
+               } else {
+                       ecode = NTDB_PTR_ERR(dptr);
+               }
+       }
+
+       ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, F_RDLCK);
+       return ntdb->last_error = ecode;
+}
+
+/* Return the name string stored in the context. */
+_PUBLIC_ const char *ntdb_name(const struct ntdb_context *ntdb)
+{
+       return ntdb->name;
+}
+
+/*
+ * Read the sequence number field of the database header.
+ *
+ * The raw value read is returned either way; last_error says whether it
+ * is a sequence number (NTDB_SUCCESS) or an encoded error.
+ */
+_PUBLIC_ int64_t ntdb_get_seqnum(struct ntdb_context *ntdb)
+{
+       ntdb_off_t seq = ntdb_read_off(ntdb,
+                                      offsetof(struct ntdb_header, seqnum));
+
+       if (!NTDB_OFF_IS_ERR(seq)) {
+               ntdb->last_error = NTDB_SUCCESS;
+       } else {
+               ntdb->last_error = NTDB_OFF_TO_ERR(seq);
+       }
+       return seq;
+}
+
+
+/* Return the file descriptor held in ntdb->file. */
+_PUBLIC_ int ntdb_fd(const struct ntdb_context *ntdb)
+{
+       return ntdb->file->fd;
+}
+
+/* State shared between ntdb_repack() and its traverse callback. */
+struct traverse_state {
+       /* Result of the most recent ntdb_store() done by the callback. */
+       enum NTDB_ERROR error;
+       /* Database the callback copies each record into. */
+       struct ntdb_context *dest_db;
+};
+
+/*
+  traverse function for repacking
+ */
+/*
+ * Copy one record into state->dest_db.  The store result is kept in
+ * state->error; a non-zero return stops the traversal on failure.
+ */
+static int repack_traverse(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA data,
+                          struct traverse_state *state)
+{
+       state->error = ntdb_store(state->dest_db, key, data, NTDB_INSERT);
+
+       return (state->error == NTDB_SUCCESS) ? 0 : -1;
+}
+
+/*
+ * Repack the database inside a transaction: copy every record into a
+ * temporary internal database, wipe @ntdb, then copy the records back.
+ *
+ * Returns the result of ntdb_transaction_commit() on success.  On any
+ * failure the transaction is cancelled and the first error seen is
+ * returned and stored in ntdb->last_error.
+ */
+_PUBLIC_ enum NTDB_ERROR ntdb_repack(struct ntdb_context *ntdb)
+{
+       struct ntdb_context *tmp_db;
+       struct traverse_state state;
+       int64_t count;
+
+       state.error = ntdb_transaction_start(ntdb);
+       if (state.error != NTDB_SUCCESS) {
+               return state.error;
+       }
+
+       tmp_db = ntdb_open("tmpdb", NTDB_INTERNAL, O_RDWR|O_CREAT, 0, NULL);
+       if (tmp_db == NULL) {
+               state.error = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+                                        __location__
+                                        " Failed to create tmp_db");
+               ntdb_transaction_cancel(ntdb);
+               return ntdb->last_error = state.error;
+       }
+
+       /* Pass 1: copy everything into the temporary database. */
+       state.dest_db = tmp_db;
+       count = ntdb_traverse(ntdb, repack_traverse, &state);
+       if (count < 0) {
+               /* The traverse itself failed: a negative return is the
+                * enum NTDB_ERROR.  Without this, state.error would still
+                * be NTDB_SUCCESS and the failure would be reported as
+                * success. */
+               state.error = (enum NTDB_ERROR)count;
+               goto fail;
+       }
+       /* ntdb_traverse() is not documented to return a negative value
+        * when the callback stops the walk, so check the callback's own
+        * result as well. */
+       if (state.error != NTDB_SUCCESS) {
+               goto fail;
+       }
+
+       state.error = ntdb_wipe_all(ntdb);
+       if (state.error != NTDB_SUCCESS) {
+               goto fail;
+       }
+
+       /* Pass 2: copy the records back into the now-empty database. */
+       state.dest_db = ntdb;
+       count = ntdb_traverse(tmp_db, repack_traverse, &state);
+       if (count < 0) {
+               state.error = (enum NTDB_ERROR)count;
+               goto fail;
+       }
+       if (state.error != NTDB_SUCCESS) {
+               goto fail;
+       }
+
+       ntdb_close(tmp_db);
+       return ntdb_transaction_commit(ntdb);
+
+fail:
+       ntdb_transaction_cancel(ntdb);
+       ntdb_close(tmp_db);
+       /* Set last_error here too, as the tmp_db failure path does. */
+       return ntdb->last_error = state.error;
+}
diff --git a/lib/ntdb/ntdb.h b/lib/ntdb/ntdb.h
new file mode 100644 (file)
index 0000000..f0833b7
--- /dev/null
@@ -0,0 +1,901 @@
+#ifndef CCAN_NTDB_H
+#define CCAN_NTDB_H
+
+/*
+   NTDB: trivial database library version 2
+
+   Copyright (C) Andrew Tridgell 1999-2004
+   Copyright (C) Rusty Russell 2010-2012
+
+     ** NOTE! The following LGPL license applies to the ntdb
+     ** library. This does NOT imply that all of Samba is released
+     ** under the LGPL
+
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 3 of the License, or (at your option) any later version.
+
+   This library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+#ifdef HAVE_LIBREPLACE
+#include <replace.h>
+#else
+#if HAVE_FILE_OFFSET_BITS
+#define _FILE_OFFSET_BITS 64
+#endif
+/* For mode_t */
+#include <sys/types.h>
+/* For O_* flags. */
+#include <sys/stat.h>
+/* For sig_atomic_t. */
+#include <signal.h>
+/* For uint64_t */
+#include <stdint.h>
+/* For bool */
+#include <stdbool.h>
+/* For memcmp */
+#include <string.h>
+#endif
+
+#if HAVE_CCAN
+#include <ccan/compiler/compiler.h>
+#include <ccan/typesafe_cb/typesafe_cb.h>
+#include <ccan/cast/cast.h>
+#else
+#ifndef typesafe_cb_preargs
+/* Failing to have CCAN just means less typesafe protection, etc. */
+#define typesafe_cb_preargs(rtype, atype, fn, arg, ...)        \
+       ((rtype (*)(__VA_ARGS__, atype))(fn))
+#endif
+#ifndef cast_const
+/*
+ * cast_const(type, expr): cast away const from @expr, yielding @type.
+ * @type is the complete destination type (eg. "void *"), so neither
+ * fallback may add a pointer level of its own.
+ */
+#if defined(__intptr_t_defined) || defined(HAVE_INTPTR_T)
+/* Going via intptr_t avoids cast-qualifier warnings (eg. -Wcast-qual). */
+#define cast_const(type, expr) ((type)((intptr_t)(expr)))
+#else
+/* Plain cast.  This used to be ((type *)(expr)), which turned
+ * cast_const(void *, p) into a cast to "void **". */
+#define cast_const(type, expr) ((type)(expr))
+#endif
+#endif
+#endif /* !HAVE_CCAN */
+
+union ntdb_attribute;
+struct ntdb_context;
+
+/**
+ * struct TDB_DATA - (n)tdb data blob
+ *
+ * To ease compatibility, we use 'struct TDB_DATA' from tdb.h, so if
+ * you want to include both tdb.h and ntdb.h, you need to #include
+ * tdb.h first.
+ */
+/* Only define it if tdb.h has not been included. */
+#ifndef __TDB_H__
+struct TDB_DATA {
+       /* Pointer to the bytes of the blob. */
+       unsigned char *dptr;
+       /* Number of bytes at dptr. */
+       size_t dsize;
+};
+#endif
+
+typedef struct TDB_DATA NTDB_DATA;
+
+/**
+ * ntdb_open - open a database file
+ * @name: the file name (can be NULL if flags contains NTDB_INTERNAL)
+ * @ntdb_flags: options for this database
+ * @open_flags: flags argument for ntdb's open() call.
+ * @mode: mode argument for ntdb's open() call.
+ * @attributes: linked list of extra attributes for this ntdb.
+ *
+ * This call opens (and potentially creates) a database file.
+ * Multiple processes can have the NTDB file open at once.
+ *
+ * On failure it will return NULL, and set errno: it may also call
+ * any log attribute found in @attributes.
+ *
+ * See also:
+ *     union ntdb_attribute
+ */
+struct ntdb_context *ntdb_open(const char *name, int ntdb_flags,
+                              int open_flags, mode_t mode,
+                              union ntdb_attribute *attributes);
+
+
+/* flags for ntdb_open() */
+#define NTDB_DEFAULT 0 /* just a readability place holder */
+#define NTDB_INTERNAL 2 /* don't store on disk */
+#define NTDB_NOLOCK   4 /* don't do any locking */
+#define NTDB_NOMMAP   8 /* don't use mmap */
+#define NTDB_CONVERT 16 /* convert endian */
+#define NTDB_NOSYNC   64 /* don't use synchronous transactions */
+#define NTDB_SEQNUM   128 /* maintain a sequence number */
+#define NTDB_ALLOW_NESTING   256 /* fake nested transactions */
+#define NTDB_RDONLY   512 /* implied by O_RDONLY */
+#define NTDB_CANT_CHECK  2048 /* has a feature which we don't understand */
+
+/**
+ * ntdb_close - close and free a ntdb.
+ * @ntdb: the ntdb context returned from ntdb_open()
+ *
+ * This always succeeds, in that @ntdb is unusable after this call.  But if
+ * some unexpected error occurred while closing, it will return non-zero
+ * (the only clue as to cause will be via the log attribute).
+ */
+int ntdb_close(struct ntdb_context *ntdb);
+
+/**
+ * enum NTDB_ERROR - error returns for NTDB
+ *
+ * See Also:
+ *     ntdb_errorstr()
+ */
+enum NTDB_ERROR {
+       NTDB_SUCCESS    = 0,    /* No error. */
+       NTDB_ERR_CORRUPT = -1,  /* We read the db, and it was bogus. */
+       NTDB_ERR_IO     = -2,   /* We couldn't read/write the db. */
+       NTDB_ERR_LOCK   = -3,   /* Locking failed. */
+       NTDB_ERR_OOM    = -4,   /* Out of Memory. */
+       NTDB_ERR_EXISTS = -5,   /* The key already exists. */
+       NTDB_ERR_NOEXIST        = -6,   /* The key does not exist. */
+       NTDB_ERR_EINVAL = -7,   /* You're using it wrong. */
+       NTDB_ERR_RDONLY = -8,   /* The database is read-only. */
+       NTDB_ERR_LAST = NTDB_ERR_RDONLY
+};
+
+/**
+ * ntdb_store - store a key/value pair in a ntdb.
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @key: the key
+ * @dbuf: the data to associate with the key.
+ * @flag: NTDB_REPLACE, NTDB_INSERT or NTDB_MODIFY.
+ *
+ * This inserts (or overwrites) a key/value pair in the NTDB.  If flag
+ * is NTDB_REPLACE, it doesn't matter whether the key exists or not;
+ * NTDB_INSERT means it must not exist (returns NTDB_ERR_EXISTS otherwise),
+ * and NTDB_MODIFY means it must exist (returns NTDB_ERR_NOEXIST otherwise).
+ *
+ * On success, this returns NTDB_SUCCESS.
+ *
+ * See also:
+ *     ntdb_fetch, ntdb_transaction_start, ntdb_append, ntdb_delete.
+ */
+enum NTDB_ERROR ntdb_store(struct ntdb_context *ntdb,
+                          NTDB_DATA key,
+                          NTDB_DATA dbuf,
+                          int flag);
+
+/* flags to ntdb_store() */
+#define NTDB_REPLACE 1         /* A readability place holder */
+#define NTDB_INSERT 2          /* Don't overwrite an existing entry */
+#define NTDB_MODIFY 3          /* Don't create a new entry          */
+
+/**
+ * ntdb_fetch - fetch a value from a ntdb.
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @key: the key
+ * @data: pointer to data.
+ *
+ * This looks up a key in the database and sets it in @data.
+ *
+ * If it returns NTDB_SUCCESS, the key was found: it is your
+ * responsibility to call free() on @data->dptr.
+ *
+ * Otherwise, it returns an error (usually, NTDB_ERR_NOEXIST) and @data is
+ * undefined.
+ */
+enum NTDB_ERROR ntdb_fetch(struct ntdb_context *ntdb, NTDB_DATA key,
+                          NTDB_DATA *data);
+
+/**
+ * ntdb_errorstr - map the ntdb error onto a constant readable string
+ * @ecode: the enum NTDB_ERROR to map.
+ *
+ * This is useful for displaying errors to users.
+ */
+const char *ntdb_errorstr(enum NTDB_ERROR ecode);
+
+/**
+ * ntdb_append - append a value to a key/value pair in a ntdb.
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @key: the key
+ * @dbuf: the data to append.
+ *
+ * This is equivalent to fetching a record, reallocating .dptr to add the
+ * data, and writing it back, only it's much more efficient.  If the key
+ * doesn't exist, it's equivalent to ntdb_store (with an additional hint that
+ * you expect to expand the record in future).
+ *
+ * See Also:
+ *     ntdb_fetch(), ntdb_store()
+ */
+enum NTDB_ERROR ntdb_append(struct ntdb_context *ntdb,
+                           NTDB_DATA key, NTDB_DATA dbuf);
+
+/**
+ * ntdb_delete - delete a key from a ntdb.
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @key: the key to delete.
+ *
+ * Returns NTDB_SUCCESS on success, or an error (usually NTDB_ERR_NOEXIST).
+ *
+ * See Also:
+ *     ntdb_fetch(), ntdb_store()
+ */
+enum NTDB_ERROR ntdb_delete(struct ntdb_context *ntdb, NTDB_DATA key);
+
+/**
+ * ntdb_exists - does a key exist in the database?
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @key: the key to search for.
+ *
+ * Returns true if it exists, or false if it doesn't or any other error.
+ */
+bool ntdb_exists(struct ntdb_context *ntdb, NTDB_DATA key);
+
+/**
+ * ntdb_deq - are NTDB_DATA equal?
+ * @a: one NTDB_DATA
+ * @b: another NTDB_DATA
+ */
+static inline bool ntdb_deq(NTDB_DATA a, NTDB_DATA b)
+{
+       if (a.dsize != b.dsize)
+               return false;
+       /* Two empty blobs are equal.  Don't hand their dptr to memcmp():
+        * it may be NULL, which is undefined behaviour even for a zero
+        * length. */
+       if (a.dsize == 0)
+               return true;
+       return memcmp(a.dptr, b.dptr, a.dsize) == 0;
+}
+
+/**
+ * ntdb_mkdata - make a NTDB_DATA from const data
+ * @p: the constant pointer
+ * @len: the length
+ *
+ * As the dptr member of NTDB_DATA is not constant, you need to
+ * cast it.  This function keeps those casts in one place, as well as
+ * suppressing the warning some compilers give when casting away a
+ * qualifier (eg. gcc with -Wcast-qual)
+ */
+static inline NTDB_DATA ntdb_mkdata(const void *p, size_t len)
+{
+       NTDB_DATA blob;
+
+       /* The one place where const is deliberately cast away. */
+       blob.dsize = len;
+       blob.dptr = cast_const(void *, p);
+       return blob;
+}
+
+/**
+ * ntdb_transaction_start - start a transaction
+ * @ntdb: the ntdb context returned from ntdb_open()
+ *
+ * This begins a series of atomic operations.  Other processes will be able
+ * to read the ntdb, but not alter it (they will block), nor will they see
+ * any changes until ntdb_transaction_commit() is called.
+ *
+ * Note that if the NTDB_ALLOW_NESTING flag is set, a ntdb_transaction_start()
+ * within a transaction will succeed, but it's not a real transaction:
+ * (1) An inner transaction which is committed is not actually committed until
+ *     the outer transaction is; if the outer transaction is cancelled, the
+ *     inner ones are discarded.
+ * (2) ntdb_transaction_cancel() marks the outer transaction as having an error,
+ *     so the final ntdb_transaction_commit() will fail.
+ * (3) the outer transaction will see the results of the inner transaction.
+ *
+ * See Also:
+ *     ntdb_transaction_cancel, ntdb_transaction_commit.
+ */
+enum NTDB_ERROR ntdb_transaction_start(struct ntdb_context *ntdb);
+
+/**
+ * ntdb_transaction_cancel - abandon a transaction
+ * @ntdb: the ntdb context returned from ntdb_open()
+ *
+ * This aborts a transaction, discarding any changes which were made.
+ * ntdb_close() does this implicitly.
+ */
+void ntdb_transaction_cancel(struct ntdb_context *ntdb);
+
+/**
+ * ntdb_transaction_commit - commit a transaction
+ * @ntdb: the ntdb context returned from ntdb_open()
+ *
+ * This completes a transaction, writing any changes which were made.
+ *
+ * fsync() is used to commit the transaction (unless NTDB_NOSYNC is set),
+ * making it robust against machine crashes, but very slow compared to
+ * other NTDB operations.
+ *
+ * A failure can only be caused by unexpected errors (eg. I/O or
+ * memory); there is no point looping on transaction failure.
+ *
+ * See Also:
+ *     ntdb_transaction_prepare_commit()
+ */
+enum NTDB_ERROR ntdb_transaction_commit(struct ntdb_context *ntdb);
+
+/**
+ * ntdb_transaction_prepare_commit - prepare to commit a transaction
+ * @ntdb: the ntdb context returned from ntdb_open()
+ *
+ * This ensures we have the resources to commit a transaction (using
+ * ntdb_transaction_commit): if this succeeds then a transaction will only
+ * fail if the write() or fsync() calls fail.
+ *
+ * If this fails you must still call ntdb_transaction_cancel() to cancel
+ * the transaction.
+ *
+ * See Also:
+ *     ntdb_transaction_commit()
+ */
+enum NTDB_ERROR ntdb_transaction_prepare_commit(struct ntdb_context *ntdb);
+
+/**
+ * ntdb_traverse - traverse a NTDB
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @fn: the function to call for every key/value pair (or NULL)
+ * @p: the pointer to hand to @fn
+ *
+ * This walks the NTDB until all the keys have been traversed, or @fn
+ * returns non-zero.  If the traverse function or other processes are
+ * changing data or adding or deleting keys, the traverse may be
+ * unreliable: keys may be skipped or (rarely) visited twice.
+ *
+ * There is one specific exception: the special case of deleting the
+ * current key does not undermine the reliability of the traversal.
+ *
+ * On success, returns the number of keys iterated.  On error returns
+ * a negative enum NTDB_ERROR value.
+ */
+#define ntdb_traverse(ntdb, fn, p)                                     \
+       ntdb_traverse_(ntdb, typesafe_cb_preargs(int, void *, (fn), (p), \
+                                                struct ntdb_context *, \
+                                                NTDB_DATA, NTDB_DATA), (p))
+
+int64_t ntdb_traverse_(struct ntdb_context *ntdb,
+                      int (*fn)(struct ntdb_context *,
+                                NTDB_DATA, NTDB_DATA, void *), void *p);
+
+/**
+ * ntdb_parse_record - operate directly on data in the database.
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @key: the key whose record we should hand to @parse
+ * @parse: the function to call for the data
+ * @data: the private pointer to hand to @parse (types must match).
+ *
+ * This avoids a copy for many cases, by handing you a pointer into
+ * the memory-mapped database.  It also locks the record to prevent
+ * other accesses at the same time.
+ *
+ * Do not alter the data handed to parse()!
+ */
+#define ntdb_parse_record(ntdb, key, parse, data)                      \
+       ntdb_parse_record_((ntdb), (key),                               \
+                          typesafe_cb_preargs(enum NTDB_ERROR, void *, \
+                                              (parse), (data),         \
+                                              NTDB_DATA, NTDB_DATA), (data))
+
+enum NTDB_ERROR ntdb_parse_record_(struct ntdb_context *ntdb,
+                                  NTDB_DATA key,
+                                  enum NTDB_ERROR (*parse)(NTDB_DATA k,
+                                                           NTDB_DATA d,
+                                                           void *data),
+                                  void *data);
+
+/**
+ * ntdb_get_seqnum - get a database sequence number
+ * @ntdb: the ntdb context returned from ntdb_open()
+ *
+ * This returns a sequence number: any change to the database from a
+ * ntdb context opened with the NTDB_SEQNUM flag will cause that number
+ * to increment.  Note that the incrementing is unreliable (it is done
+ * without locking), so this is only useful as an optimization.
+ *
+ * For example, you may have a regular database backup routine which
+ * does not operate if the sequence number is unchanged.  In the
+ * unlikely event of a failed increment, it will be backed up next
+ * time any way.
+ *
+ * Returns an enum NTDB_ERROR (ie. negative) on error.
+ */
+int64_t ntdb_get_seqnum(struct ntdb_context *ntdb);
+
+/**
+ * ntdb_firstkey - get the "first" key in a NTDB
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @key: pointer to key.
+ *
+ * This returns an arbitrary key in the database; with ntdb_nextkey() it allows
+ * open-coded traversal of the database, though it is slightly less efficient
+ * than ntdb_traverse.
+ *
+ * It is your responsibility to free @key->dptr on success.
+ *
+ * Returns NTDB_ERR_NOEXIST if the database is empty.
+ */
+enum NTDB_ERROR ntdb_firstkey(struct ntdb_context *ntdb, NTDB_DATA *key);
+
+/**
+ * ntdb_nextkey - get the "next" key in a NTDB
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @key: a key returned by ntdb_firstkey() or ntdb_nextkey().
+ *
+ * This returns another key in the database; it will free @key.dptr for
+ * your convenience.
+ *
+ * Returns NTDB_ERR_NOEXIST if there are no more keys.
+ */
+enum NTDB_ERROR ntdb_nextkey(struct ntdb_context *ntdb, NTDB_DATA *key);
+
+/**
+ * ntdb_chainlock - lock a record in the NTDB
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @key: the key to lock.
+ *
+ * This prevents any access occurring to a group of keys including @key,
+ * even if @key does not exist.  This allows primitive atomic updates of
+ * records without using transactions.
+ *
+ * You cannot begin a transaction while holding a ntdb_chainlock(), nor can
+ * you do any operations on any other keys in the database.  This also means
+ * that you cannot hold more than one ntdb_chainlock() at a time.
+ *
+ * See Also:
+ *     ntdb_chainunlock()
+ */
+enum NTDB_ERROR ntdb_chainlock(struct ntdb_context *ntdb, NTDB_DATA key);
+
+/**
+ * ntdb_chainunlock - unlock a record in the NTDB
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @key: the key to unlock.
+ *
+ * The key must have previously been locked by ntdb_chainlock().
+ */
+void ntdb_chainunlock(struct ntdb_context *ntdb, NTDB_DATA key);
+
+/**
+ * ntdb_chainlock_read - lock a record in the NTDB, for reading
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @key: the key to lock.
+ *
+ * This prevents any changes from occurring to a group of keys including @key,
+ * even if @key does not exist.  This allows primitive atomic updates of
+ * records without using transactions.
+ *
+ * You cannot begin a transaction while holding a ntdb_chainlock_read(), nor can
+ * you do any operations on any other keys in the database.  This also means
+ * that you cannot hold more than one ntdb_chainlock()/read() at a time.
+ *
+ * See Also:
+ *     ntdb_chainlock()
+ */
+enum NTDB_ERROR ntdb_chainlock_read(struct ntdb_context *ntdb, NTDB_DATA key);
+
+/**
+ * ntdb_chainunlock_read - unlock a record in the NTDB for reading
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @key: the key to unlock.
+ *
+ * The key must have previously been locked by ntdb_chainlock_read().
+ */
+void ntdb_chainunlock_read(struct ntdb_context *ntdb, NTDB_DATA key);
+
+/**
+ * ntdb_lockall - lock the entire NTDB
+ * @ntdb: the ntdb context returned from ntdb_open()
+ *
+ * You cannot hold a ntdb_chainlock while calling this.  It nests, so you
+ * must call ntdb_unlockall as many times as you call ntdb_lockall.
+ */
+enum NTDB_ERROR ntdb_lockall(struct ntdb_context *ntdb);
+
+/**
+ * ntdb_unlockall - unlock the entire NTDB
+ * @ntdb: the ntdb context returned from ntdb_open()
+ */
+void ntdb_unlockall(struct ntdb_context *ntdb);
+
+/**
+ * ntdb_lockall_read - lock the entire NTDB for reading
+ * @ntdb: the ntdb context returned from ntdb_open()
+ *
+ * This prevents others writing to the database, eg. ntdb_delete, ntdb_store,
+ * ntdb_append, but not ntdb_fetch.
+ *
+ * You cannot hold a ntdb_chainlock while calling this.  It nests, so you
+ * must call ntdb_unlockall_read as many times as you call ntdb_lockall_read.
+ */
+enum NTDB_ERROR ntdb_lockall_read(struct ntdb_context *ntdb);
+
+/**
+ * ntdb_unlockall_read - unlock the entire NTDB for reading
+ * @ntdb: the ntdb context returned from ntdb_open()
+ */
+void ntdb_unlockall_read(struct ntdb_context *ntdb);
+
+/**
+ * ntdb_wipe_all - wipe the database clean
+ * @ntdb: the ntdb context returned from ntdb_open()
+ *
+ * Completely erase the database.  This is faster than iterating through
+ * each key and doing ntdb_delete.
+ */
+enum NTDB_ERROR ntdb_wipe_all(struct ntdb_context *ntdb);
+
+/**
+ * ntdb_repack - repack the database
+ * @ntdb: the ntdb context returned from ntdb_open()
+ *
+ * This repacks the database; if it is suffering from a great deal of
+ * fragmentation this might help.  However, it can take twice the
+ * memory of the existing NTDB.
+ */
+enum NTDB_ERROR ntdb_repack(struct ntdb_context *ntdb);
+
+/**
+ * ntdb_check - check a NTDB for consistency
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @check: function to check each key/data pair (or NULL)
+ * @data: argument for @check, must match type.
+ *
+ * This performs a consistency check of the open database, optionally calling
+ * a check() function on each record so you can do your own data consistency
+ * checks as well.  If check() returns an error, that is returned from
+ * ntdb_check().
+ *
+ * Note that if the NTDB uses a feature which we don't understand and which
+ * indicates we can't run ntdb_check(), this will log a warning to that
+ * effect and return NTDB_SUCCESS.  You can detect this condition by
+ * looking for NTDB_CANT_CHECK in ntdb_get_flags().
+ *
+ * Returns NTDB_SUCCESS or an error.
+ */
+#define ntdb_check(ntdb, check, data)                                  \
+       ntdb_check_((ntdb), typesafe_cb_preargs(enum NTDB_ERROR, void *, \
+                                               (check), (data),        \
+                                               NTDB_DATA,              \
+                                               NTDB_DATA),             \
+                   (data))
+
+enum NTDB_ERROR ntdb_check_(struct ntdb_context *ntdb,
+                           enum NTDB_ERROR (*check)(NTDB_DATA k,
+                                                    NTDB_DATA d,
+                                                    void *data),
+                           void *data);
+
+/**
+ * ntdb_error - get the last error (not threadsafe)
+ * @ntdb: the ntdb context returned from ntdb_open()
+ *
+ * Returns the last error returned by a NTDB function.
+ *
+ * This makes porting from TDB easier, but note that the last error is not
+ * reliable in threaded programs.
+ */
+enum NTDB_ERROR ntdb_error(struct ntdb_context *ntdb);
+
+/**
+ * enum ntdb_summary_flags - flags for ntdb_summary.
+ * @NTDB_SUMMARY_HISTOGRAMS: draw graphs in the summary.
+ */
+enum ntdb_summary_flags {
+       NTDB_SUMMARY_HISTOGRAMS = 1 /* Draw graphs in the summary. */
+};
+
+/**
+ * ntdb_summary - return a string describing the NTDB state
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @flags: flags to control the summary output.
+ * @summary: pointer to string to allocate.
+ *
+ * This returns a developer-readable string describing the overall
+ * state of the ntdb, such as the percentage used and sizes of records.
+ * It is designed to provide information about the ntdb at a glance
+ * without displaying any keys or data in the database.
+ *
+ * On success, sets @summary to point to a malloc()'ed nul-terminated
+ * multi-line string.  It is your responsibility to free() it.
+ */
+enum NTDB_ERROR ntdb_summary(struct ntdb_context *ntdb,
+                            enum ntdb_summary_flags flags,
+                            char **summary);
+
+
+/**
+ * ntdb_get_flags - return the flags for a ntdb
+ * @ntdb: the ntdb context returned from ntdb_open()
+ *
+ * This returns the flags on the current ntdb.  Some of these are caused by
+ * the flags argument to ntdb_open(), others (such as NTDB_CONVERT) are
+ * intuited.
+ */
+unsigned int ntdb_get_flags(struct ntdb_context *ntdb);
+
+/**
+ * ntdb_add_flag - set a flag for a ntdb
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @flag: one of NTDB_NOLOCK, NTDB_NOMMAP, NTDB_NOSYNC or NTDB_ALLOW_NESTING.
+ *
+ * You can use this to set a flag on the NTDB.  You cannot set these flags
+ * on a NTDB_INTERNAL ntdb.
+ */
+void ntdb_add_flag(struct ntdb_context *ntdb, unsigned flag);
+
+/**
+ * ntdb_remove_flag - unset a flag for a ntdb
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @flag: one of NTDB_NOLOCK, NTDB_NOMMAP, NTDB_NOSYNC or NTDB_ALLOW_NESTING.
+ *
+ * You can use this to clear a flag on the NTDB.  You cannot clear flags
+ * on a NTDB_INTERNAL ntdb.
+ */
+void ntdb_remove_flag(struct ntdb_context *ntdb, unsigned flag);
+
+/**
+ * enum ntdb_attribute_type - discriminator for union ntdb_attribute.
+ *
+ * Stored in the attr field of struct ntdb_attribute_base to say which
+ * member of union ntdb_attribute is in use.
+ *
+ * There is deliberately no comma after the last enumerator: this header
+ * is wrapped for C++ consumers, and the other enums here follow the same
+ * form.
+ */
+enum ntdb_attribute_type {
+       NTDB_ATTRIBUTE_LOG = 0,
+       NTDB_ATTRIBUTE_HASH = 1,
+       NTDB_ATTRIBUTE_SEED = 2,
+       NTDB_ATTRIBUTE_STATS = 3,
+       NTDB_ATTRIBUTE_OPENHOOK = 4,
+       NTDB_ATTRIBUTE_FLOCK = 5
+};
+
+/**
+ * ntdb_get_attribute - get an attribute for an existing ntdb
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @attr: the union ntdb_attribute to fill in.
+ *
+ * This gets an attribute from a NTDB which has previously been set (or
+ * may return the default values).  Set @attr.base.attr to the
+ * attribute type you want to get.
+ */
+enum NTDB_ERROR ntdb_get_attribute(struct ntdb_context *ntdb,
+                                  union ntdb_attribute *attr);
+
+/**
+ * ntdb_set_attribute - set an attribute for an existing ntdb
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @attr: the union ntdb_attribute to set.
+ *
+ * This sets an attribute on a NTDB, overriding any previous attribute
+ * of the same type.  It returns NTDB_ERR_EINVAL if the attribute is
+ * unknown or invalid.
+ *
+ * Note that NTDB_ATTRIBUTE_HASH, NTDB_ATTRIBUTE_SEED, and
+ * NTDB_ATTRIBUTE_OPENHOOK cannot currently be set after ntdb_open.
+ */
+enum NTDB_ERROR ntdb_set_attribute(struct ntdb_context *ntdb,
+                                  const union ntdb_attribute *attr);
+
+/**
+ * ntdb_unset_attribute - reset an attribute for an existing ntdb
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @type: the attribute type to unset.
+ *
+ * This unsets an attribute on a NTDB, returning it to the defaults
+ * (where applicable).
+ *
+ * Note that it only makes sense for NTDB_ATTRIBUTE_LOG and NTDB_ATTRIBUTE_FLOCK
+ * to be unset.
+ */
+void ntdb_unset_attribute(struct ntdb_context *ntdb,
+                         enum ntdb_attribute_type type);
+
+/**
+ * ntdb_name - get the name of a ntdb
+ * @ntdb: the ntdb context returned from ntdb_open()
+ *
+ * This returns a copy of the name string, made at ntdb_open() time.  If that
+ * argument was NULL (possible for a NTDB_INTERNAL db) this will return NULL.
+ *
+ * This is mostly useful for logging.
+ */
+const char *ntdb_name(const struct ntdb_context *ntdb);
+
+/**
+ * ntdb_fd - get the file descriptor of a ntdb
+ * @ntdb: the ntdb context returned from ntdb_open()
+ *
+ * This returns the file descriptor for the underlying database file, or -1
+ * for NTDB_INTERNAL.
+ */
+int ntdb_fd(const struct ntdb_context *ntdb);
+
+/**
+ * ntdb_foreach - iterate through every open NTDB.
+ * @fn: the function to call for every NTDB
+ * @p: the pointer to hand to @fn
+ *
+ * NTDB internally keeps track of all open TDBs; this function allows you to
+ * iterate through them.  If @fn returns non-zero, traversal stops.
+ */
+#define ntdb_foreach(fn, p)                                            \
+       ntdb_foreach_(typesafe_cb_preargs(int, void *, (fn), (p),       \
+                                         struct ntdb_context *), (p))
+
+void ntdb_foreach_(int (*fn)(struct ntdb_context *, void *), void *p);
+
+/**
+ * struct ntdb_attribute_base - common fields for all ntdb attributes.
+ * @attr: which attribute this is; selects the member of
+ *        union ntdb_attribute that is in use.
+ * @next: the next attribute in the list, or NULL at the end of the list.
+ */
+struct ntdb_attribute_base {
+       enum ntdb_attribute_type attr;
+       union ntdb_attribute *next;
+};
+
+/**
+ * enum ntdb_log_level - log levels for ntdb_attribute_log
+ * @NTDB_LOG_ERROR: used to log unrecoverable errors such as I/O errors
+ *                or internal consistency failures.
+ * @NTDB_LOG_USE_ERROR: used to log usage errors such as invalid parameters
+ *                or writing to a read-only database.
+ * @NTDB_LOG_WARNING: used for informational messages on issues which
+ *                  are unusual but handled by NTDB internally, such
+ *                  as a failure to mmap or failure to open /dev/urandom.
+ */
+enum ntdb_log_level {
+       NTDB_LOG_ERROR,
+       NTDB_LOG_USE_ERROR,
+       NTDB_LOG_WARNING
+};
+
+/**
+ * struct ntdb_attribute_log - log function attribute
+ * @base: common attribute fields; base.attr is NTDB_ATTRIBUTE_LOG.
+ * @fn: the logging callback; it receives the ntdb, the severity, the
+ *      error code and a message string.
+ * @data: opaque pointer stored alongside @fn (presumably handed back as
+ *        the callback's final argument - confirm against the logging code).
+ *
+ * This attribute provides a hook for you to log errors.
+ */
+struct ntdb_attribute_log {
+       struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_LOG */
+       void (*fn)(struct ntdb_context *ntdb,
+                  enum ntdb_log_level level,
+                  enum NTDB_ERROR ecode,
+                  const char *message,
+                  void *data);
+       void *data;
+};
+
+/**
+ * struct ntdb_attribute_hash - hash function attribute
+ * @base: common attribute fields; base.attr is NTDB_ATTRIBUTE_HASH.
+ * @fn: the hash function; called with the bytes to hash, their length,
+ *      the database's hash seed and @data.
+ * @data: opaque pointer passed as the last argument of @fn.
+ *
+ * This attribute allows you to provide an alternative hash function.
+ * This hash function will be handed keys from the database; it will also
+ * be handed the 8-byte NTDB_HASH_MAGIC value for checking the header (the
+ * ntdb_open() will fail if the hash value doesn't match the header).
+ *
+ * Note that if your hash function gives different results on
+ * different machine endians, your ntdb will no longer work across
+ * different architectures!
+ */
+struct ntdb_attribute_hash {
+       struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_HASH */
+       uint64_t (*fn)(const void *key, size_t len, uint64_t seed,
+                      void *data);
+       void *data;
+};
+
+/**
+ * struct ntdb_attribute_seed - hash function seed attribute
+ *
+ * The hash function seed is normally taken from /dev/urandom (or equivalent)
+ * but can be set manually here.  This is mainly for testing purposes.
+ */
+struct ntdb_attribute_seed {
+       struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_SEED */
+       uint64_t seed;
+};
+
+/**
+ * struct ntdb_attribute_stats - ntdb operational statistics
+ *
+ * This attribute records statistics of various low-level NTDB operations.
+ * This can be used to assist performance evaluation.  This is only
+ * useful for ntdb_get_attribute().
+ *
+ * New fields will be added at the end, hence the "size" argument which
+ * indicates how large your structure is: it must be filled in before
+ * calling ntdb_get_attribute(), which will overwrite it with the size
+ * ntdb knows about.
+ */
+struct ntdb_attribute_stats {
+       struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_STATS */
+       size_t size; /* = sizeof(struct ntdb_attribute_stats) */
+       uint64_t allocs;
+       uint64_t   alloc_subhash;
+       uint64_t   alloc_chain;
+       uint64_t   alloc_bucket_exact;
+       uint64_t   alloc_bucket_max;
+       uint64_t   alloc_leftover;
+       uint64_t   alloc_coalesce_tried;
+       uint64_t     alloc_coalesce_iterate_clash;
+       uint64_t     alloc_coalesce_lockfail;
+       uint64_t     alloc_coalesce_race;
+       uint64_t     alloc_coalesce_succeeded;
+       uint64_t       alloc_coalesce_num_merged;
+       uint64_t compares;
+       uint64_t   compare_wrong_bucket;
+       uint64_t   compare_wrong_offsetbits;
+       uint64_t   compare_wrong_keylen;
+       uint64_t   compare_wrong_rechash;
+       uint64_t   compare_wrong_keycmp;
+       uint64_t transactions;
+       uint64_t   transaction_cancel;
+       uint64_t   transaction_nest;
+       uint64_t   transaction_expand_file;
+       uint64_t   transaction_read_direct;
+       uint64_t      transaction_read_direct_fail;
+       uint64_t   transaction_write_direct;
+       uint64_t      transaction_write_direct_fail;
+       uint64_t expands;
+       uint64_t frees;
+       uint64_t locks;
+       uint64_t   lock_lowlevel;
+       uint64_t   lock_nonblock;
+       uint64_t     lock_nonblock_fail;
+};
+
+/**
+ * struct ntdb_attribute_openhook - ntdb special effects hook for open
+ *
+ * This attribute contains a function to call once we have the OPEN_LOCK
+ * for the ntdb, but before we've examined its contents.  If this succeeds,
+ * the ntdb will be populated if it's then zero-length.
+ *
+ * This is a hack to allow support for TDB-style TDB_CLEAR_IF_FIRST
+ * behaviour.
+ */
+struct ntdb_attribute_openhook {
+       struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_OPENHOOK */
+       enum NTDB_ERROR (*fn)(int fd, void *data);
+       void *data;
+};
+
+/**
+ * struct ntdb_attribute_flock - ntdb special effects hook for file locking
+ *
+ * This attribute contains the functions to call to place and remove locks
+ * on a file; it can be used to support non-blocking operations or lock
+ * proxying.
+ *
+ * They should return 0 on success, -1 on failure and set errno.
+ *
+ * A failure will be logged if errno is neither EAGAIN nor EINTR
+ * (normally it would only return EAGAIN if waitflag is false, and
+ * loop internally on EINTR).
+ */
+struct ntdb_attribute_flock {
+       struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_FLOCK */
+       int (*lock)(int fd,int rw, off_t off, off_t len, bool waitflag, void *);
+       int (*unlock)(int fd, int rw, off_t off, off_t len, void *);
+       void *data;
+};
+
+/**
+ * union ntdb_attribute - ntdb attributes.
+ *
+ * This represents all the known attributes.
+ *
+ * See also:
+ *     struct ntdb_attribute_log, struct ntdb_attribute_hash,
+ *     struct ntdb_attribute_seed, struct ntdb_attribute_stats,
+ *     struct ntdb_attribute_openhook, struct ntdb_attribute_flock.
+ */
+union ntdb_attribute {
+       struct ntdb_attribute_base base;
+       struct ntdb_attribute_log log;
+       struct ntdb_attribute_hash hash;
+       struct ntdb_attribute_seed seed;
+       struct ntdb_attribute_stats stats;
+       struct ntdb_attribute_openhook openhook;
+       struct ntdb_attribute_flock flock;
+};
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif /* ntdb.h */
similarity index 64%
rename from lib/tdb2/tdb.pc.in
rename to lib/ntdb/ntdb.pc.in
index 75e69d7363e0f9f628d55fdc82a6499012e7d853..36a7d5136c2426a2847d4f54623093dadf2df849 100644 (file)
@@ -3,9 +3,9 @@ exec_prefix=@exec_prefix@
 libdir=@libdir@
 includedir=@includedir@
 
-Name: tdb
-Description: A trivial database
+Name: ntdb
+Description: A (not-so) trivial database
 Version: @PACKAGE_VERSION@
-Libs: @LIB_RPATH@ -L${libdir} -ltdb
+Libs: @LIB_RPATH@ -L${libdir} -lntdb
 Cflags: -I${includedir}
 URL: http://tdb.samba.org/
diff --git a/lib/ntdb/open.c b/lib/ntdb/open.c
new file mode 100644 (file)
index 0000000..338de8b
--- /dev/null
@@ -0,0 +1,768 @@
+ /*
+   Trivial Database 2: opening and closing TDBs
+   Copyright (C) Rusty Russell 2010
+
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 3 of the License, or (at your option) any later version.
+
+   This library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include "private.h"
+#include <ccan/build_assert/build_assert.h>
+#include <assert.h>
+
+/* all open ntdbs, to detect double-opens (fcntl file locks don't nest!) */
+static struct ntdb_context *tdbs = NULL;
+
+/*
+ * Look through the open contexts for one whose file has the given
+ * device/inode pair.  On a match that shared ntdb_file gains a reference
+ * and is returned; otherwise NULL.
+ */
+static struct ntdb_file *find_file(dev_t device, ino_t ino)
+{
+       struct ntdb_context *ctx = tdbs;
+
+       while (ctx != NULL) {
+               struct ntdb_file *file = ctx->file;
+
+               if (file->device == device && file->inode == ino) {
+                       file->refcnt++;
+                       return file;
+               }
+               ctx = ctx->next;
+       }
+       return NULL;
+}
+
+/*
+ * Read exactly @len bytes from @fd into @buf.
+ *
+ * Returns true once the whole buffer has been filled.  Returns false if
+ * read() fails (errno is left as read() set it) or if end-of-file arrives
+ * first (errno is then set to EWOULDBLOCK, for want of a better code).
+ * A read() that was merely interrupted by a signal is retried rather than
+ * reported as a failure.
+ */
+static bool read_all(int fd, void *buf, size_t len)
+{
+       char *p = buf;
+
+       while (len) {
+               ssize_t ret = read(fd, p, len);
+
+               if (ret < 0) {
+                       /* A signal is not a real error: just try again. */
+                       if (errno == EINTR)
+                               continue;
+                       return false;
+               }
+               if (ret == 0) {
+                       /* ETOOSHORT? */
+                       errno = EWOULDBLOCK;
+                       return false;
+               }
+               p += ret;
+               len -= ret;
+       }
+       return true;
+}
+
+/*
+ * Produce a 64-bit value to use as a hash seed.
+ *
+ * Sources are tried in order: /dev/urandom, then an EGD pool, and finally
+ * a mix of the process id and the current time (this last resort is
+ * logged as a warning).
+ */
+static uint64_t random_number(struct ntdb_context *ntdb)
+{
+       uint64_t ret = 0;
+       struct timeval now;
+       int fd;
+
+       fd = open("/dev/urandom", O_RDONLY);
+       if (fd >= 0) {
+               bool filled = read_all(fd, &ret, sizeof(ret));
+
+               close(fd);
+               if (filled)
+                       return ret;
+       }
+
+       /* FIXME: Untested!  Based on Wikipedia protocol description! */
+       fd = open("/dev/egd-pool", O_RDWR);
+       if (fd >= 0) {
+               /* Command is 1, next byte is size we want to read. */
+               char cmd[2] = { 1, sizeof(uint64_t) };
+               bool complete = false;
+
+               if (write(fd, cmd, sizeof(cmd)) == sizeof(cmd)) {
+                       char reply[1 + sizeof(uint64_t)];
+                       int r = read(fd, reply, sizeof(reply));
+
+                       if (r > 1) {
+                               /* Copy at least some bytes. */
+                               memcpy(&ret, reply+1, r - 1);
+                               /* Only a full-length answer counts. */
+                               complete = (reply[0] == sizeof(uint64_t)
+                                           && r == sizeof(reply));
+                       }
+               }
+               close(fd);
+               if (complete)
+                       return ret;
+       }
+
+       /* Fallback: pid and time. */
+       gettimeofday(&now, NULL);
+       ret = getpid() * 100132289ULL + now.tv_sec * 1000000ULL + now.tv_usec;
+       ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING,
+                  "ntdb_open: random from getpid and time");
+       return ret;
+}
+
+/*
+ * Put a context's runtime state into its starting condition: the I/O
+ * layer is initialised first, then the access list, transaction pointer
+ * and direct-access count are cleared.
+ */
+static void ntdb_context_init(struct ntdb_context *ntdb)
+{
+       ntdb_io_init(ntdb);
+
+       /* Nothing is mapped, pinned or in flight yet. */
+       ntdb->access = NULL;
+       ntdb->transaction = NULL;
+       ntdb->direct_access = 0;
+}
+
+/*
+ * Image of a brand new database: the header followed by the (empty) free
+ * table.  ntdb_new_database() builds one of these in memory, points the
+ * header's free_table at the offset of ftable, and then copies the whole
+ * structure out in one go.
+ */
+struct new_database {
+       struct ntdb_header hdr;
+       struct ntdb_freetable ftable;
+};
+
+/*
+ * Initialise a new database.
+ *
+ * @ntdb: the context being opened; its hash function, flags and file are
+ *        used.
+ * @seed: optional seed attribute; when NULL the hash seed comes from
+ *        random_number().
+ * @hdr:  receives a copy of the new header, taken after ntdb_convert() has
+ *        been applied to it.
+ *
+ * For NTDB_INTERNAL databases the image is copied into a malloc()ed map;
+ * otherwise the file is truncated and the image written at offset 0.
+ * Returns NTDB_SUCCESS, or the error that was logged.
+ */
+static enum NTDB_ERROR ntdb_new_database(struct ntdb_context *ntdb,
+                                      struct ntdb_attribute_seed *seed,
+                                      struct ntdb_header *hdr)
+{
+       /* We make it up in memory, then write it out if not internal */
+       struct new_database newdb;
+       unsigned int magic_len;
+       ssize_t rlen;
+       enum NTDB_ERROR ecode;
+
+       /* Fill in the header */
+       newdb.hdr.version = NTDB_VERSION;
+       if (seed)
+               newdb.hdr.hash_seed = seed->seed;
+       else
+               newdb.hdr.hash_seed = random_number(ntdb);
+       /* hash_test holds the hash of NTDB_HASH_MAGIC under this seed. */
+       newdb.hdr.hash_test = NTDB_HASH_MAGIC;
+       newdb.hdr.hash_test = ntdb->hash_fn(&newdb.hdr.hash_test,
+                                          sizeof(newdb.hdr.hash_test),
+                                          newdb.hdr.hash_seed,
+                                          ntdb->hash_data);
+       newdb.hdr.recovery = 0;
+       newdb.hdr.features_used = newdb.hdr.features_offered = NTDB_FEATURE_MASK;
+       newdb.hdr.seqnum = 0;
+       newdb.hdr.capabilities = 0;
+       memset(newdb.hdr.reserved, 0, sizeof(newdb.hdr.reserved));
+       /* Initial hashes are empty. */
+       memset(newdb.hdr.hashtable, 0, sizeof(newdb.hdr.hashtable));
+
+       /* Free is empty. */
+       newdb.hdr.free_table = offsetof(struct new_database, ftable);
+       memset(&newdb.ftable, 0, sizeof(newdb.ftable));
+       /* The free table's record header covers everything after itself. */
+       ecode = set_header(NULL, &newdb.ftable.hdr, NTDB_FTABLE_MAGIC, 0,
+                          sizeof(newdb.ftable) - sizeof(newdb.ftable.hdr),
+                          sizeof(newdb.ftable) - sizeof(newdb.ftable.hdr),
+                          0);
+       if (ecode != NTDB_SUCCESS) {
+               return ecode;
+       }
+
+       /* Magic food */
+       memset(newdb.hdr.magic_food, 0, sizeof(newdb.hdr.magic_food));
+       strcpy(newdb.hdr.magic_food, NTDB_MAGIC_FOOD);
+
+       /* This creates an endian-converted database, as if read from disk */
+       /* The magic string at the very start is skipped by the conversion. */
+       magic_len = sizeof(newdb.hdr.magic_food);
+       ntdb_convert(ntdb,
+                   (char *)&newdb.hdr + magic_len, sizeof(newdb) - magic_len);
+
+       /* Hand the caller the header exactly as it will be stored. */
+       *hdr = newdb.hdr;
+
+       if (ntdb->flags & NTDB_INTERNAL) {
+               /* No file: the "map" is simply a heap copy of the image. */
+               ntdb->file->map_size = sizeof(newdb);
+               ntdb->file->map_ptr = malloc(ntdb->file->map_size);
+               if (!ntdb->file->map_ptr) {
+                       return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+                                         "ntdb_new_database:"
+                                         " failed to allocate");
+               }
+               memcpy(ntdb->file->map_ptr, &newdb, ntdb->file->map_size);
+               return NTDB_SUCCESS;
+       }
+       if (lseek(ntdb->file->fd, 0, SEEK_SET) == -1) {
+               return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+                                 "ntdb_new_database:"
+                                 " failed to seek: %s", strerror(errno));
+       }
+
+       /* Discard any previous contents before writing the new image. */
+       if (ftruncate(ntdb->file->fd, 0) == -1) {
+               return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+                                 "ntdb_new_database:"
+                                 " failed to truncate: %s", strerror(errno));
+       }
+
+       rlen = write(ntdb->file->fd, &newdb, sizeof(newdb));
+       if (rlen != sizeof(newdb)) {
+               /* A short write leaves errno unset, so report ENOSPC. */
+               if (rlen >= 0)
+                       errno = ENOSPC;
+               return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+                                 "ntdb_new_database: %zi writing header: %s",
+                                 rlen, strerror(errno));
+       }
+       return NTDB_SUCCESS;
+}
+
+/*
+ * Allocate ntdb->file and give it a clean slate: no lock records, no
+ * all-record lock, no mapping, and a single reference.
+ * Returns NTDB_SUCCESS, or NTDB_ERR_OOM (logged) if the allocation fails.
+ */
+static enum NTDB_ERROR ntdb_new_file(struct ntdb_context *ntdb)
+{
+       struct ntdb_file *file = malloc(sizeof(*file));
+
+       /* Store the result first, so ntdb->file is NULL if we bail out. */
+       ntdb->file = file;
+       if (file == NULL) {
+               return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+                                 "ntdb_open: cannot alloc ntdb_file structure");
+       }
+
+       file->refcnt = 1;
+       file->map_ptr = NULL;
+       file->lockrecs = NULL;
+       file->num_lockrecs = 0;
+       file->allrecord_lock.count = 0;
+       return NTDB_SUCCESS;
+}
+
+/*
+ * Install one attribute on an already-open ntdb.
+ *
+ * Only the log and flock attributes can be changed here.  The hash, seed
+ * and openhook attributes are fixed at open time, stats can never be set,
+ * and anything else is unknown: all of those log a usage error, record it
+ * in ntdb->last_error and return NTDB_ERR_EINVAL's logged result.
+ */
+_PUBLIC_ enum NTDB_ERROR ntdb_set_attribute(struct ntdb_context *ntdb,
+                                const union ntdb_attribute *attr)
+{
+       /* Name of an attribute that may only be given to ntdb_open(). */
+       const char *open_only = NULL;
+
+       switch (attr->base.attr) {
+       case NTDB_ATTRIBUTE_LOG:
+               ntdb->log_fn = attr->log.fn;
+               ntdb->log_data = attr->log.data;
+               return NTDB_SUCCESS;
+       case NTDB_ATTRIBUTE_FLOCK:
+               ntdb->lock_fn = attr->flock.lock;
+               ntdb->unlock_fn = attr->flock.unlock;
+               ntdb->lock_data = attr->flock.data;
+               return NTDB_SUCCESS;
+       case NTDB_ATTRIBUTE_STATS:
+               return ntdb->last_error
+                       = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
+                                    NTDB_LOG_USE_ERROR,
+                                    "ntdb_set_attribute:"
+                                    " cannot set NTDB_ATTRIBUTE_STATS");
+       case NTDB_ATTRIBUTE_HASH:
+               open_only = "NTDB_ATTRIBUTE_HASH";
+               break;
+       case NTDB_ATTRIBUTE_SEED:
+               open_only = "NTDB_ATTRIBUTE_SEED";
+               break;
+       case NTDB_ATTRIBUTE_OPENHOOK:
+               open_only = "NTDB_ATTRIBUTE_OPENHOOK";
+               break;
+       default:
+               break;
+       }
+
+       if (open_only != NULL) {
+               return ntdb->last_error
+                       = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
+                                    NTDB_LOG_USE_ERROR,
+                                    "ntdb_set_attribute:"
+                                    " cannot set %s after opening",
+                                    open_only);
+       }
+
+       return ntdb->last_error
+               = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
+                            NTDB_LOG_USE_ERROR,
+                            "ntdb_set_attribute:"
+                            " unknown attribute type %u",
+                            attr->base.attr);
+}
+
+/*
+ * Copy the current value of the attribute named by attr->base.attr out of
+ * the context and into @attr.
+ *
+ * A log or openhook attribute that has no function installed yields
+ * NTDB_ERR_NOEXIST; an unrecognised type is logged and yields the logged
+ * error.  Both are recorded in ntdb->last_error.  On success
+ * attr->base.next is cleared.
+ */
+_PUBLIC_ enum NTDB_ERROR ntdb_get_attribute(struct ntdb_context *ntdb,
+                                union ntdb_attribute *attr)
+{
+       switch (attr->base.attr) {
+       case NTDB_ATTRIBUTE_LOG:
+               if (ntdb->log_fn == NULL)
+                       goto not_set;
+               attr->log.fn = ntdb->log_fn;
+               attr->log.data = ntdb->log_data;
+               break;
+       case NTDB_ATTRIBUTE_OPENHOOK:
+               if (ntdb->openhook == NULL)
+                       goto not_set;
+               attr->openhook.fn = ntdb->openhook;
+               attr->openhook.data = ntdb->openhook_data;
+               break;
+       case NTDB_ATTRIBUTE_HASH:
+               attr->hash.fn = ntdb->hash_fn;
+               attr->hash.data = ntdb->hash_data;
+               break;
+       case NTDB_ATTRIBUTE_SEED:
+               attr->seed.seed = ntdb->hash_seed;
+               break;
+       case NTDB_ATTRIBUTE_FLOCK:
+               attr->flock.lock = ntdb->lock_fn;
+               attr->flock.unlock = ntdb->unlock_fn;
+               attr->flock.data = ntdb->lock_data;
+               break;
+       case NTDB_ATTRIBUTE_STATS: {
+               /* Copy no more than the smaller of the two structures. */
+               size_t len = attr->stats.size > ntdb->stats.size
+                       ? ntdb->stats.size : attr->stats.size;
+
+               memcpy(&attr->stats, &ntdb->stats, len);
+               break;
+       }
+       default:
+               return ntdb->last_error
+                       = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
+                                    NTDB_LOG_USE_ERROR,
+                                    "ntdb_get_attribute:"
+                                    " unknown attribute type %u",
+                                    attr->base.attr);
+       }
+
+       attr->base.next = NULL;
+       return NTDB_SUCCESS;
+
+not_set:
+       return ntdb->last_error = NTDB_ERR_NOEXIST;
+}
+
+/*
+ * Return one attribute of an open ntdb to its default.
+ *
+ * NTDB_ATTRIBUTE_LOG and NTDB_ATTRIBUTE_OPENHOOK have their function
+ * pointer cleared; NTDB_ATTRIBUTE_FLOCK goes back to the fcntl-based
+ * lock and unlock functions.  No other type can be unset: a usage error
+ * is logged and nothing is changed.
+ */
+_PUBLIC_ void ntdb_unset_attribute(struct ntdb_context *ntdb,
+                        enum ntdb_attribute_type type)
+{
+       switch (type) {
+       case NTDB_ATTRIBUTE_LOG:
+               ntdb->log_fn = NULL;
+               break;
+       case NTDB_ATTRIBUTE_OPENHOOK:
+               ntdb->openhook = NULL;
+               break;
+       case NTDB_ATTRIBUTE_HASH:
+       case NTDB_ATTRIBUTE_SEED:
+               ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
+                          "ntdb_unset_attribute: cannot unset %s after opening",
+                          type == NTDB_ATTRIBUTE_HASH
+                          ? "NTDB_ATTRIBUTE_HASH"
+                          : "NTDB_ATTRIBUTE_SEED");
+               break;
+       case NTDB_ATTRIBUTE_STATS:
+               /* Leading space keeps the two string pieces apart. */
+               ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
+                          NTDB_LOG_USE_ERROR,
+                          "ntdb_unset_attribute:"
+                          " cannot unset NTDB_ATTRIBUTE_STATS");
+               break;
+       case NTDB_ATTRIBUTE_FLOCK:
+               ntdb->lock_fn = ntdb_fcntl_lock;
+               ntdb->unlock_fn = ntdb_fcntl_unlock;
+               break;
+       default:
+               ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
+                          NTDB_LOG_USE_ERROR,
+                          "ntdb_unset_attribute: unknown attribute type %u",
+                          type);
+       }
+}
+
+/*
+ * Decide what to do about a capability whose type we do not recognise.
+ * The top three bits of the capability tell us whether it matters:
+ *
+ *   NTDB_CAP_NOOPEN:  refuse outright (NTDB_ERR_IO is logged and returned).
+ *   NTDB_CAP_NOWRITE: refuse unless the ntdb is NTDB_RDONLY
+ *                     (NTDB_ERR_RDONLY is logged and returned).
+ *   NTDB_CAP_NOCHECK: carry on, but set NTDB_CANT_CHECK in ntdb->flags.
+ *
+ * @caller only prefixes the log message.  Returns NTDB_SUCCESS when the
+ * capability can be ignored.
+ */
+enum NTDB_ERROR unknown_capability(struct ntdb_context *ntdb, const char *caller,
+                                 ntdb_off_t type)
+{
+       /*
+        * Report the capability's type rather than the flag bit that made
+        * us refuse it, as an unsigned long long so it always matches %llu.
+        */
+       unsigned long long captype = type & NTDB_CAP_TYPE_MASK;
+
+       if (type & NTDB_CAP_NOOPEN) {
+               return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+                                 "%s: file has unknown capability %llu",
+                                 caller, captype);
+       }
+
+       if ((type & NTDB_CAP_NOWRITE) && !(ntdb->flags & NTDB_RDONLY)) {
+               return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_ERROR,
+                                 "%s: file has unknown capability %llu"
+                                 " (cannot write to it)",
+                                 caller, captype);
+       }
+
+       if (type & NTDB_CAP_NOCHECK) {
+               ntdb->flags |= NTDB_CANT_CHECK;
+       }
+       return NTDB_SUCCESS;
+}
+
+/*
+ * Walk the capability list that starts at offset @capabilities (an offset
+ * of 0 ends it).  No capability types are understood yet, so every entry
+ * is handed to unknown_capability(); the walk stops at the first error,
+ * which is returned.
+ */
+static enum NTDB_ERROR capabilities_ok(struct ntdb_context *ntdb,
+                                     ntdb_off_t capabilities)
+{
+       enum NTDB_ERROR ecode = NTDB_SUCCESS;
+       ntdb_off_t off = capabilities;
+
+       while (off != 0 && ecode == NTDB_SUCCESS) {
+               const struct ntdb_capability *cap;
+               ntdb_off_t following;
+
+               cap = ntdb_access_read(ntdb, off, sizeof(*cap), true);
+               if (NTDB_PTR_IS_ERR(cap)) {
+                       return NTDB_PTR_ERR(cap);
+               }
+
+               switch (cap->type & NTDB_CAP_TYPE_MASK) {
+               /* We don't understand any capabilities (yet). */
+               default:
+                       ecode = unknown_capability(ntdb, "ntdb_open", cap->type);
+               }
+
+               /* Fetch the link before the record is released. */
+               following = cap->next;
+               ntdb_access_release(ntdb, cap);
+               off = following;
+       }
+       return ecode;
+}
+
+_PUBLIC_ struct ntdb_context *ntdb_open(const char *name, int ntdb_flags,
+                            int open_flags, mode_t mode,
+                            union ntdb_attribute *attr)
+{
+       struct ntdb_context *ntdb;
+       struct stat st;
+       int saved_errno = 0;
+       uint64_t hash_test;
+       unsigned v;
+       ssize_t rlen;
+       struct ntdb_header hdr;
+       struct ntdb_attribute_seed *seed = NULL;
+       ntdb_bool_err berr;
+       enum NTDB_ERROR ecode;
+       int openlock;
+
+       ntdb = malloc(sizeof(*ntdb) + (name ? strlen(name) + 1 : 0));
+       if (!ntdb) {
+               /* Can't log this */
+               errno = ENOMEM;
+               return NULL;
+       }
+       /* Set name immediately for logging functions. */
+       if (name) {
+               ntdb->name = strcpy((char *)(ntdb + 1), name);
+       } else {
+               ntdb->name = NULL;
+       }
+       ntdb->flags = ntdb_flags;
+       ntdb->log_fn = NULL;
+       ntdb->open_flags = open_flags;
+       ntdb->last_error = NTDB_SUCCESS;
+       ntdb->file = NULL;
+       ntdb->openhook = NULL;
+       ntdb->lock_fn = ntdb_fcntl_lock;
+       ntdb->unlock_fn = ntdb_fcntl_unlock;
+       ntdb->hash_fn = ntdb_jenkins_hash;
+       memset(&ntdb->stats, 0, sizeof(ntdb->stats));
+       ntdb->stats.base.attr = NTDB_ATTRIBUTE_STATS;
+       ntdb->stats.size = sizeof(ntdb->stats);
+
+       while (attr) {
+               switch (attr->base.attr) {
+               case NTDB_ATTRIBUTE_HASH:
+                       ntdb->hash_fn = attr->hash.fn;
+                       ntdb->hash_data = attr->hash.data;
+                       break;
+               case NTDB_ATTRIBUTE_SEED:
+                       seed = &attr->seed;
+                       break;
+               case NTDB_ATTRIBUTE_OPENHOOK:
+                       ntdb->openhook = attr->openhook.fn;
+                       ntdb->openhook_data = attr->openhook.data;
+                       break;
+               default:
+                       /* These are set as normal. */
+                       ecode = ntdb_set_attribute(ntdb, attr);
+                       if (ecode != NTDB_SUCCESS)
+                               goto fail;
+               }
+               attr = attr->base.next;
+       }
+
+       if (ntdb_flags & ~(NTDB_INTERNAL | NTDB_NOLOCK | NTDB_NOMMAP | NTDB_CONVERT
+                         | NTDB_NOSYNC | NTDB_SEQNUM | NTDB_ALLOW_NESTING
+                         | NTDB_RDONLY)) {
+               ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
+                                  "ntdb_open: unknown flags %u", ntdb_flags);
+               goto fail;
+       }
+
+       if (seed) {
+               if (!(ntdb_flags & NTDB_INTERNAL) && !(open_flags & O_CREAT)) {
+                       ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
+                                          NTDB_LOG_USE_ERROR,
+                                          "ntdb_open:"
+                                          " cannot set NTDB_ATTRIBUTE_SEED"
+                                          " without O_CREAT.");
+                       goto fail;
+               }
+       }
+
+       if ((open_flags & O_ACCMODE) == O_WRONLY) {
+               ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
+                                  "ntdb_open: can't open ntdb %s write-only",
+                                  name);
+               goto fail;
+       }
+
+       if ((open_flags & O_ACCMODE) == O_RDONLY) {
+               openlock = F_RDLCK;
+               ntdb->flags |= NTDB_RDONLY;
+       } else {
+               if (ntdb_flags & NTDB_RDONLY) {
+                       ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
+                                          NTDB_LOG_USE_ERROR,
+                                          "ntdb_open: can't use NTDB_RDONLY"
+                                          " without O_RDONLY");
+                       goto fail;
+               }
+               openlock = F_WRLCK;
+       }
+
+       /* internal databases don't need any of the rest. */
+       if (ntdb->flags & NTDB_INTERNAL) {
+               ntdb->flags |= (NTDB_NOLOCK | NTDB_NOMMAP);
+               ecode = ntdb_new_file(ntdb);
+               if (ecode != NTDB_SUCCESS) {
+                       goto fail;
+               }
+               ntdb->file->fd = -1;
+               ecode = ntdb_new_database(ntdb, seed, &hdr);
+               if (ecode == NTDB_SUCCESS) {
+                       ntdb_convert(ntdb, &hdr.hash_seed,
+                                   sizeof(hdr.hash_seed));
+                       ntdb->hash_seed = hdr.hash_seed;
+                       ntdb_context_init(ntdb);
+                       ntdb_ftable_init(ntdb);
+               }
+               if (ecode != NTDB_SUCCESS) {
+                       goto fail;
+               }
+               return ntdb;
+       }
+
+       if (stat(name, &st) != -1)
+               ntdb->file = find_file(st.st_dev, st.st_ino);
+
+       if (!ntdb->file) {
+               int fd;
+
+               if ((fd = open(name, open_flags, mode)) == -1) {
+                       /* errno set by open(2) */
+                       saved_errno = errno;
+                       ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+                                  "ntdb_open: could not open file %s: %s",
+                                  name, strerror(errno));
+                       goto fail_errno;
+               }
+
+               /* on exec, don't inherit the fd */
+               v = fcntl(fd, F_GETFD, 0);
+               fcntl(fd, F_SETFD, v | FD_CLOEXEC);
+
+               if (fstat(fd, &st) == -1) {
+                       saved_errno = errno;
+                       ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+                                  "ntdb_open: could not stat open %s: %s",
+                                  name, strerror(errno));
+                       close(fd);
+                       goto fail_errno;
+               }
+
+               ecode = ntdb_new_file(ntdb);
+               if (ecode != NTDB_SUCCESS) {
+                       close(fd);
+                       goto fail;
+               }
+
+               ntdb->file->fd = fd;
+               ntdb->file->device = st.st_dev;
+               ntdb->file->inode = st.st_ino;
+               ntdb->file->map_ptr = NULL;
+               ntdb->file->map_size = 0;
+       }
+
+       /* ensure there is only one process initialising at once */
+       ecode = ntdb_lock_open(ntdb, openlock, NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK);
+       if (ecode != NTDB_SUCCESS) {
+               saved_errno = errno;
+               goto fail_errno;
+       }
+
+       /* call their open hook if they gave us one. */
+       if (ntdb->openhook) {
+               ecode = ntdb->openhook(ntdb->file->fd, ntdb->openhook_data);
+               if (ecode != NTDB_SUCCESS) {
+                       ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+                                  "ntdb_open: open hook failed");
+                       goto fail;
+               }
+               open_flags |= O_CREAT;
+       }
+
+       /* If they used O_TRUNC, read will return 0. */
+       rlen = pread(ntdb->file->fd, &hdr, sizeof(hdr), 0);
+       if (rlen == 0 && (open_flags & O_CREAT)) {
+               ecode = ntdb_new_database(ntdb, seed, &hdr);
+               if (ecode != NTDB_SUCCESS) {
+                       goto fail;
+               }
+       } else if (rlen < 0) {
+               ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+                                  "ntdb_open: error %s reading %s",
+                                  strerror(errno), name);
+               goto fail;
+       } else if (rlen < sizeof(hdr)
+                  || strcmp(hdr.magic_food, NTDB_MAGIC_FOOD) != 0) {
+               ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+                                  "ntdb_open: %s is not a ntdb file", name);
+               goto fail;
+       }
+
+       if (hdr.version != NTDB_VERSION) {
+               if (hdr.version == bswap_64(NTDB_VERSION))
+                       ntdb->flags |= NTDB_CONVERT;
+               else {
+                       /* wrong version */
+                       ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+                                          "ntdb_open:"
+                                          " %s is unknown version 0x%llx",
+                                          name, (long long)hdr.version);
+                       goto fail;
+               }
+       } else if (ntdb->flags & NTDB_CONVERT) {
+               ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+                                  "ntdb_open:"
+                                  " %s does not need NTDB_CONVERT",
+                                  name);
+               goto fail;
+       }
+
+       ntdb_context_init(ntdb);
+
+       ntdb_convert(ntdb, &hdr, sizeof(hdr));
+       ntdb->hash_seed = hdr.hash_seed;
+       hash_test = NTDB_HASH_MAGIC;
+       hash_test = ntdb_hash(ntdb, &hash_test, sizeof(hash_test));
+       if (hdr.hash_test != hash_test) {
+               /* wrong hash variant */
+               ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+                                  "ntdb_open:"
+                                  " %s uses a different hash function",
+                                  name);
+               goto fail;
+       }
+
+       ecode = capabilities_ok(ntdb, hdr.capabilities);
+       if (ecode != NTDB_SUCCESS) {
+               goto fail;
+       }
+
+       /* Clear any features we don't understand. */
+       if ((open_flags & O_ACCMODE) != O_RDONLY) {
+               hdr.features_used &= NTDB_FEATURE_MASK;
+               ecode = ntdb_write_convert(ntdb, offsetof(struct ntdb_header,
+                                                       features_used),
+                                         &hdr.features_used,
+                                         sizeof(hdr.features_used));
+               if (ecode != NTDB_SUCCESS)
+                       goto fail;
+       }
+
+       ntdb_unlock_open(ntdb, openlock);
+
+       /* This makes sure we have current map_size and mmap. */
+       ecode = ntdb->io->oob(ntdb, ntdb->file->map_size, 1, true);
+       if (unlikely(ecode != NTDB_SUCCESS))
+               goto fail;
+
+       /* Now it's fully formed, recover if necessary. */
+       berr = ntdb_needs_recovery(ntdb);
+       if (unlikely(berr != false)) {
+               if (berr < 0) {
+                       ecode = NTDB_OFF_TO_ERR(berr);
+                       goto fail;
+               }
+               ecode = ntdb_lock_and_recover(ntdb);
+               if (ecode != NTDB_SUCCESS) {
+                       goto fail;
+               }
+       }
+
+       ecode = ntdb_ftable_init(ntdb);
+       if (ecode != NTDB_SUCCESS) {
+               goto fail;
+       }
+
+       ntdb->next = tdbs;
+       tdbs = ntdb;
+       return ntdb;
+
+ fail:
+       /* Map ecode to some logical errno. */
+       switch (NTDB_ERR_TO_OFF(ecode)) {
+       case NTDB_ERR_TO_OFF(NTDB_ERR_CORRUPT):
+       case NTDB_ERR_TO_OFF(NTDB_ERR_IO):
+               saved_errno = EIO;
+               break;
+       case NTDB_ERR_TO_OFF(NTDB_ERR_LOCK):
+               saved_errno = EWOULDBLOCK;
+               break;
+       case NTDB_ERR_TO_OFF(NTDB_ERR_OOM):
+               saved_errno = ENOMEM;
+               break;
+       case NTDB_ERR_TO_OFF(NTDB_ERR_EINVAL):
+               saved_errno = EINVAL;
+               break;
+       default:
+               saved_errno = EINVAL;
+               break;
+       }
+
+fail_errno:
+#ifdef NTDB_TRACE
+       close(ntdb->tracefd);
+#endif
+       if (ntdb->file) {
+               ntdb_lock_cleanup(ntdb);
+               if (--ntdb->file->refcnt == 0) {
+                       assert(ntdb->file->num_lockrecs == 0);
+                       if (ntdb->file->map_ptr) {
+                               if (ntdb->flags & NTDB_INTERNAL) {
+                                       free(ntdb->file->map_ptr);
+                               } else
+                                       ntdb_munmap(ntdb->file);
+                       }
+                       if (close(ntdb->file->fd) != 0)
+                               ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+                                          "ntdb_open: failed to close ntdb fd"
+                                          " on error: %s", strerror(errno));
+                       free(ntdb->file->lockrecs);
+                       free(ntdb->file);
+               }
+       }
+
+       free(ntdb);
+       errno = saved_errno;
+       return NULL;
+}
+
+_PUBLIC_ int ntdb_close(struct ntdb_context *ntdb)
+{
+       int ret = 0;
+       struct ntdb_context **i;
+
+       ntdb_trace(ntdb, "ntdb_close");
+
+       if (ntdb->transaction) {
+               ntdb_transaction_cancel(ntdb);
+       }
+
+       if (ntdb->file->map_ptr) {
+               if (ntdb->flags & NTDB_INTERNAL)
+                       free(ntdb->file->map_ptr);
+               else
+                       ntdb_munmap(ntdb->file);
+       }
+       if (ntdb->file) {
+               ntdb_lock_cleanup(ntdb);
+               if (--ntdb->file->refcnt == 0) {
+                       ret = close(ntdb->file->fd);
+                       free(ntdb->file->lockrecs);
+                       free(ntdb->file);
+               }
+       }
+
+       /* Remove from tdbs list */
+       for (i = &tdbs; *i; i = &(*i)->next) {
+               if (*i == ntdb) {
+                       *i = ntdb->next;
+                       break;
+               }
+       }
+
+#ifdef NTDB_TRACE
+       close(ntdb->tracefd);
+#endif
+       free(ntdb);
+
+       return ret;
+}
+
+/*
+ * Call fn(ntdb, p) for every context on the global tdbs list, in list
+ * order.  The walk stops as soon as fn returns non-zero.
+ */
+_PUBLIC_ void ntdb_foreach_(int (*fn)(struct ntdb_context *, void *), void *p)
+{
+       struct ntdb_context *cur = tdbs;
+
+       while (cur) {
+               if (fn(cur, p) != 0)
+                       return;
+               cur = cur->next;
+       }
+}
diff --git a/lib/ntdb/private.h b/lib/ntdb/private.h
new file mode 100644 (file)
index 0000000..1cf9b7a
--- /dev/null
@@ -0,0 +1,657 @@
+#ifndef NTDB_PRIVATE_H
+#define NTDB_PRIVATE_H
+/*
+  Trivial Database 2: private types and prototypes
+  Copyright (C) Rusty Russell 2010
+
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License as published by the Free Software Foundation; either
+  version 3 of the License, or (at your option) any later version.
+
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "config.h"
+#ifndef HAVE_CCAN
+#error You need ccan to build ntdb!
+#endif
+#include "ntdb.h"
+#include <ccan/compiler/compiler.h>
+#include <ccan/likely/likely.h>
+#include <ccan/endian/endian.h>
+
+#ifdef HAVE_LIBREPLACE
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/time.h"
+#include "system/shmem.h"
+#include "system/select.h"
+#include "system/wait.h"
+#else
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <sys/time.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <utime.h>
+#include <unistd.h>
+#endif
+
+#ifndef TEST_IT
+#define TEST_IT(cond)
+#endif
+
+/* #define NTDB_TRACE 1 */
+
+#ifndef __STRING
+#define __STRING(x)    #x
+#endif
+
+#ifndef __STRINGSTRING
+#define __STRINGSTRING(x) __STRING(x)
+#endif
+
+#ifndef __location__
+#define __location__ __FILE__ ":" __STRINGSTRING(__LINE__)
+#endif
+
+typedef uint64_t ntdb_len_t;
+typedef uint64_t ntdb_off_t;
+
+#define NTDB_MAGIC_FOOD "NTDB file\n"
+#define NTDB_VERSION ((uint64_t)(0x26011967 + 7))
+#define NTDB_USED_MAGIC ((uint64_t)0x1999)
+#define NTDB_HTABLE_MAGIC ((uint64_t)0x1888)
+#define NTDB_CHAIN_MAGIC ((uint64_t)0x1777)
+#define NTDB_FTABLE_MAGIC ((uint64_t)0x1666)
+#define NTDB_CAP_MAGIC ((uint64_t)0x1555)
+#define NTDB_FREE_MAGIC ((uint64_t)0xFE)
+#define NTDB_HASH_MAGIC (0xA1ABE11A01092008ULL)
+#define NTDB_RECOVERY_MAGIC (0xf53bc0e7ad124589ULL)
+#define NTDB_RECOVERY_INVALID_MAGIC (0x0ULL)
+
+/* Capability bits. */
+#define NTDB_CAP_TYPE_MASK     0x1FFFFFFFFFFFFFFFULL
+#define NTDB_CAP_NOCHECK               0x8000000000000000ULL
+#define NTDB_CAP_NOWRITE               0x4000000000000000ULL
+#define NTDB_CAP_NOOPEN                0x2000000000000000ULL
+
+/* Packing errors into offsets and v.v.  The macro argument is parenthesized
+ * (as in NTDB_PTR_IS_ERR) so that any expression can be passed safely. */
+#define NTDB_OFF_IS_ERR(off)                                           \
+       unlikely((off) >= (ntdb_off_t)(long)NTDB_ERR_LAST)
+#define NTDB_OFF_TO_ERR(off) ((enum NTDB_ERROR)(long)(off))
+#define NTDB_ERR_TO_OFF(ecode) ((ntdb_off_t)(long)(ecode))
+
+/* Packing errors into pointers and v.v. */
+#define NTDB_PTR_IS_ERR(ptr)                                           \
+       unlikely((unsigned long)(ptr) >= (unsigned long)NTDB_ERR_LAST)
+#define NTDB_PTR_ERR(p) ((enum NTDB_ERROR)(long)(p))
+#define NTDB_ERR_PTR(err) ((void *)(long)(err))
+
+/* Common case of returning true, false or -ve error. */
+typedef int ntdb_bool_err;
+
+/* Prevent others from opening the file. */
+#define NTDB_OPEN_LOCK 0
+/* Expanding file. */
+#define NTDB_EXPANSION_LOCK 2
+/* Doing a transaction. */
+#define NTDB_TRANSACTION_LOCK 8
+/* Hash chain locks. */
+#define NTDB_HASH_LOCK_START 64
+
+/* Range for hash locks. */
+#define NTDB_HASH_LOCK_RANGE_BITS 30
+#define NTDB_HASH_LOCK_RANGE (1 << NTDB_HASH_LOCK_RANGE_BITS)
+
+/* We have 1024 entries in the top level. */
+#define NTDB_TOPLEVEL_HASH_BITS 10
+/* And 64 entries in each sub-level: thus 64 bits exactly after 9 levels. */
+#define NTDB_SUBLEVEL_HASH_BITS 6
+/* And 8 entries in each group, ie 8 groups per sublevel. */
+#define NTDB_HASH_GROUP_BITS 3
+/* This is currently 10: beyond this we chain. */
+#define NTDB_MAX_LEVELS (1+(64-NTDB_TOPLEVEL_HASH_BITS) / NTDB_SUBLEVEL_HASH_BITS)
+
+/* Extend file by at least 100 times what is needed. */
+#define NTDB_EXTENSION_FACTOR 100
+
+/* We steal bits from the offsets to store hash info. */
+#define NTDB_OFF_HASH_GROUP_MASK ((1ULL << NTDB_HASH_GROUP_BITS) - 1)
+/* We steal this many upper bits, giving a maximum offset of 64 petabytes. */
+#define NTDB_OFF_UPPER_STEAL 8
+#define   NTDB_OFF_UPPER_STEAL_EXTRA 7
+/* The bit number where we store extra hash bits. */
+#define NTDB_OFF_HASH_EXTRA_BIT 57
+#define NTDB_OFF_UPPER_STEAL_SUBHASH_BIT 56
+
+/* Additional features we understand.  Currently: none. */
+#define NTDB_FEATURE_MASK ((uint64_t)0)
+
+/* The bit number where we store the extra hash bits. */
+/* Convenience mask to get actual offset. */
+#define NTDB_OFF_MASK                                                  \
+       (((1ULL << (64 - NTDB_OFF_UPPER_STEAL)) - 1) - NTDB_OFF_HASH_GROUP_MASK)
+
+/* How many buckets in a free list: see size_to_bucket(). */
+#define NTDB_FREE_BUCKETS (64 - NTDB_OFF_UPPER_STEAL)
+
+/* We have to be able to fit a free record here. */
+#define NTDB_MIN_DATA_LEN                                              \
+       (sizeof(struct ntdb_free_record) - sizeof(struct ntdb_used_record))
+
+/* Indicates this entry is not on an flist (can happen during coalescing) */
+#define NTDB_FTABLE_NONE ((1ULL << NTDB_OFF_UPPER_STEAL) - 1)
+
+/*
+ * Header of a record that is in use.  It is also embedded as the first
+ * member ("hdr") of struct ntdb_freetable and struct ntdb_capability.
+ * The packed fields are read with the rec_*() accessors below.
+ */
+struct ntdb_used_record {
+       /* For on-disk compatibility, we avoid bitfields:
+          magic: 16,        (highest)
+          key_len_bits: 5,
+          extra_padding: 32
+          hash_bits: 11
+          i.e. magic starts at bit 48, key_len_bits at bit 43,
+          extra_padding at bit 11 and hash_bits at bit 0.
+       */
+        uint64_t magic_and_meta;
+       /* The bottom key_len_bits*2 are key length, rest is data length. */
+        uint64_t key_and_data_len;
+};
+
+/* Number of low bits of key_and_data_len that hold the key length: twice
+ * the 5-bit key_len_bits field found at bit 43 of magic_and_meta. */
+static inline unsigned rec_key_bits(const struct ntdb_used_record *r)
+{
+       const unsigned key_len_bits = (r->magic_and_meta >> 43) & 0x1F;
+
+       return 2 * key_len_bits;
+}
+
+/* Key length: the bottom rec_key_bits(r) bits of key_and_data_len. */
+static inline uint64_t rec_key_length(const struct ntdb_used_record *r)
+{
+       const uint64_t key_mask = (1ULL << rec_key_bits(r)) - 1;
+
+       return r->key_and_data_len & key_mask;
+}
+
+/* Data length: whatever is left of key_and_data_len above the key bits. */
+static inline uint64_t rec_data_length(const struct ntdb_used_record *r)
+{
+       const unsigned key_bits = rec_key_bits(r);
+
+       return r->key_and_data_len >> key_bits;
+}
+
+/* The 32-bit extra_padding field, stored at bit 11 of magic_and_meta. */
+static inline uint64_t rec_extra_padding(const struct ntdb_used_record *r)
+{
+       const uint64_t above_hash = r->magic_and_meta >> 11;
+
+       return above_hash & ((1ULL << 32) - 1);
+}
+
+/* The 11 hash bits kept in the bottom of magic_and_meta. */
+static inline uint32_t rec_hash(const struct ntdb_used_record *r)
+{
+       return r->magic_and_meta & 0x7FF;
+}
+
+/* The 16-bit magic kept in the top of magic_and_meta. */
+static inline uint16_t rec_magic(const struct ntdb_used_record *r)
+{
+       return (uint16_t)(r->magic_and_meta >> 48);
+}
+
+/*
+ * Layout of a free record.  The two packed fields are read with
+ * frec_magic()/frec_prev() and frec_ftable()/frec_len(): in both, the top
+ * NTDB_OFF_UPPER_STEAL bits hold the first value and the rest the second.
+ */
+struct ntdb_free_record {
+        uint64_t magic_and_prev; /* NTDB_OFF_UPPER_STEAL bits magic, then prev */
+        uint64_t ftable_and_len; /* Len not counting these two fields. */
+       /* This is why the minimum record size is 8 bytes.  */
+       uint64_t next;
+};
+
+/* The "prev" part of magic_and_prev: everything below the top
+ * NTDB_OFF_UPPER_STEAL bits. */
+static inline uint64_t frec_prev(const struct ntdb_free_record *f)
+{
+       const uint64_t low_mask = (1ULL << (64 - NTDB_OFF_UPPER_STEAL)) - 1;
+
+       return f->magic_and_prev & low_mask;
+}
+
+/* The magic: the top NTDB_OFF_UPPER_STEAL bits of magic_and_prev. */
+static inline uint64_t frec_magic(const struct ntdb_free_record *f)
+{
+       const unsigned shift = 64 - NTDB_OFF_UPPER_STEAL;
+
+       return f->magic_and_prev >> shift;
+}
+
+/* The length part of ftable_and_len: everything below the top
+ * NTDB_OFF_UPPER_STEAL bits. */
+static inline uint64_t frec_len(const struct ntdb_free_record *f)
+{
+       const uint64_t low_mask = (1ULL << (64 - NTDB_OFF_UPPER_STEAL)) - 1;
+
+       return f->ftable_and_len & low_mask;
+}
+
+/* The free table number: the top NTDB_OFF_UPPER_STEAL bits of
+ * ftable_and_len. */
+static inline unsigned frec_ftable(const struct ntdb_free_record *f)
+{
+       const unsigned shift = 64 - NTDB_OFF_UPPER_STEAL;
+
+       return f->ftable_and_len >> shift;
+}
+
+/*
+ * Header of a recovery record.
+ * NOTE(review): presumably this sits at the offset stored in
+ * ntdb_header.recovery ("Transaction recovery area") -- confirm in
+ * transaction.c.
+ */
+struct ntdb_recovery_record {
+       /* NOTE(review): presumably NTDB_RECOVERY_MAGIC or
+        * NTDB_RECOVERY_INVALID_MAGIC -- confirm. */
+       uint64_t magic;
+       /* Length of record (add this header to get total length). */
+       uint64_t max_len;
+       /* Length used. */
+       uint64_t len;
+       /* Old length of file before transaction. */
+       uint64_t eof;
+};
+
+/* If we bottom out of the subhashes, we chain. */
+struct ntdb_chain {
+       /* One hash group's worth of entries (1 << NTDB_HASH_GROUP_BITS). */
+       ntdb_off_t rec[1 << NTDB_HASH_GROUP_BITS];
+       /* NOTE(review): presumably the offset of the next chain -- confirm. */
+       ntdb_off_t next;
+};
+
+/* this is stored at the front of every database (ntdb_open reads it from
+ * file offset 0 and validates magic_food, version and hash_test). */
+struct ntdb_header {
+       char magic_food[64]; /* for /etc/magic; holds NTDB_MAGIC_FOOD */
+       /* FIXME: Make me 32 bit? */
+       uint64_t version; /* version of the code (NTDB_VERSION) */
+       uint64_t hash_test; /* result of hashing NTDB_HASH_MAGIC. */
+       uint64_t hash_seed; /* "random" seed written at creation time. */
+       ntdb_off_t free_table; /* (First) free table. */
+       ntdb_off_t recovery; /* Transaction recovery area. */
+
+       /* ntdb_open masks features_used with NTDB_FEATURE_MASK when the
+        * database is opened for writing. */
+       uint64_t features_used; /* Features all writers understand */
+       uint64_t features_offered; /* Features offered */
+
+       uint64_t seqnum; /* Sequence number for NTDB_SEQNUM */
+
+       ntdb_off_t capabilities; /* Optional linked list of capabilities. */
+       ntdb_off_t reserved[22];
+
+       /* Top level hash table: 1 << NTDB_TOPLEVEL_HASH_BITS entries. */
+       ntdb_off_t hashtable[1ULL << NTDB_TOPLEVEL_HASH_BITS];
+};
+
+/*
+ * A free table: a used-record header followed by NTDB_FREE_BUCKETS
+ * bucket slots (see size_to_bucket() and bucket_off()).
+ */
+struct ntdb_freetable {
+       struct ntdb_used_record hdr;
+       /* NOTE(review): presumably the offset of the next free table, as
+        * walked by next_ftable() -- confirm. */
+       ntdb_off_t next;
+       ntdb_off_t buckets[NTDB_FREE_BUCKETS];
+};
+
+/*
+ * One capability record.  ntdb_header.capabilities is described as an
+ * "optional linked list of capabilities".
+ */
+struct ntdb_capability {
+       struct ntdb_used_record hdr;
+       /* NOTE(review): presumably a type number combined with the
+        * NTDB_CAP_NOCHECK/NOWRITE/NOOPEN bits (see NTDB_CAP_TYPE_MASK) --
+        * confirm. */
+       ntdb_off_t type;
+       /* NOTE(review): presumably the offset of the next capability. */
+       ntdb_off_t next;
+       /* ... */
+};
+
+/* Information about a particular (locked) hash entry.  Filled in through
+ * find_and_lock() and consumed by replace_in_hash(), add_to_hash() and
+ * delete_from_hash(). */
+struct hash_info {
+       /* Full hash value of entry. */
+       uint64_t h;
+       /* Start and length of lock acquired. */
+       ntdb_off_t hlock_start;
+       ntdb_len_t hlock_range;
+       /* Start of hash group. */
+       ntdb_off_t group_start;
+       /* Bucket we belong in. */
+       unsigned int home_bucket;
+       /* Bucket we (or an empty space) were found in. */
+       unsigned int found_bucket;
+       /* How many bits of the hash are already used. */
+       unsigned int hash_used;
+       /* Current working group (1 << NTDB_HASH_GROUP_BITS entries). */
+       ntdb_off_t group[1 << NTDB_HASH_GROUP_BITS];
+};
+
+/* Traversal state passed to first_in_hash(), next_in_hash() and
+ * find_and_lock(). */
+struct traverse_info {
+       /* NOTE(review): one slot per hash level plus one; presumably the
+        * extra slot covers chains ("beyond this we chain") -- confirm. */
+       struct traverse_level {
+               ntdb_off_t hashtable;
+               /* We ignore groups here, and treat it as a big array. */
+               unsigned entry;
+               unsigned int total_buckets;
+       } levels[NTDB_MAX_LEVELS + 1];
+       unsigned int num_levels;
+       unsigned int toplevel_group;
+       /* This makes delete-everything-inside-traverse work as expected. */
+       ntdb_off_t prev;
+};
+
+/* Flags for the locking functions.  The non-zero values are distinct bits
+ * and may be OR'ed together (ntdb_open passes
+ * NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK). */
+enum ntdb_lock_flags {
+       /* WAIT == F_SETLKW, NOWAIT == F_SETLK */
+       NTDB_LOCK_NOWAIT = 0,
+       NTDB_LOCK_WAIT = 1,
+       /* If set, don't log an error on failure. */
+       NTDB_LOCK_PROBE = 2,
+       /* If set, don't check for recovery (used by recovery code). */
+       NTDB_LOCK_NOCHECK = 4,
+};
+
+/* A lock record.  struct ntdb_file holds one as allrecord_lock and an
+ * array of them in lockrecs. */
+struct ntdb_lock {
+       /* The context this lock belongs to (see owner_conflict()). */
+       struct ntdb_context *owner;
+       /* NOTE(review): presumably the file offset that is locked. */
+       off_t off;
+       /* NOTE(review): presumably a nesting count -- confirm in lock.c. */
+       uint32_t count;
+       /* NOTE(review): presumably F_RDLCK or F_WRLCK -- confirm. */
+       uint32_t ltype;
+};
+
+/* This is only needed for ntdb_access_commit, but used everywhere to
+ * simplify. */
+struct ntdb_access_hdr {
+       /* Next header; struct ntdb_context keeps a pointer to one of these
+        * in its "access" member. */
+       struct ntdb_access_hdr *next;
+       /* NOTE(review): off, len and convert presumably record the arguments
+        * given to ntdb_access_read()/ntdb_access_write() -- confirm. */
+       ntdb_off_t off;
+       ntdb_len_t len;
+       bool convert;
+};
+
+/*
+ * State of one underlying file.  It is reference counted: ntdb_open looks
+ * for an existing entry with the same device and inode before opening the
+ * file again, and ntdb_close frees it when refcnt drops to zero.
+ */
+struct ntdb_file {
+       /* How many are sharing us? */
+       unsigned int refcnt;
+
+       /* Mmap (if any), or malloc (for NTDB_INTERNAL). */
+       void *map_ptr;
+
+       /* How much space has been mapped (<= current file size) */
+       ntdb_len_t map_size;
+
+       /* The file descriptor (-1 for NTDB_INTERNAL). */
+       int fd;
+
+       /* Lock information */
+       /* NOTE(review): presumably the pid that took the locks, checked by
+        * check_lock_pid() after a fork -- confirm. */
+       pid_t locker;
+       struct ntdb_lock allrecord_lock;
+       size_t num_lockrecs;
+       struct ntdb_lock *lockrecs;
+
+       /* Identity of this file. */
+       dev_t device;
+       ino_t inode;
+};
+
+/*
+ * Table of I/O functions.  struct ntdb_context points at one through its
+ * "io" member ("IO methods: changes for transactions").
+ */
+struct ntdb_methods {
+       /* NOTE(review): presumably read/write of a length at an offset. */
+       enum NTDB_ERROR (*tread)(struct ntdb_context *, ntdb_off_t, void *,
+                                ntdb_len_t);
+       enum NTDB_ERROR (*twrite)(struct ntdb_context *, ntdb_off_t, const void *,
+                                 ntdb_len_t);
+       /* ntdb_open calls this as oob(ntdb, map_size, 1, true) to make sure
+        * map_size and the mmap are current. */
+       enum NTDB_ERROR (*oob)(struct ntdb_context *, ntdb_off_t, ntdb_len_t, bool);
+       enum NTDB_ERROR (*expand_file)(struct ntdb_context *, ntdb_len_t);
+       /* NOTE(review): presumably returns a pointer straight into the
+        * mapping, or NULL when that is not possible -- confirm in io.c. */
+       void *(*direct)(struct ntdb_context *, ntdb_off_t, size_t, bool);
+};
+
+/*
+  internal prototypes
+*/
+/* hash.c: */
+uint64_t ntdb_jenkins_hash(const void *key, size_t length, uint64_t seed,
+                          void *unused);
+
+enum NTDB_ERROR first_in_hash(struct ntdb_context *ntdb,
+                             struct traverse_info *tinfo,
+                             NTDB_DATA *kbuf, size_t *dlen);
+
+enum NTDB_ERROR next_in_hash(struct ntdb_context *ntdb,
+                            struct traverse_info *tinfo,
+                            NTDB_DATA *kbuf, size_t *dlen);
+
+/* Hash random memory. */
+uint64_t ntdb_hash(struct ntdb_context *ntdb, const void *ptr, size_t len);
+
+/* Hash on disk. */
+uint64_t hash_record(struct ntdb_context *ntdb, ntdb_off_t off);
+
+/* Find and lock a hash entry (or where it would be). */
+ntdb_off_t find_and_lock(struct ntdb_context *ntdb,
+                        NTDB_DATA key,
+                        int ltype,
+                        struct hash_info *h,
+                        struct ntdb_used_record *rec,
+                        struct traverse_info *tinfo);
+
+enum NTDB_ERROR replace_in_hash(struct ntdb_context *ntdb,
+                               struct hash_info *h,
+                               ntdb_off_t new_off);
+
+enum NTDB_ERROR add_to_hash(struct ntdb_context *ntdb, struct hash_info *h,
+                           ntdb_off_t new_off);
+
+enum NTDB_ERROR delete_from_hash(struct ntdb_context *ntdb, struct hash_info *h);
+
+/* For ntdb_check */
+bool is_subhash(ntdb_off_t val);
+enum NTDB_ERROR unknown_capability(struct ntdb_context *ntdb, const char *caller,
+                                  ntdb_off_t type);
+
+/* free.c: */
+enum NTDB_ERROR ntdb_ftable_init(struct ntdb_context *ntdb);
+
+/* check.c needs these to iterate through free lists. */
+ntdb_off_t first_ftable(struct ntdb_context *ntdb);
+ntdb_off_t next_ftable(struct ntdb_context *ntdb, ntdb_off_t ftable);
+
+/* This returns space or -ve error number. */
+ntdb_off_t alloc(struct ntdb_context *ntdb, size_t keylen, size_t datalen,
+                uint64_t hash, unsigned magic, bool growing);
+
+/* Put this record in a free list. */
+enum NTDB_ERROR add_free_record(struct ntdb_context *ntdb,
+                               ntdb_off_t off, ntdb_len_t len_with_header,
+                               enum ntdb_lock_flags waitflag,
+                               bool coalesce_ok);
+
+/* Set up header for a used/ftable/htable/chain/capability record. */
+enum NTDB_ERROR set_header(struct ntdb_context *ntdb,
+                          struct ntdb_used_record *rec,
+                          unsigned magic, uint64_t keylen, uint64_t datalen,
+                          uint64_t actuallen, unsigned hashlow);
+
+/* Used by ntdb_check to verify. */
+unsigned int size_to_bucket(ntdb_len_t data_len);
+ntdb_off_t bucket_off(ntdb_off_t ftable_off, unsigned bucket);
+
+/* Used by ntdb_summary */
+ntdb_off_t dead_space(struct ntdb_context *ntdb, ntdb_off_t off);
+
+/* Adjust expansion, used by create_recovery_area */
+ntdb_off_t ntdb_expand_adjust(ntdb_off_t map_size, ntdb_off_t size);
+
+/* io.c: */
+/* Initialize ntdb->io. */
+void ntdb_io_init(struct ntdb_context *ntdb);
+
+/* Convert endian of the buffer if required. */
+void *ntdb_convert(const struct ntdb_context *ntdb, void *buf, ntdb_len_t size);
+
+/* Unmap and try to map the ntdb. */
+void ntdb_munmap(struct ntdb_file *file);
+enum NTDB_ERROR ntdb_mmap(struct ntdb_context *ntdb);
+
+/* Either alloc a copy, or give direct access.  Release frees or noop. */
+const void *ntdb_access_read(struct ntdb_context *ntdb,
+                            ntdb_off_t off, ntdb_len_t len, bool convert);
+void *ntdb_access_write(struct ntdb_context *ntdb,
+                       ntdb_off_t off, ntdb_len_t len, bool convert);
+
+/* Release result of ntdb_access_read/write. */
+void ntdb_access_release(struct ntdb_context *ntdb, const void *p);
+/* Commit result of ntdb_access_write. */
+enum NTDB_ERROR ntdb_access_commit(struct ntdb_context *ntdb, void *p);
+
+/* Convenience routine to get an offset. */
+ntdb_off_t ntdb_read_off(struct ntdb_context *ntdb, ntdb_off_t off);
+
+/* Write an offset at an offset. */
+enum NTDB_ERROR ntdb_write_off(struct ntdb_context *ntdb, ntdb_off_t off,
+                              ntdb_off_t val);
+
+/* Clear an ondisk area. */
+enum NTDB_ERROR zero_out(struct ntdb_context *ntdb, ntdb_off_t off, ntdb_len_t len);
+
+/* Return a non-zero offset in the range start <= i < end of this array (or end). */
+ntdb_off_t ntdb_find_nonzero_off(struct ntdb_context *ntdb,
+                                ntdb_off_t base,
+                                uint64_t start,
+                                uint64_t end);
+
+/* Return a zero offset in this array, or num. */
+ntdb_off_t ntdb_find_zero_off(struct ntdb_context *ntdb, ntdb_off_t off,
+                             uint64_t num);
+
+/* Allocate and make a copy of some offset. */
+void *ntdb_alloc_read(struct ntdb_context *ntdb, ntdb_off_t offset, ntdb_len_t len);
+
+/* Writes a converted copy of a record. */
+enum NTDB_ERROR ntdb_write_convert(struct ntdb_context *ntdb, ntdb_off_t off,
+                                  const void *rec, size_t len);
+
+/* Reads record and converts it */
+enum NTDB_ERROR ntdb_read_convert(struct ntdb_context *ntdb, ntdb_off_t off,
+                                 void *rec, size_t len);
+
+/* Bump the seqnum (caller checks for ntdb->flags & NTDB_SEQNUM) */
+void ntdb_inc_seqnum(struct ntdb_context *ntdb);
+
+/* lock.c: */
+/* Print message because another ntdb owns a lock we want. */
+enum NTDB_ERROR owner_conflict(struct ntdb_context *ntdb, const char *call);
+
+/* If we fork, we no longer really own locks. */
+bool check_lock_pid(struct ntdb_context *ntdb, const char *call, bool log);
+
+/* Lock/unlock a range of hashes. */
+enum NTDB_ERROR ntdb_lock_hashes(struct ntdb_context *ntdb,
+                                ntdb_off_t hash_lock, ntdb_len_t hash_range,
+                                int ltype, enum ntdb_lock_flags waitflag);
+enum NTDB_ERROR ntdb_unlock_hashes(struct ntdb_context *ntdb,
+                                  ntdb_off_t hash_lock,
+                                  ntdb_len_t hash_range, int ltype);
+
+/* For closing the file. */
+void ntdb_lock_cleanup(struct ntdb_context *ntdb);
+
+/* Lock/unlock a particular free bucket. */
+enum NTDB_ERROR ntdb_lock_free_bucket(struct ntdb_context *ntdb, ntdb_off_t b_off,
+                                     enum ntdb_lock_flags waitflag);
+void ntdb_unlock_free_bucket(struct ntdb_context *ntdb, ntdb_off_t b_off);
+
+/* Serialize transaction start. */
+enum NTDB_ERROR ntdb_transaction_lock(struct ntdb_context *ntdb, int ltype);
+void ntdb_transaction_unlock(struct ntdb_context *ntdb, int ltype);
+
+/* Do we have any hash locks (ie. via ntdb_chainlock) ? */
+bool ntdb_has_hash_locks(struct ntdb_context *ntdb);
+
+/* Lock entire database. */
+enum NTDB_ERROR ntdb_allrecord_lock(struct ntdb_context *ntdb, int ltype,
+                                   enum ntdb_lock_flags flags, bool upgradable);
+void ntdb_allrecord_unlock(struct ntdb_context *ntdb, int ltype);
+enum NTDB_ERROR ntdb_allrecord_upgrade(struct ntdb_context *ntdb, off_t start);
+
+/* Serialize db open. */
+enum NTDB_ERROR ntdb_lock_open(struct ntdb_context *ntdb,
+                              int ltype, enum ntdb_lock_flags flags);
+void ntdb_unlock_open(struct ntdb_context *ntdb, int ltype);
+bool ntdb_has_open_lock(struct ntdb_context *ntdb);
+
+/* Serialize db expand. */
+enum NTDB_ERROR ntdb_lock_expand(struct ntdb_context *ntdb, int ltype);
+void ntdb_unlock_expand(struct ntdb_context *ntdb, int ltype);
+bool ntdb_has_expansion_lock(struct ntdb_context *ntdb);
+
+/* If it needs recovery, grab all the locks and do it. */
+enum NTDB_ERROR ntdb_lock_and_recover(struct ntdb_context *ntdb);
+
+/* Default lock and unlock functions. */
+int ntdb_fcntl_lock(int fd, int rw, off_t off, off_t len, bool waitflag, void *);
+int ntdb_fcntl_unlock(int fd, int rw, off_t off, off_t len, void *);
+
+/* transaction.c: */
+enum NTDB_ERROR ntdb_transaction_recover(struct ntdb_context *ntdb);
+ntdb_bool_err ntdb_needs_recovery(struct ntdb_context *ntdb);
+
+/*
+ * An open ntdb handle, as returned by ntdb_open and released by
+ * ntdb_close.  All file-level state lives in the separately allocated,
+ * reference-counted struct ntdb_file.
+ */
+struct ntdb_context {
+       /* Single list of all TDBs, to detect multiple opens. */
+       struct ntdb_context *next;
+
+       /* Filename of the database. */
+       const char *name;
+
+       /* Logging function */
+       void (*log_fn)(struct ntdb_context *ntdb,
+                      enum ntdb_log_level level,
+                      enum NTDB_ERROR ecode,
+                      const char *message,
+                      void *data);
+       void *log_data;
+
+       /* Open flags passed to ntdb_open. */
+       int open_flags;
+
+       /* low level (fcntl) lock functions. */
+       int (*lock_fn)(int fd, int rw, off_t off, off_t len, bool w, void *);
+       int (*unlock_fn)(int fd, int rw, off_t off, off_t len, void *);
+       void *lock_data;
+
+       /* the ntdb flags passed to ntdb_open. */
+       uint32_t flags;
+
+       /* Our statistics. */
+       struct ntdb_attribute_stats stats;
+
+       /* The actual file information */
+       struct ntdb_file *file;
+
+       /* Hash function. */
+       uint64_t (*hash_fn)(const void *key, size_t len, uint64_t seed, void *);
+       void *hash_data;
+       uint64_t hash_seed;
+
+       /* Our open hook, if any. */
+       enum NTDB_ERROR (*openhook)(int fd, void *data);
+       void *openhook_data;
+
+       /* Last error we returned. */
+       enum NTDB_ERROR last_error;
+
+       /* Are we accessing directly? (debugging check). */
+       int direct_access;
+
+       /* Set if we are in a transaction. */
+       struct ntdb_transaction *transaction;
+
+       /* What free table are we using? */
+       ntdb_off_t ftable_off;
+       unsigned int ftable;
+
+       /* IO methods: changes for transactions. */
+       const struct ntdb_methods *io;
+
+       /* Direct access information */
+       struct ntdb_access_hdr *access;
+};
+
+/* ntdb.c: */
+enum NTDB_ERROR COLD PRINTF_FMT(4, 5)
+       ntdb_logerr(struct ntdb_context *ntdb,
+                   enum NTDB_ERROR ecode,
+                   enum ntdb_log_level level,
+                   const char *fmt, ...);
+
+#ifdef NTDB_TRACE
+void ntdb_trace(struct ntdb_context *ntdb, const char *op);
+void ntdb_trace_seqnum(struct ntdb_context *ntdb, uint32_t seqnum, const char *op);
+void ntdb_trace_open(struct ntdb_context *ntdb, const char *op,
+                    unsigned hash_size, unsigned ntdb_flags, unsigned open_flags);
+void ntdb_trace_ret(struct ntdb_context *ntdb, const char *op, int ret);
+void ntdb_trace_retrec(struct ntdb_context *ntdb, const char *op, NTDB_DATA ret);
+void ntdb_trace_1rec(struct ntdb_context *ntdb, const char *op,
+                    NTDB_DATA rec);
+void ntdb_trace_1rec_ret(struct ntdb_context *ntdb, const char *op,
+                        NTDB_DATA rec, int ret);
+void ntdb_trace_1rec_retrec(struct ntdb_context *ntdb, const char *op,
+                           NTDB_DATA rec, NTDB_DATA ret);
+void ntdb_trace_2rec_flag_ret(struct ntdb_context *ntdb, const char *op,
+                             NTDB_DATA rec1, NTDB_DATA rec2, unsigned flag,
+                             int ret);
+void ntdb_trace_2rec_retrec(struct ntdb_context *ntdb, const char *op,
+                           NTDB_DATA rec1, NTDB_DATA rec2, NTDB_DATA ret);
+#else
+#define ntdb_trace(ntdb, op)
+#define ntdb_trace_seqnum(ntdb, seqnum, op)
+#define ntdb_trace_open(ntdb, op, hash_size, ntdb_flags, open_flags)
+#define ntdb_trace_ret(ntdb, op, ret)
+#define ntdb_trace_retrec(ntdb, op, ret)
+#define ntdb_trace_1rec(ntdb, op, rec)
+#define ntdb_trace_1rec_ret(ntdb, op, rec, ret)
+#define ntdb_trace_1rec_retrec(ntdb, op, rec, ret)
+#define ntdb_trace_2rec_flag_ret(ntdb, op, rec1, rec2, flag, ret)
+#define ntdb_trace_2rec_retrec(ntdb, op, rec1, rec2, ret)
+#endif /* !NTDB_TRACE */
+
+#endif
diff --git a/lib/ntdb/pyntdb.c b/lib/ntdb/pyntdb.c
new file mode 100644 (file)
index 0000000..1f80e42
--- /dev/null
@@ -0,0 +1,591 @@
+/*
+   Unix SMB/CIFS implementation.
+
+   Python interface to ntdb.  Simply modified from tdb version.
+
+   Copyright (C) 2004-2006 Tim Potter <tpot@samba.org>
+   Copyright (C) 2007-2008 Jelmer Vernooij <jelmer@samba.org>
+   Copyright (C) 2011 Rusty Russell <rusty@rustcorp.com.au>
+
+     ** NOTE! The following LGPL license applies to the ntdb
+     ** library. This does NOT imply that all of Samba is released
+     ** under the LGPL
+
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 3 of the License, or (at your option) any later version.
+
+   This library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <Python.h>
+#include "replace.h"
+#include "system/filesys.h"
+
+#ifndef Py_RETURN_NONE
+#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
+#endif
+
+/* Include ntdb headers */
+#include <ntdb.h>
+
+/* Python-level wrapper around one ntdb database handle. */
+typedef struct {
+       PyObject_HEAD
+       /* Handle returned by ntdb_open(). */
+       struct ntdb_context *ctx;
+       /* Set once close() has called ntdb_close(), so it is not closed twice. */
+       bool closed;
+} PyNtdbObject;
+
+staticforward PyTypeObject PyNtdb;
+
+/*
+ * Raise RuntimeError((code, message)) for the ntdb error @e.
+ *
+ * PyErr_SetObject() takes its own reference to the value, so the tuple
+ * built here must be released afterwards or it leaks on every error.
+ */
+static void PyErr_SetTDBError(enum NTDB_ERROR e)
+{
+       PyObject *value = Py_BuildValue("(i,s)", e, ntdb_errorstr(e));
+
+       if (value == NULL) {
+               /* Py_BuildValue already set an exception (e.g. MemoryError). */
+               return;
+       }
+       PyErr_SetObject(PyExc_RuntimeError, value);
+       Py_DECREF(value);
+}
+
+/*
+ * Build an NTDB_DATA that points at the bytes of the Python string @data.
+ * Nothing is copied: the result borrows the string's internal buffer.
+ */
+static NTDB_DATA PyString_AsNtdb_Data(PyObject *data)
+{
+       NTDB_DATA borrowed;
+       char *bytes = PyString_AsString(data);
+
+       borrowed.dptr = (unsigned char *)bytes;
+       borrowed.dsize = PyString_Size(data);
+       return borrowed;
+}
+
+/*
+ * Copy @data into a new Python string and release the ntdb buffer.
+ * data.dptr is freed whether or not the string could be created.
+ */
+static PyObject *PyString_FromNtdb_Data(NTDB_DATA data)
+{
+       const char *bytes = (const char *)data.dptr;
+       PyObject *str;
+
+       str = PyString_FromStringAndSize(bytes, data.dsize);
+       free(data.dptr);
+       return str;
+}
+
+/*
+ * If @ret is not NTDB_SUCCESS, set the matching Python exception and
+ * return NULL from the enclosing function.
+ *
+ * Wrapped in do { } while (0) so it behaves as a single statement inside
+ * if/else, and @ret is evaluated exactly once.
+ */
+#define PyErr_NTDB_ERROR_IS_ERR_RAISE(ret) \
+       do { \
+               enum NTDB_ERROR ntdb_err_ = (ret); \
+               if (ntdb_err_ != NTDB_SUCCESS) { \
+                       PyErr_SetTDBError(ntdb_err_); \
+                       return NULL; \
+               } \
+       } while (0)
+
+/*
+ * Log callback installed by py_ntdb_open(): prints
+ * "<db name>:<error string>:<message>" to stderr.  @level and @data
+ * are not used.
+ */
+static void stderr_log(struct ntdb_context *ntdb,
+                      enum ntdb_log_level level,
+                      enum NTDB_ERROR ecode,
+                      const char *message,
+                      void *data)
+{
+       const char *dbname = ntdb_name(ntdb);
+       const char *errstr = ntdb_errorstr(ecode);
+
+       fprintf(stderr, "%s:%s:%s\n", dbname, errstr, message);
+}
+
+/*
+ * Open (or create) an ntdb database and wrap it in a PyNtdbObject.
+ *
+ * Used both as the type's tp_new and as the module-level open() function.
+ * All arguments are optional keywords: name, ntdb_flags, flags, mode.
+ * Returns a new reference, or NULL with an exception set.
+ *
+ * NOTE(review): @type is ignored; the object is always allocated as
+ * PyNtdb, even though the type is declared subclassable.
+ */
+static PyObject *py_ntdb_open(PyTypeObject *type, PyObject *args, PyObject *kwargs)
+{
+       char *name = NULL;
+       int ntdb_flags = NTDB_DEFAULT, flags = O_RDWR, mode = 0600;
+       struct ntdb_context *ctx;
+       PyNtdbObject *ret;
+       union ntdb_attribute logattr;
+       const char *kwnames[] = { "name", "ntdb_flags", "flags", "mode", NULL };
+
+       if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|siii", cast_const2(char **, kwnames), &name, &ntdb_flags, &flags, &mode))
+               return NULL;
+
+       /* No filename given: force the NTDB_INTERNAL flag. */
+       if (name == NULL) {
+               ntdb_flags |= NTDB_INTERNAL;
+       }
+
+       /* Route ntdb log messages to stderr via stderr_log(). */
+       logattr.log.base.attr = NTDB_ATTRIBUTE_LOG;
+       logattr.log.base.next = NULL;
+       logattr.log.fn = stderr_log;
+       ctx = ntdb_open(name, ntdb_flags, flags, mode, &logattr);
+       if (ctx == NULL) {
+               /* Report the failure as IOError built from errno. */
+               PyErr_SetFromErrno(PyExc_IOError);
+               return NULL;
+       }
+
+       ret = PyObject_New(PyNtdbObject, &PyNtdb);
+       if (!ret) {
+               /* Do not leak the open database if allocation failed. */
+               ntdb_close(ctx);
+               return NULL;
+       }
+
+       ret->ctx = ctx;
+       ret->closed = false;
+       return (PyObject *)ret;
+}
+
+/* S.transaction_cancel(): cancel the active transaction; always returns None. */
+static PyObject *obj_transaction_cancel(PyNtdbObject *self)
+{
+       struct ntdb_context *ctx = self->ctx;
+
+       ntdb_transaction_cancel(ctx);
+       Py_RETURN_NONE;
+}
+
+/* S.transaction_commit(): returns None, or raises RuntimeError on failure. */
+static PyObject *obj_transaction_commit(PyNtdbObject *self)
+{
+       enum NTDB_ERROR ecode;
+
+       ecode = ntdb_transaction_commit(self->ctx);
+       if (ecode != NTDB_SUCCESS) {
+               PyErr_SetTDBError(ecode);
+               return NULL;
+       }
+       Py_RETURN_NONE;
+}
+
+/* S.transaction_prepare_commit(): returns None, or raises RuntimeError on failure. */
+static PyObject *obj_transaction_prepare_commit(PyNtdbObject *self)
+{
+       enum NTDB_ERROR ecode;
+
+       ecode = ntdb_transaction_prepare_commit(self->ctx);
+       if (ecode != NTDB_SUCCESS) {
+               PyErr_SetTDBError(ecode);
+               return NULL;
+       }
+       Py_RETURN_NONE;
+}
+
+/* S.transaction_start(): returns None, or raises RuntimeError on failure. */
+static PyObject *obj_transaction_start(PyNtdbObject *self)
+{
+       enum NTDB_ERROR ecode;
+
+       ecode = ntdb_transaction_start(self->ctx);
+       if (ecode != NTDB_SUCCESS) {
+               PyErr_SetTDBError(ecode);
+               return NULL;
+       }
+       Py_RETURN_NONE;
+}
+
+/* S.lock_all(): calls ntdb_lockall(); returns None or raises RuntimeError. */
+static PyObject *obj_lockall(PyNtdbObject *self)
+{
+       enum NTDB_ERROR ecode;
+
+       ecode = ntdb_lockall(self->ctx);
+       if (ecode != NTDB_SUCCESS) {
+               PyErr_SetTDBError(ecode);
+               return NULL;
+       }
+       Py_RETURN_NONE;
+}
+
+/* S.unlock_all(): calls ntdb_unlockall(); always returns None. */
+static PyObject *obj_unlockall(PyNtdbObject *self)
+{
+       struct ntdb_context *ctx = self->ctx;
+
+       ntdb_unlockall(ctx);
+       Py_RETURN_NONE;
+}
+
+/* S.read_lock_all(): calls ntdb_lockall_read(); returns None or raises RuntimeError. */
+static PyObject *obj_lockall_read(PyNtdbObject *self)
+{
+       enum NTDB_ERROR ecode;
+
+       ecode = ntdb_lockall_read(self->ctx);
+       if (ecode != NTDB_SUCCESS) {
+               PyErr_SetTDBError(ecode);
+               return NULL;
+       }
+       Py_RETURN_NONE;
+}
+
+/* S.read_unlock_all(): calls ntdb_unlockall_read(); always returns None. */
+static PyObject *obj_unlockall_read(PyNtdbObject *self)
+{
+       struct ntdb_context *ctx = self->ctx;
+
+       ntdb_unlockall_read(ctx);
+       Py_RETURN_NONE;
+}
+
+/*
+ * S.close(): close the database.  Calling it again is a no-op.
+ * The object is marked closed even when ntdb_close() reports failure,
+ * in which case RuntimeError (NTDB_ERR_IO) is raised.
+ */
+static PyObject *obj_close(PyNtdbObject *self)
+{
+       if (!self->closed) {
+               int rc = ntdb_close(self->ctx);
+
+               self->closed = true;
+               if (rc != 0) {
+                       PyErr_SetTDBError(NTDB_ERR_IO);
+                       return NULL;
+               }
+       }
+       Py_RETURN_NONE;
+}
+
+/*
+ * S.get(key) -> value: fetch the record stored under @key.
+ * Returns None when the key does not exist; raises TypeError for a
+ * non-string key and RuntimeError for any other ntdb failure.
+ */
+static PyObject *obj_get(PyNtdbObject *self, PyObject *args)
+{
+       NTDB_DATA key, data;
+       PyObject *py_key;
+       enum NTDB_ERROR ret;
+       if (!PyArg_ParseTuple(args, "O", &py_key))
+               return NULL;
+
+       key = PyString_AsNtdb_Data(py_key);
+       if (key.dptr == NULL) {
+               /* Not a string: the conversion already set TypeError. */
+               return NULL;
+       }
+       ret = ntdb_fetch(self->ctx, key, &data);
+       if (ret == NTDB_ERR_NOEXIST)
+               Py_RETURN_NONE;
+       PyErr_NTDB_ERROR_IS_ERR_RAISE(ret);
+       /* The helper copies the data and frees the fetched buffer. */
+       return PyString_FromNtdb_Data(data);
+}
+
+/*
+ * S.append(key, value) -> None: append @value to the record under @key.
+ * Raises TypeError if either argument is not a string, RuntimeError
+ * when ntdb_append() fails.
+ */
+static PyObject *obj_append(PyNtdbObject *self, PyObject *args)
+{
+       NTDB_DATA key, data;
+       PyObject *py_key, *py_data;
+       enum NTDB_ERROR ret;
+       if (!PyArg_ParseTuple(args, "OO", &py_key, &py_data))
+               return NULL;
+
+       key = PyString_AsNtdb_Data(py_key);
+       if (key.dptr == NULL)
+               return NULL;    /* conversion already set TypeError */
+       data = PyString_AsNtdb_Data(py_data);
+       if (data.dptr == NULL)
+               return NULL;    /* conversion already set TypeError */
+
+       ret = ntdb_append(self->ctx, key, data);
+       PyErr_NTDB_ERROR_IS_ERR_RAISE(ret);
+       Py_RETURN_NONE;
+}
+
+/*
+ * S.firstkey() -> data: return the first key in the database, or None
+ * when ntdb_firstkey() reports NTDB_ERR_NOEXIST.  Other errors raise
+ * RuntimeError.
+ */
+static PyObject *obj_firstkey(PyNtdbObject *self)
+{
+       NTDB_DATA first;
+       enum NTDB_ERROR ecode = ntdb_firstkey(self->ctx, &first);
+
+       if (ecode == NTDB_ERR_NOEXIST) {
+               Py_RETURN_NONE;
+       }
+       if (ecode != NTDB_SUCCESS) {
+               PyErr_SetTDBError(ecode);
+               return NULL;
+       }
+       return PyString_FromNtdb_Data(first);
+}
+
+/*
+ * S.nextkey(key) -> data: return the key following @key, or None when
+ * there is none.  Raises TypeError for a non-string key, MemoryError if
+ * the key copy cannot be allocated, RuntimeError for ntdb failures.
+ */
+static PyObject *obj_nextkey(PyNtdbObject *self, PyObject *args)
+{
+       NTDB_DATA key;
+       PyObject *py_key;
+       const char *src;
+       enum NTDB_ERROR ret;
+       if (!PyArg_ParseTuple(args, "O", &py_key))
+               return NULL;
+
+       src = PyString_AsString(py_key);
+       if (src == NULL)
+               return NULL;    /* conversion already set TypeError */
+
+       /* Malloc here, since ntdb_nextkey frees. */
+       key.dsize = PyString_Size(py_key);
+       /* Never ask for 0 bytes: malloc(0) may legitimately return NULL. */
+       key.dptr = malloc(key.dsize ? key.dsize : 1);
+       if (key.dptr == NULL)
+               return PyErr_NoMemory();
+       memcpy(key.dptr, src, key.dsize);
+
+       ret = ntdb_nextkey(self->ctx, &key);
+       if (ret == NTDB_ERR_NOEXIST)
+               Py_RETURN_NONE;
+       PyErr_NTDB_ERROR_IS_ERR_RAISE(ret);
+
+       return PyString_FromNtdb_Data(key);
+}
+
+/*
+ * S.delete(key) -> None: delete the record stored under @key.
+ * Raises TypeError for a non-string key, RuntimeError when
+ * ntdb_delete() fails.
+ */
+static PyObject *obj_delete(PyNtdbObject *self, PyObject *args)
+{
+       NTDB_DATA key;
+       PyObject *py_key;
+       enum NTDB_ERROR ret;
+       if (!PyArg_ParseTuple(args, "O", &py_key))
+               return NULL;
+
+       key = PyString_AsNtdb_Data(py_key);
+       if (key.dptr == NULL)
+               return NULL;    /* conversion already set TypeError */
+       ret = ntdb_delete(self->ctx, key);
+       PyErr_NTDB_ERROR_IS_ERR_RAISE(ret);
+       Py_RETURN_NONE;
+}
+
+/*
+ * S.has_key(key): return True if @key exists in the database, else False.
+ * Raises TypeError for a non-string key, and RuntimeError when the
+ * lookup failed for a reason other than "no such key".
+ */
+static PyObject *obj_has_key(PyNtdbObject *self, PyObject *args)
+{
+       NTDB_DATA key;
+       PyObject *py_key;
+       enum NTDB_ERROR err;
+       if (!PyArg_ParseTuple(args, "O", &py_key))
+               return NULL;
+
+       key = PyString_AsNtdb_Data(py_key);
+       if (key.dptr == NULL)
+               return NULL;    /* conversion already set TypeError */
+
+       /* Py_True/Py_False are ordinary objects: return a new reference. */
+       if (ntdb_exists(self->ctx, key)) {
+               Py_INCREF(Py_True);
+               return Py_True;
+       }
+       err = ntdb_error(self->ctx);
+       if (err != NTDB_ERR_NOEXIST && err != NTDB_SUCCESS) {
+               PyErr_SetTDBError(err);
+               return NULL;
+       }
+       Py_INCREF(Py_False);
+       return Py_False;
+}
+
+/*
+ * S.store(key, data, flag=REPLACE) -> None: store @data under @key.
+ * @flag is passed straight to ntdb_store().  Raises TypeError if key or
+ * data is not a string, RuntimeError when ntdb_store() fails.
+ */
+static PyObject *obj_store(PyNtdbObject *self, PyObject *args)
+{
+       NTDB_DATA key, value;
+       enum NTDB_ERROR ret;
+       int flag = NTDB_REPLACE;
+       PyObject *py_key, *py_value;
+
+       if (!PyArg_ParseTuple(args, "OO|i", &py_key, &py_value, &flag))
+               return NULL;
+
+       key = PyString_AsNtdb_Data(py_key);
+       if (key.dptr == NULL)
+               return NULL;    /* conversion already set TypeError */
+       value = PyString_AsNtdb_Data(py_value);
+       if (value.dptr == NULL)
+               return NULL;    /* conversion already set TypeError */
+
+       ret = ntdb_store(self->ctx, key, value, flag);
+       PyErr_NTDB_ERROR_IS_ERR_RAISE(ret);
+       Py_RETURN_NONE;
+}
+
+/* S.add_flag(flag) -> None: pass the unsigned integer @flag to ntdb_add_flag(). */
+static PyObject *obj_add_flag(PyNtdbObject *self, PyObject *args)
+{
+       unsigned new_flag;
+
+       if (PyArg_ParseTuple(args, "I", &new_flag) == 0) {
+               return NULL;
+       }
+       ntdb_add_flag(self->ctx, new_flag);
+       Py_RETURN_NONE;
+}
+
+/* S.remove_flag(flag) -> None: pass the unsigned integer @flag to ntdb_remove_flag(). */
+static PyObject *obj_remove_flag(PyNtdbObject *self, PyObject *args)
+{
+       unsigned old_flag;
+
+       if (PyArg_ParseTuple(args, "I", &old_flag) == 0) {
+               return NULL;
+       }
+       ntdb_remove_flag(self->ctx, old_flag);
+       Py_RETURN_NONE;
+}
+
+/* State of one key iteration over a PyNtdbObject. */
+typedef struct {
+       PyObject_HEAD
+       /* Key that the next call to ntdb_iter_next() will return. */
+       NTDB_DATA current;
+       /* True once there are no more keys to return. */
+       bool end;
+       /* Database being iterated; the iterator holds a reference to it. */
+       PyNtdbObject *iteratee;
+} PyNtdbIteratorObject;
+
+/*
+ * tp_iternext: return the pending key as a new string and advance to
+ * the following one.  Returns NULL without an exception at the end of
+ * the iteration, NULL with an exception on failure.
+ *
+ * Invariant shared with ntdb_iter_dealloc(): current.dptr is an owned
+ * buffer exactly while `end` is false.
+ */
+static PyObject *ntdb_iter_next(PyNtdbIteratorObject *self)
+{
+       enum NTDB_ERROR e;
+       PyObject *ret;
+       if (self->end)
+               return NULL;
+       ret = PyString_FromStringAndSize((const char *)self->current.dptr,
+                                        self->current.dsize);
+       if (ret == NULL)
+               return NULL;
+       e = ntdb_nextkey(self->iteratee->ctx, &self->current);
+       if (e != NTDB_SUCCESS) {
+               /*
+                * ntdb_nextkey frees the key it was given, so on any
+                * failure there is no pending key left: stop iterating
+                * rather than touching the stale pointer again.
+                */
+               self->end = true;
+               self->current.dptr = NULL;
+               self->current.dsize = 0;
+               if (e != NTDB_ERR_NOEXIST) {
+                       /* Do not leak the string built above. */
+                       Py_DECREF(ret);
+                       PyErr_SetTDBError(e);
+                       return NULL;
+               }
+       }
+       return ret;
+}
+
+/*
+ * tp_dealloc: release the pending key (if the iteration was abandoned
+ * before reaching the end), drop the database reference and free the
+ * iterator.
+ */
+static void ntdb_iter_dealloc(PyNtdbIteratorObject *self)
+{
+       if (!self->end)
+               free(self->current.dptr);
+       Py_DECREF(self->iteratee);
+       PyObject_Del(self);
+}
+
+/*
+ * Type object for the key iterator returned by ntdb_object_iter().
+ * The iterator is its own iterable (tp_iter returns self).
+ */
+PyTypeObject PyNtdbIterator = {
+       .tp_name = "Iterator",
+       .tp_basicsize = sizeof(PyNtdbIteratorObject),
+       .tp_iternext = (iternextfunc)ntdb_iter_next,
+       .tp_dealloc = (destructor)ntdb_iter_dealloc,
+       .tp_flags = Py_TPFLAGS_DEFAULT,
+       .tp_iter = PyObject_SelfIter,
+};
+
+/*
+ * iter(S) / S.iterkeys(): create an iterator positioned on the first
+ * key of the database.  Returns a new reference, or NULL with an
+ * exception set if allocation or ntdb_firstkey() fails.
+ */
+static PyObject *ntdb_object_iter(PyNtdbObject *self)
+{
+       PyNtdbIteratorObject *ret;
+       enum NTDB_ERROR e;
+
+       ret = PyObject_New(PyNtdbIteratorObject, &PyNtdbIterator);
+       if (!ret)
+               return NULL;
+       e = ntdb_firstkey(self->ctx, &ret->current);
+       if (e == NTDB_ERR_NOEXIST) {
+               /* Empty database: the iterator is exhausted from the start. */
+               ret->end = true;
+       } else if (e != NTDB_SUCCESS) {
+               /*
+                * Free the half-built iterator directly: its iteratee is
+                * not set yet, so the normal dealloc must not run on it.
+                */
+               PyObject_Del(ret);
+               PyErr_SetTDBError(e);
+               return NULL;
+       } else {
+               ret->end = false;
+       }
+       ret->iteratee = self;
+       Py_INCREF(self);
+       return (PyObject *)ret;
+}
+
+/* S.clear() -> None: wipe the whole database via ntdb_wipe_all(). */
+static PyObject *obj_clear(PyNtdbObject *self)
+{
+       enum NTDB_ERROR ecode;
+
+       ecode = ntdb_wipe_all(self->ctx);
+       if (ecode != NTDB_SUCCESS) {
+               PyErr_SetTDBError(ecode);
+               return NULL;
+       }
+       Py_RETURN_NONE;
+}
+
+/* S.enable_seqnum() -> None: set the NTDB_SEQNUM flag on the database. */
+static PyObject *obj_enable_seqnum(PyNtdbObject *self)
+{
+       struct ntdb_context *ctx = self->ctx;
+
+       ntdb_add_flag(ctx, NTDB_SEQNUM);
+       Py_RETURN_NONE;
+}
+
+/*
+ * Method table of the Ntdb type.  The calling-convention flag of each
+ * entry must match the C signature: functions that parse an argument
+ * tuple need METH_VARARGS, the others METH_NOARGS.
+ */
+static PyMethodDef ntdb_object_methods[] = {
+       { "transaction_cancel", (PyCFunction)obj_transaction_cancel, METH_NOARGS,
+               "S.transaction_cancel() -> None\n"
+               "Cancel the currently active transaction." },
+       { "transaction_commit", (PyCFunction)obj_transaction_commit, METH_NOARGS,
+               "S.transaction_commit() -> None\n"
+               "Commit the currently active transaction." },
+       { "transaction_prepare_commit", (PyCFunction)obj_transaction_prepare_commit, METH_NOARGS,
+               "S.transaction_prepare_commit() -> None\n"
+               "Prepare to commit the currently active transaction" },
+       { "transaction_start", (PyCFunction)obj_transaction_start, METH_NOARGS,
+               "S.transaction_start() -> None\n"
+               "Start a new transaction." },
+       { "lock_all", (PyCFunction)obj_lockall, METH_NOARGS, NULL },
+       { "unlock_all", (PyCFunction)obj_unlockall, METH_NOARGS, NULL },
+       { "read_lock_all", (PyCFunction)obj_lockall_read, METH_NOARGS, NULL },
+       { "read_unlock_all", (PyCFunction)obj_unlockall_read, METH_NOARGS, NULL },
+       { "close", (PyCFunction)obj_close, METH_NOARGS, NULL },
+       { "get", (PyCFunction)obj_get, METH_VARARGS, "S.get(key) -> value\n"
+               "Fetch a value." },
+       { "append", (PyCFunction)obj_append, METH_VARARGS, "S.append(key, value) -> None\n"
+               "Append data to an existing key." },
+       { "firstkey", (PyCFunction)obj_firstkey, METH_NOARGS, "S.firstkey() -> data\n"
+               "Return the first key in this database." },
+       /* obj_nextkey parses one argument, so it must be METH_VARARGS. */
+       { "nextkey", (PyCFunction)obj_nextkey, METH_VARARGS, "S.nextkey(key) -> data\n"
+               "Return the next key in this database." },
+       { "delete", (PyCFunction)obj_delete, METH_VARARGS, "S.delete(key) -> None\n"
+               "Delete an entry." },
+       { "has_key", (PyCFunction)obj_has_key, METH_VARARGS, "S.has_key(key) -> bool\n"
+               "Check whether key exists in this database." },
+       { "store", (PyCFunction)obj_store, METH_VARARGS, "S.store(key, data, flag=REPLACE) -> None\n"
+               "Store data." },
+       { "add_flag", (PyCFunction)obj_add_flag, METH_VARARGS, "S.add_flag(flag) -> None" },
+       { "remove_flag", (PyCFunction)obj_remove_flag, METH_VARARGS, "S.remove_flag(flag) -> None" },
+       { "iterkeys", (PyCFunction)ntdb_object_iter, METH_NOARGS, "S.iterkeys() -> iterator" },
+       { "clear", (PyCFunction)obj_clear, METH_NOARGS, "S.clear() -> None\n"
+               "Wipe the entire database." },
+       { "enable_seqnum", (PyCFunction)obj_enable_seqnum, METH_NOARGS,
+               "S.enable_seqnum() -> None" },
+       { NULL }
+};
+
+/* Getter for S.flags: the value of ntdb_get_flags() as a Python int. */
+static PyObject *obj_get_flags(PyNtdbObject *self, void *closure)
+{
+       long flags = ntdb_get_flags(self->ctx);
+
+       return PyInt_FromLong(flags);
+}
+
+/* Getter for S.filename: the value of ntdb_name() as a Python string. */
+static PyObject *obj_get_filename(PyNtdbObject *self, void *closure)
+{
+       const char *filename = ntdb_name(self->ctx);
+
+       return PyString_FromString(filename);
+}
+
+/* Getter for S.seqnum: the value of ntdb_get_seqnum() as a Python int. */
+static PyObject *obj_get_seqnum(PyNtdbObject *self, void *closure)
+{
+       long seqnum = ntdb_get_seqnum(self->ctx);
+
+       return PyInt_FromLong(seqnum);
+}
+
+
+/*
+ * Attributes of the Ntdb type.  All three have a getter only (the
+ * setter slot is NULL).
+ */
+static PyGetSetDef ntdb_object_getsetters[] = {
+       { cast_const(char *, "flags"), (getter)obj_get_flags, NULL, NULL },
+       { cast_const(char *, "filename"), (getter)obj_get_filename, NULL,
+         cast_const(char *, "The filename of this NTDB file.")},
+       { cast_const(char *, "seqnum"), (getter)obj_get_seqnum, NULL, NULL },
+       { NULL }
+};
+
+/*
+ * repr(S): "Ntdb(<internal>)" for databases with NTDB_INTERNAL set,
+ * otherwise "Ntdb('<name>')".
+ */
+static PyObject *ntdb_object_repr(PyNtdbObject *self)
+{
+       unsigned int internal = ntdb_get_flags(self->ctx) & NTDB_INTERNAL;
+
+       if (!internal) {
+               return PyString_FromFormat("Ntdb('%s')", ntdb_name(self->ctx));
+       }
+       return PyString_FromString("Ntdb(<internal>)");
+}
+
+/* tp_dealloc: close the database unless close() already did, then free the object. */
+static void ntdb_object_dealloc(PyNtdbObject *self)
+{
+       if (self->closed == false) {
+               ntdb_close(self->ctx);
+       }
+       self->ob_type->tp_free(self);
+}
+
+/*
+ * S[key]: fetch the value stored under @key.
+ * Raises TypeError for a non-string key, KeyError when the key does not
+ * exist, RuntimeError for any other ntdb failure.
+ */
+static PyObject *obj_getitem(PyNtdbObject *self, PyObject *key)
+{
+       NTDB_DATA lookup, found;
+       enum NTDB_ERROR ecode;
+
+       if (!PyString_Check(key)) {
+               PyErr_SetString(PyExc_TypeError, "Expected string as key");
+               return NULL;
+       }
+
+       lookup = PyString_AsNtdb_Data(key);
+       ecode = ntdb_fetch(self->ctx, lookup, &found);
+
+       if (ecode == NTDB_ERR_NOEXIST) {
+               PyErr_SetString(PyExc_KeyError, "No such NTDB entry");
+               return NULL;
+       }
+       if (ecode != NTDB_SUCCESS) {
+               PyErr_SetTDBError(ecode);
+               return NULL;
+       }
+       return PyString_FromNtdb_Data(found);
+}
+
+/*
+ * S[key] = value and del S[key].
+ * A NULL @value means deletion; otherwise the value replaces any
+ * existing record (NTDB_REPLACE).  Returns -1 with an exception set on
+ * failure (TypeError for non-string key/value, RuntimeError for ntdb
+ * errors), and the NTDB_SUCCESS code otherwise.
+ */
+static int obj_setitem(PyNtdbObject *self, PyObject *key, PyObject *value)
+{
+       NTDB_DATA tkey;
+       enum NTDB_ERROR ecode;
+
+       if (!PyString_Check(key)) {
+               PyErr_SetString(PyExc_TypeError, "Expected string as key");
+               return -1;
+       }
+       if (value != NULL && !PyString_Check(value)) {
+               PyErr_SetString(PyExc_TypeError, "Expected string as value");
+               return -1;
+       }
+
+       tkey = PyString_AsNtdb_Data(key);
+       if (value != NULL) {
+               NTDB_DATA tval = PyString_AsNtdb_Data(value);
+
+               ecode = ntdb_store(self->ctx, tkey, tval, NTDB_REPLACE);
+       } else {
+               ecode = ntdb_delete(self->ctx, tkey);
+       }
+
+       if (ecode == NTDB_SUCCESS) {
+               return ecode;
+       }
+       PyErr_SetTDBError(ecode);
+       return -1;
+}
+
+/* Mapping protocol: S[key] reads via obj_getitem, assignment/deletion via obj_setitem. */
+static PyMappingMethods ntdb_object_mapping = {
+       .mp_subscript = (binaryfunc)obj_getitem,
+       .mp_ass_subscript = (objobjargproc)obj_setitem,
+};
+/*
+ * The ntdb.Ntdb type: constructed by py_ntdb_open(), usable as a
+ * mapping and iterable over its keys.
+ */
+static PyTypeObject PyNtdb = {
+       .tp_name = "ntdb.Ntdb",
+       .tp_basicsize = sizeof(PyNtdbObject),
+       .tp_methods = ntdb_object_methods,
+       .tp_getset = ntdb_object_getsetters,
+       .tp_new = py_ntdb_open,
+       .tp_doc = "A NTDB file",
+       .tp_repr = (reprfunc)ntdb_object_repr,
+       .tp_dealloc = (destructor)ntdb_object_dealloc,
+       .tp_as_mapping = &ntdb_object_mapping,
+       .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_ITER,
+       .tp_iter = (getiterfunc)ntdb_object_iter,
+};
+
+/*
+ * Module-level functions.  The open() docstring lists exactly the
+ * keywords py_ntdb_open() accepts; there is no hash_size argument.
+ */
+static PyMethodDef ntdb_methods[] = {
+       { "open", (PyCFunction)py_ntdb_open, METH_VARARGS|METH_KEYWORDS, "open(name, ntdb_flags=NTDB_DEFAULT, flags=O_RDWR, mode=0600)\n"
+               "Open a NTDB file." },
+       { NULL }
+};
+
+/*
+ * Module initialisation: readies the two types, creates the "ntdb"
+ * module and publishes the store flags, open flags, version strings and
+ * the Ntdb type.  Returns silently if any of the first steps fails.
+ *
+ * NOTE(review): the module is created as "ntdb" but this entry point is
+ * named inittdb; Python 2 looks up init<modulename>, so this presumably
+ * should be initntdb -- confirm against the build's module name.
+ */
+void inittdb(void);
+void inittdb(void)
+{
+       PyObject *m;
+
+       if (PyType_Ready(&PyNtdb) < 0)
+               return;
+
+       if (PyType_Ready(&PyNtdbIterator) < 0)
+               return;
+
+       m = Py_InitModule3("ntdb", ntdb_methods, "NTDB is a simple key-value database similar to GDBM that supports multiple writers.");
+       if (m == NULL)
+               return;
+
+       /* Flags accepted by store(). */
+       PyModule_AddObject(m, "REPLACE", PyInt_FromLong(NTDB_REPLACE));
+       PyModule_AddObject(m, "INSERT", PyInt_FromLong(NTDB_INSERT));
+       PyModule_AddObject(m, "MODIFY", PyInt_FromLong(NTDB_MODIFY));
+
+       /* Database flags (ntdb_flags of open(), add_flag(), remove_flag()). */
+       PyModule_AddObject(m, "DEFAULT", PyInt_FromLong(NTDB_DEFAULT));
+       PyModule_AddObject(m, "INTERNAL", PyInt_FromLong(NTDB_INTERNAL));
+       PyModule_AddObject(m, "NOLOCK", PyInt_FromLong(NTDB_NOLOCK));
+       PyModule_AddObject(m, "NOMMAP", PyInt_FromLong(NTDB_NOMMAP));
+       PyModule_AddObject(m, "CONVERT", PyInt_FromLong(NTDB_CONVERT));
+       PyModule_AddObject(m, "NOSYNC", PyInt_FromLong(NTDB_NOSYNC));
+       PyModule_AddObject(m, "SEQNUM", PyInt_FromLong(NTDB_SEQNUM));
+       PyModule_AddObject(m, "ALLOW_NESTING", PyInt_FromLong(NTDB_ALLOW_NESTING));
+
+       PyModule_AddObject(m, "__docformat__", PyString_FromString("restructuredText"));
+
+       PyModule_AddObject(m, "__version__", PyString_FromString(PACKAGE_VERSION));
+
+       /* PyModule_AddObject steals a reference, so take one for the static type first. */
+       Py_INCREF(&PyNtdb);
+       PyModule_AddObject(m, "Ntdb", (PyObject *)&PyNtdb);
+
+       /* The iterator type is referenced but not exported under a module name. */
+       Py_INCREF(&PyNtdbIterator);
+}
similarity index 62%
rename from lib/tdb2/summary.c
rename to lib/ntdb/summary.c
index c7e93284e00fdbf7bd0e7ea53ceb02855585d621..28ffd61df9a16b3c2c3db10f37da7cc4e3b5539f 100644 (file)
 #define HISTO_WIDTH 70
 #define HISTO_HEIGHT 20
 
-static tdb_off_t count_hash(struct tdb_context *tdb,
-                           tdb_off_t hash_off, unsigned bits)
+static ntdb_off_t count_hash(struct ntdb_context *ntdb,
+                           ntdb_off_t hash_off, unsigned bits)
 {
-       const tdb_off_t *h;
-       tdb_off_t count = 0;
+       const ntdb_off_t *h;
+       ntdb_off_t count = 0;
        unsigned int i;
 
-       h = tdb_access_read(tdb, hash_off, sizeof(*h) << bits, true);
-       if (TDB_PTR_IS_ERR(h)) {
-               return TDB_ERR_TO_OFF(TDB_PTR_ERR(h));
+       h = ntdb_access_read(ntdb, hash_off, sizeof(*h) << bits, true);
+       if (NTDB_PTR_IS_ERR(h)) {
+               return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(h));
        }
        for (i = 0; i < (1 << bits); i++)
                count += (h[i] != 0);
 
-       tdb_access_release(tdb, h);
+       ntdb_access_release(ntdb, h);
        return count;
 }
 
-static enum TDB_ERROR summarize(struct tdb_context *tdb,
+static enum NTDB_ERROR summarize(struct ntdb_context *ntdb,
                                struct tally *hashes,
                                struct tally *ftables,
                                struct tally *fr,
@@ -76,39 +76,39 @@ static enum TDB_ERROR summarize(struct tdb_context *tdb,
                                struct tally *chains,
                                size_t *num_caps)
 {
-       tdb_off_t off;
-       tdb_len_t len;
-       tdb_len_t unc = 0;
+       ntdb_off_t off;
+       ntdb_len_t len;
+       ntdb_len_t unc = 0;
 
-       for (off = sizeof(struct tdb_header);
-            off < tdb->file->map_size;
+       for (off = sizeof(struct ntdb_header);
+            off < ntdb->file->map_size;
             off += len) {
                const union {
-                       struct tdb_used_record u;
-                       struct tdb_free_record f;
-                       struct tdb_recovery_record r;
+                       struct ntdb_used_record u;
+                       struct ntdb_free_record f;
+                       struct ntdb_recovery_record r;
                } *p;
                /* We might not be able to get the whole thing. */
-               p = tdb_access_read(tdb, off, sizeof(p->f), true);
-               if (TDB_PTR_IS_ERR(p)) {
-                       return TDB_PTR_ERR(p);
+               p = ntdb_access_read(ntdb, off, sizeof(p->f), true);
+               if (NTDB_PTR_IS_ERR(p)) {
+                       return NTDB_PTR_ERR(p);
                }
-               if (frec_magic(&p->f) != TDB_FREE_MAGIC) {
+               if (frec_magic(&p->f) != NTDB_FREE_MAGIC) {
                        if (unc > 1) {
                                tally_add(uncoal, unc);
                                unc = 0;
                        }
                }
 
-               if (p->r.magic == TDB_RECOVERY_INVALID_MAGIC
-                   || p->r.magic == TDB_RECOVERY_MAGIC) {
+               if (p->r.magic == NTDB_RECOVERY_INVALID_MAGIC
+                   || p->r.magic == NTDB_RECOVERY_MAGIC) {
                        len = sizeof(p->r) + p->r.max_len;
-               } else if (frec_magic(&p->f) == TDB_FREE_MAGIC) {
+               } else if (frec_magic(&p->f) == NTDB_FREE_MAGIC) {
                        len = frec_len(&p->f);
                        tally_add(fr, len);
                        len += sizeof(p->u);
                        unc++;
-               } else if (rec_magic(&p->u) == TDB_USED_MAGIC) {
+               } else if (rec_magic(&p->u) == NTDB_USED_MAGIC) {
                        len = sizeof(p->u)
                                + rec_key_length(&p->u)
                                + rec_data_length(&p->u)
@@ -117,105 +117,105 @@ static enum TDB_ERROR summarize(struct tdb_context *tdb,
                        tally_add(keys, rec_key_length(&p->u));
                        tally_add(data, rec_data_length(&p->u));
                        tally_add(extra, rec_extra_padding(&p->u));
-               } else if (rec_magic(&p->u) == TDB_HTABLE_MAGIC) {
-                       tdb_off_t count = count_hash(tdb,
+               } else if (rec_magic(&p->u) == NTDB_HTABLE_MAGIC) {
+                       ntdb_off_t count = count_hash(ntdb,
                                                     off + sizeof(p->u),
-                                                    TDB_SUBLEVEL_HASH_BITS);
-                       if (TDB_OFF_IS_ERR(count)) {
-                               return TDB_OFF_TO_ERR(count);
+                                                    NTDB_SUBLEVEL_HASH_BITS);
+                       if (NTDB_OFF_IS_ERR(count)) {
+                               return NTDB_OFF_TO_ERR(count);
                        }
                        tally_add(hashes, count);
                        tally_add(extra, rec_extra_padding(&p->u));
                        len = sizeof(p->u)
                                + rec_data_length(&p->u)
                                + rec_extra_padding(&p->u);
-               } else if (rec_magic(&p->u) == TDB_FTABLE_MAGIC) {
+               } else if (rec_magic(&p->u) == NTDB_FTABLE_MAGIC) {
                        len = sizeof(p->u)
                                + rec_data_length(&p->u)
                                + rec_extra_padding(&p->u);
                        tally_add(ftables, rec_data_length(&p->u));
                        tally_add(extra, rec_extra_padding(&p->u));
-               } else if (rec_magic(&p->u) == TDB_CHAIN_MAGIC) {
+               } else if (rec_magic(&p->u) == NTDB_CHAIN_MAGIC) {
                        len = sizeof(p->u)
                                + rec_data_length(&p->u)
                                + rec_extra_padding(&p->u);
                        tally_add(chains, 1);
                        tally_add(extra, rec_extra_padding(&p->u));
-               } else if (rec_magic(&p->u) == TDB_CAP_MAGIC) {
+               } else if (rec_magic(&p->u) == NTDB_CAP_MAGIC) {
                        len = sizeof(p->u)
                                + rec_data_length(&p->u)
                                + rec_extra_padding(&p->u);
                        (*num_caps)++;
                } else {
-                       len = dead_space(tdb, off);
-                       if (TDB_OFF_IS_ERR(len)) {
-                               return TDB_OFF_TO_ERR(len);
+                       len = dead_space(ntdb, off);
+                       if (NTDB_OFF_IS_ERR(len)) {
+                               return NTDB_OFF_TO_ERR(len);
                        }
                }
-               tdb_access_release(tdb, p);
+               ntdb_access_release(ntdb, p);
        }
        if (unc)
                tally_add(uncoal, unc);
-       return TDB_SUCCESS;
+       return NTDB_SUCCESS;
 }
 
-static void add_capabilities(struct tdb_context *tdb, char *summary)
+static void add_capabilities(struct ntdb_context *ntdb, char *summary)
 {
-       tdb_off_t off, next;
-       const struct tdb_capability *cap;
+       ntdb_off_t off, next;
+       const struct ntdb_capability *cap;
        size_t count = 0;
 
        /* Append to summary. */
        summary += strlen(summary);
 
-       off = tdb_read_off(tdb, offsetof(struct tdb_header, capabilities));
-       if (TDB_OFF_IS_ERR(off))
+       off = ntdb_read_off(ntdb, offsetof(struct ntdb_header, capabilities));
+       if (NTDB_OFF_IS_ERR(off))
                return;
 
        /* Walk capability list. */
        for (; off; off = next) {
-               cap = tdb_access_read(tdb, off, sizeof(*cap), true);
-               if (TDB_PTR_IS_ERR(cap)) {
+               cap = ntdb_access_read(ntdb, off, sizeof(*cap), true);
+               if (NTDB_PTR_IS_ERR(cap)) {
                        break;
                }
                count++;
                sprintf(summary, CAPABILITY_FORMAT,
-                       cap->type & TDB_CAP_TYPE_MASK,
+                       cap->type & NTDB_CAP_TYPE_MASK,
                        /* Noopen?  How did we get here? */
-                       (cap->type & TDB_CAP_NOOPEN) ? " (unopenable)"
-                       : ((cap->type & TDB_CAP_NOWRITE)
-                          && (cap->type & TDB_CAP_NOCHECK)) ? " (uncheckable,read-only)"
-                       : (cap->type & TDB_CAP_NOWRITE) ? " (read-only)"
-                       : (cap->type & TDB_CAP_NOCHECK) ? " (uncheckable)"
+                       (cap->type & NTDB_CAP_NOOPEN) ? " (unopenable)"
+                       : ((cap->type & NTDB_CAP_NOWRITE)
+                          && (cap->type & NTDB_CAP_NOCHECK)) ? " (uncheckable,read-only)"
+                       : (cap->type & NTDB_CAP_NOWRITE) ? " (read-only)"
+                       : (cap->type & NTDB_CAP_NOCHECK) ? " (uncheckable)"
                        : "");
                summary += strlen(summary);
                next = cap->next;
-               tdb_access_release(tdb, cap);
+               ntdb_access_release(ntdb, cap);
        }
 }
 
-_PUBLIC_ enum TDB_ERROR tdb_summary(struct tdb_context *tdb,
-                          enum tdb_summary_flags flags,
+_PUBLIC_ enum NTDB_ERROR ntdb_summary(struct ntdb_context *ntdb,
+                          enum ntdb_summary_flags flags,
                           char **summary)
 {
-       tdb_len_t len;
+       ntdb_len_t len;
        size_t num_caps = 0;
        struct tally *ftables, *hashes, *freet, *keys, *data, *extra, *uncoal,
                *chains;
        char *hashesg, *freeg, *keysg, *datag, *extrag, *uncoalg;
-       enum TDB_ERROR ecode;
+       enum NTDB_ERROR ecode;
 
        hashesg = freeg = keysg = datag = extrag = uncoalg = NULL;
 
-       ecode = tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false);
-       if (ecode != TDB_SUCCESS) {
-               return tdb->last_error = ecode;
+       ecode = ntdb_allrecord_lock(ntdb, F_RDLCK, NTDB_LOCK_WAIT, false);
+       if (ecode != NTDB_SUCCESS) {
+               return ntdb->last_error = ecode;
        }
 
-       ecode = tdb_lock_expand(tdb, F_RDLCK);
-       if (ecode != TDB_SUCCESS) {
-               tdb_allrecord_unlock(tdb, F_RDLCK);
-               return tdb->last_error = ecode;
+       ecode = ntdb_lock_expand(ntdb, F_RDLCK);
+       if (ecode != NTDB_SUCCESS) {
+               ntdb_allrecord_unlock(ntdb, F_RDLCK);
+               return ntdb->last_error = ecode;
        }
 
        /* Start stats off empty. */
@@ -229,19 +229,19 @@ _PUBLIC_ enum TDB_ERROR tdb_summary(struct tdb_context *tdb,
        chains = tally_new(HISTO_HEIGHT);
        if (!ftables || !hashes || !freet || !keys || !data || !extra
            || !uncoal || !chains) {
-               ecode = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                  "tdb_summary: failed to allocate"
+               ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+                                  "ntdb_summary: failed to allocate"
                                   " tally structures");
                goto unlock;
        }
 
-       ecode = summarize(tdb, hashes, ftables, freet, keys, data, extra,
+       ecode = summarize(ntdb, hashes, ftables, freet, keys, data, extra,
                          uncoal, chains, &num_caps);
-       if (ecode != TDB_SUCCESS) {
+       if (ecode != NTDB_SUCCESS) {
                goto unlock;
        }
 
-       if (flags & TDB_SUMMARY_HISTOGRAMS) {
+       if (flags & NTDB_SUMMARY_HISTOGRAMS) {
                hashesg = tally_histogram(hashes, HISTO_WIDTH, HISTO_HEIGHT);
                freeg = tally_histogram(freet, HISTO_WIDTH, HISTO_HEIGHT);
                keysg = tally_histogram(keys, HISTO_WIDTH, HISTO_HEIGHT);
@@ -263,13 +263,13 @@ _PUBLIC_ enum TDB_ERROR tdb_summary(struct tdb_context *tdb,
 
        *summary = malloc(len);
        if (!*summary) {
-               ecode = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                  "tdb_summary: failed to allocate string");
+               ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+                                  "ntdb_summary: failed to allocate string");
                goto unlock;
        }
 
        sprintf(*summary, SUMMARY_FORMAT,
-               (size_t)tdb->file->map_size,
+               (size_t)ntdb->file->map_size,
                tally_total(keys, NULL) + tally_total(data, NULL),
                tally_num(keys),
                tally_min(keys), tally_mean(keys), tally_max(keys),
@@ -284,29 +284,29 @@ _PUBLIC_ enum TDB_ERROR tdb_summary(struct tdb_context *tdb,
                tally_total(uncoal, NULL),
                tally_min(uncoal), tally_mean(uncoal), tally_max(uncoal),
                uncoalg ? uncoalg : "",
-               (unsigned)count_hash(tdb, offsetof(struct tdb_header,
+               (unsigned)count_hash(ntdb, offsetof(struct ntdb_header,
                                                   hashtable),
-                                    TDB_TOPLEVEL_HASH_BITS),
-               1 << TDB_TOPLEVEL_HASH_BITS,
+                                    NTDB_TOPLEVEL_HASH_BITS),
+               1 << NTDB_TOPLEVEL_HASH_BITS,
                tally_num(chains),
                tally_num(hashes),
                tally_min(hashes), tally_mean(hashes), tally_max(hashes),
                hashesg ? hashesg : "",
-               tally_total(keys, NULL) * 100.0 / tdb->file->map_size,
-               tally_total(data, NULL) * 100.0 / tdb->file->map_size,
-               tally_total(extra, NULL) * 100.0 / tdb->file->map_size,
-               tally_total(freet, NULL) * 100.0 / tdb->file->map_size,
+               tally_total(keys, NULL) * 100.0 / ntdb->file->map_size,
+               tally_total(data, NULL) * 100.0 / ntdb->file->map_size,
+               tally_total(extra, NULL) * 100.0 / ntdb->file->map_size,
+               tally_total(freet, NULL) * 100.0 / ntdb->file->map_size,
                (tally_num(keys) + tally_num(freet) + tally_num(hashes))
-               * sizeof(struct tdb_used_record) * 100.0 / tdb->file->map_size,
-               tally_num(ftables) * sizeof(struct tdb_freetable)
-               * 100.0 / tdb->file->map_size,
+               * sizeof(struct ntdb_used_record) * 100.0 / ntdb->file->map_size,
+               tally_num(ftables) * sizeof(struct ntdb_freetable)
+               * 100.0 / ntdb->file->map_size,
                (tally_num(hashes)
-                * (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS)
-                + (sizeof(tdb_off_t) << TDB_TOPLEVEL_HASH_BITS)
-                + sizeof(struct tdb_chain) * tally_num(chains))
-               * 100.0 / tdb->file->map_size);
+                * (sizeof(ntdb_off_t) << NTDB_SUBLEVEL_HASH_BITS)
+                + (sizeof(ntdb_off_t) << NTDB_TOPLEVEL_HASH_BITS)
+                + sizeof(struct ntdb_chain) * tally_num(chains))
+               * 100.0 / ntdb->file->map_size);
 
-       add_capabilities(tdb, *summary);
+       add_capabilities(ntdb, *summary);
 
 unlock:
        free(hashesg);
@@ -324,7 +324,7 @@ unlock:
        free(ftables);
        free(chains);
 
-       tdb_allrecord_unlock(tdb, F_RDLCK);
-       tdb_unlock_expand(tdb, F_RDLCK);
-       return tdb->last_error = ecode;
+       ntdb_allrecord_unlock(ntdb, F_RDLCK);
+       ntdb_unlock_expand(ntdb, F_RDLCK);
+       return ntdb->last_error = ecode;
 }
similarity index 59%
rename from lib/tdb2/test/api-12-store.c
rename to lib/ntdb/test/api-12-store.c
index 6a9dd95f5fb8b9bda916751de3cc950c845d14a0..24d94987559d4a5a70c56b3ebe8ac9f214b4a938 100644 (file)
@@ -1,5 +1,5 @@
 #include "config.h"
-#include "tdb2.h"
+#include "ntdb.h"
 #include "tap-interface.h"
 #include <ccan/hash/hash.h>
 #include <sys/types.h>
@@ -18,38 +18,38 @@ static uint64_t fixedhash(const void *key, size_t len, uint64_t seed, void *p)
 int main(int argc, char *argv[])
 {
        unsigned int i, j;
-       struct tdb_context *tdb;
+       struct ntdb_context *ntdb;
        uint64_t seed = 16014841315512641303ULL;
-       union tdb_attribute fixed_hattr
-               = { .hash = { .base = { TDB_ATTRIBUTE_HASH },
+       union ntdb_attribute fixed_hattr
+               = { .hash = { .base = { NTDB_ATTRIBUTE_HASH },
                              .fn = fixedhash,
                              .data = &seed } };
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT };
-       struct tdb_data key = { (unsigned char *)&j, sizeof(j) };
-       struct tdb_data data = { (unsigned char *)&j, sizeof(j) };
+       int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+                       NTDB_NOMMAP|NTDB_CONVERT };
+       NTDB_DATA key = { (unsigned char *)&j, sizeof(j) };
+       NTDB_DATA data = { (unsigned char *)&j, sizeof(j) };
 
        fixed_hattr.base.next = &tap_log_attr;
 
        plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 500 * 3) + 1);
        for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-12-store.tdb", flags[i],
+               ntdb = ntdb_open("run-12-store.ntdb", flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600, &fixed_hattr);
-               ok1(tdb);
-               if (!tdb)
+               ok1(ntdb);
+               if (!ntdb)
                        continue;
 
                /* We seemed to lose some keys.
                 * Insert and check they're in there! */
                for (j = 0; j < 500; j++) {
-                       struct tdb_data d = { NULL, 0 }; /* Bogus GCC warning */
-                       ok1(tdb_store(tdb, key, data, TDB_REPLACE) == 0);
-                       ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
-                       ok1(tdb_deq(d, data));
+                       NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */
+                       ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0);
+                       ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
+                       ok1(ntdb_deq(d, data));
                        free(d.dptr);
                }
-               tdb_close(tdb);
+               ntdb_close(ntdb);
        }
 
        ok1(tap_log_messages == 0);
diff --git a/lib/ntdb/test/api-13-delete.c b/lib/ntdb/test/api-13-delete.c
new file mode 100644 (file)
index 0000000..182252b
--- /dev/null
@@ -0,0 +1,205 @@
+#include "private.h" // For NTDB_TOPLEVEL_HASH_BITS
+#include <ccan/hash/hash.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "ntdb.h"
+#include "tap-interface.h"
+#include "logging.h"
+
+/* We rig the hash so adjacent-numbered records always clash: only the first unsigned int of key is hashed. */
+static uint64_t clash(const void *key, size_t len, uint64_t seed, void *priv)
+{ /* len, seed and priv are ignored. */
+       return ((uint64_t)*(const unsigned int *)key)
+               << (64 - NTDB_TOPLEVEL_HASH_BITS - 1); /* low NTDB_TOPLEVEL_HASH_BITS + 1 bits of the key end up as the top bits; presumably the bucket is picked from those top bits - TODO confirm */
+}
+
+/* We use the same seed which we saw a failure on; it is read through p, not taken from the seed argument. */
+static uint64_t fixedhash(const void *key, size_t len, uint64_t seed, void *p)
+{ /* p must point at a uint64_t (main() supplies &seed as .data). */
+       return hash64_stable((const unsigned char *)key, len,
+                            *(uint64_t *)p);
+}
+
+static bool store_records(struct ntdb_context *ntdb)
+{ /* Stores the ints 0..999 keyed by themselves and reads each one back; returns false on the first failure. */
+       int i;
+       NTDB_DATA key = { (unsigned char *)&i, sizeof(i) }; /* key and data both alias i, so they follow the loop counter */
+       NTDB_DATA d, data = { (unsigned char *)&i, sizeof(i) };
+
+       for (i = 0; i < 1000; i++) {
+               if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0)
+                       return false;
+               if (ntdb_fetch(ntdb, key, &d) != NTDB_SUCCESS /* d has no initializer: never read it unless the fetch succeeded */
+                   || !ntdb_deq(d, data))
+                       return false;
+               free(d.dptr);
+       }
+       return true;
+}
+
+static void test_val(struct ntdb_context *ntdb, uint64_t val)
+{ /* Runs 39 checks that store, fetch and delete the keys val, val + 1 and val + 2; all three are deleted again on return. */
+       uint64_t v;
+       NTDB_DATA key = { (unsigned char *)&v, sizeof(v) }; /* key and data both alias v: assigning v selects the record */
+       NTDB_DATA d, data = { (unsigned char *)&v, sizeof(v) };
+
+       /* Insert an entry, then delete it. */
+       v = val;
+       /* Delete should fail. */
+       ok1(ntdb_delete(ntdb, key) == NTDB_ERR_NOEXIST);
+       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+       /* Insert should succeed. */
+       ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+       /* Delete should succeed. */
+       ok1(ntdb_delete(ntdb, key) == 0);
+       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+       /* Re-add it, then add collision. */
+       ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+       v = val + 1;
+       ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+       /* Can find both? */
+       ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); /* v is val + 1 here */
+       ok1(d.dsize == data.dsize);
+       free(d.dptr);
+       v = val;
+       ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
+       ok1(d.dsize == data.dsize);
+       free(d.dptr);
+
+       /* Delete second one. */
+       v = val + 1;
+       ok1(ntdb_delete(ntdb, key) == 0);
+       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+       /* Re-add */
+       ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+       /* Now, try deleting first one. */
+       v = val;
+       ok1(ntdb_delete(ntdb, key) == 0);
+       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+       /* Can still find second? */
+       v = val + 1;
+       ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
+       ok1(d.dsize == data.dsize);
+       free(d.dptr);
+
+       /* Now, this will be ideally placed. */
+       v = val + 2;
+       ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+       /* This will collide with both. */
+       v = val;
+       ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+
+       /* We can still find them all, right? */
+       ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
+       ok1(d.dsize == data.dsize);
+       free(d.dptr);
+       v = val + 1;
+       ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
+       ok1(d.dsize == data.dsize);
+       free(d.dptr);
+       v = val + 2;
+       ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
+       ok1(d.dsize == data.dsize);
+       free(d.dptr);
+
+       /* And if we delete val + 1, that val + 2 should not move! */
+       v = val + 1;
+       ok1(ntdb_delete(ntdb, key) == 0);
+       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+       v = val;
+       ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
+       ok1(d.dsize == data.dsize);
+       free(d.dptr);
+       v = val + 2;
+       ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
+       ok1(d.dsize == data.dsize);
+       free(d.dptr);
+
+       /* Delete those two, so we are empty. */
+       ok1(ntdb_delete(ntdb, key) == 0); /* v is still val + 2 here */
+       v = val;
+       ok1(ntdb_delete(ntdb, key) == 0);
+
+       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+}
+
+int main(int argc, char *argv[])
+{ /* For each open-flag combination: exercise deletes under a forced-collision hash, then under the seeded stable hash. */
+       unsigned int i, j;
+       struct ntdb_context *ntdb;
+       uint64_t seed = 16014841315512641303ULL;
+       union ntdb_attribute clash_hattr
+               = { .hash = { .base = { NTDB_ATTRIBUTE_HASH },
+                             .fn = clash } };
+       union ntdb_attribute fixed_hattr
+               = { .hash = { .base = { NTDB_ATTRIBUTE_HASH },
+                             .fn = fixedhash,
+                             .data = &seed } };
+       int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+                       NTDB_NOMMAP|NTDB_CONVERT };
+       /* These two values gave trouble before. */
+       int vals[] = { 755, 837 };
+
+       clash_hattr.base.next = &tap_log_attr; /* chain the shared log attribute behind each hash attribute */
+       fixed_hattr.base.next = &tap_log_attr;
+
+       plan_tests(sizeof(flags) / sizeof(flags[0])
+                  * (39 * 3 + 5 + sizeof(vals)/sizeof(vals[0])*2) + 1); /* per flag: 3 test_val() runs of 39 checks, 5 checks in this loop, 2 per vals entry; +1 for the final log check */
+       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+               ntdb = ntdb_open("run-13-delete.ntdb", flags[i],
+                              O_RDWR|O_CREAT|O_TRUNC, 0600, &clash_hattr);
+               ok1(ntdb);
+               if (!ntdb)
+                       continue; /* the remaining planned checks for this flag are not run */
+
+               /* Check start of hash table. */
+               test_val(ntdb, 0);
+
+               /* Check end of hash table. */
+               test_val(ntdb, -1ULL); /* val + 1 wraps around to 0 for this one */
+
+               /* Check mixed bitpattern. */
+               test_val(ntdb, 0x123456789ABCDEF0ULL);
+
+               ok1(!ntdb->file || (ntdb->file->allrecord_lock.count == 0
+                                  && ntdb->file->num_lockrecs == 0)); /* no lock may still be held; a NULL file also passes */
+               ntdb_close(ntdb);
+
+               /* Deleting these entries in the db gave problems. */
+               ntdb = ntdb_open("run-13-delete.ntdb", flags[i],
+                              O_RDWR|O_CREAT|O_TRUNC, 0600, &fixed_hattr);
+               ok1(ntdb);
+               if (!ntdb)
+                       continue;
+
+               ok1(store_records(ntdb));
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+               for (j = 0; j < sizeof(vals)/sizeof(vals[0]); j++) {
+                       NTDB_DATA key;
+
+                       key.dptr = (unsigned char *)&vals[j]; /* int-sized key, matching what store_records() stored */
+                       key.dsize = sizeof(vals[j]);
+                       ok1(ntdb_delete(ntdb, key) == 0);
+                       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+               }
+               ntdb_close(ntdb);
+       }
+
+       ok1(tap_log_messages == 0);
+       return exit_status();
+}
diff --git a/lib/ntdb/test/api-14-exists.c b/lib/ntdb/test/api-14-exists.c
new file mode 100644 (file)
index 0000000..88663ca
--- /dev/null
@@ -0,0 +1,54 @@
+#include "config.h"
+#include "ntdb.h"
+#include "tap-interface.h"
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "logging.h"
+
+static bool test_records(struct ntdb_context *ntdb)
+{ /* Checks ntdb_exists() before and after storing, then deleting, keys 0..999; returns false on the first mismatch. */
+       int i;
+       NTDB_DATA key = { (unsigned char *)&i, sizeof(i) }; /* key and data both alias i, so they follow the loop counter */
+       NTDB_DATA data = { (unsigned char *)&i, sizeof(i) };
+
+       for (i = 0; i < 1000; i++) {
+               if (ntdb_exists(ntdb, key)) /* must be absent before the store */
+                       return false;
+               if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0)
+                       return false;
+               if (!ntdb_exists(ntdb, key)) /* and present right after it */
+                       return false;
+       }
+
+       for (i = 0; i < 1000; i++) {
+               if (!ntdb_exists(ntdb, key)) /* still present from the first loop */
+                       return false;
+               if (ntdb_delete(ntdb, key) != 0)
+                       return false;
+               if (ntdb_exists(ntdb, key)) /* gone once deleted */
+                       return false;
+       }
+       return true;
+}
+
+int main(int argc, char *argv[])
+{ /* Runs test_records() once per open-flag combination, then checks that nothing was logged. */
+       unsigned int i;
+       struct ntdb_context *ntdb;
+       int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+                       NTDB_NOMMAP|NTDB_CONVERT };
+
+       plan_tests(sizeof(flags) / sizeof(flags[0]) * 2 + 1); /* open + test_records per flag, +1 for the final log check */
+       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+               ntdb = ntdb_open("run-14-exists.ntdb", flags[i],
+                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+               if (ok1(ntdb)) {
+                       ok1(test_records(ntdb));
+                       ntdb_close(ntdb); /* only close a handle that was actually opened */
+               }
+       }
+       ok1(tap_log_messages == 0);
+       return exit_status();
+}
diff --git a/lib/ntdb/test/api-16-wipe_all.c b/lib/ntdb/test/api-16-wipe_all.c
new file mode 100644 (file)
index 0000000..c1bda8e
--- /dev/null
@@ -0,0 +1,46 @@
+#include "config.h"
+#include "ntdb.h"
+#include "tap-interface.h"
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "logging.h"
+
+static bool add_records(struct ntdb_context *ntdb)
+{ /* Stores the ints 0..999 keyed by themselves; returns false as soon as one store fails. */
+       int i;
+       NTDB_DATA key = { (unsigned char *)&i, sizeof(i) }; /* key and data both alias i, so they follow the loop counter */
+       NTDB_DATA data = { (unsigned char *)&i, sizeof(i) };
+
+       for (i = 0; i < 1000; i++) {
+               if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0)
+                       return false;
+       }
+       return true;
+}
+
+
+int main(int argc, char *argv[])
+{ /* Per open-flag combination: fill the database, wipe it, and check that no first key is left. */
+       unsigned int i;
+       struct ntdb_context *ntdb;
+       int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+                       NTDB_NOMMAP|NTDB_CONVERT };
+
+       plan_tests(sizeof(flags) / sizeof(flags[0]) * 4 + 1); /* 4 checks per flag, +1 for the final log check */
+       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+               ntdb = ntdb_open("run-16-wipe_all.ntdb", flags[i],
+                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+               if (ok1(ntdb)) {
+                       NTDB_DATA key; /* only handed to ntdb_firstkey(), which is expected to fail */
+                       ok1(add_records(ntdb));
+                       ok1(ntdb_wipe_all(ntdb) == NTDB_SUCCESS);
+                       ok1(ntdb_firstkey(ntdb, &key) == NTDB_ERR_NOEXIST);
+                       ntdb_close(ntdb);
+               }
+       }
+
+       ok1(tap_log_messages == 0);
+       return exit_status();
+}
diff --git a/lib/ntdb/test/api-21-parse_record.c b/lib/ntdb/test/api-21-parse_record.c
new file mode 100644 (file)
index 0000000..fa48562
--- /dev/null
@@ -0,0 +1,67 @@
+#include "config.h"
+#include "ntdb.h"
+#include "tap-interface.h"
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "logging.h"
+
+static enum NTDB_ERROR parse(NTDB_DATA key, NTDB_DATA data, NTDB_DATA *expected)
+{ /* Parse callback: succeeds only when the record's data equals *expected; key is not examined. */
+       if (!ntdb_deq(data, *expected))
+               return NTDB_ERR_EINVAL;
+       return NTDB_SUCCESS;
+}
+
+static enum NTDB_ERROR parse_err(NTDB_DATA key, NTDB_DATA data, void *unused)
+{ /* Parse callback that always fails, ignoring all of its arguments. */
+       return 100; /* arbitrary sentinel; test_records() expects ntdb_parse_record() to return exactly this */
+}
+
+static bool test_records(struct ntdb_context *ntdb)
+{ /* Stores keys 0..999, then checks ntdb_parse_record() on present, absent and callback-error cases; false on first failure. */
+       int i;
+       NTDB_DATA key = { (unsigned char *)&i, sizeof(i) }; /* key and data both alias i, so they follow the loop counter */
+       NTDB_DATA data = { (unsigned char *)&i, sizeof(i) };
+
+       for (i = 0; i < 1000; i++) {
+               if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0)
+                       return false;
+       }
+
+       for (i = 0; i < 1000; i++) {
+               if (ntdb_parse_record(ntdb, key, parse, &data) != NTDB_SUCCESS) /* &data is the value parse() compares against */
+                       return false;
+       }
+
+       if (ntdb_parse_record(ntdb, key, parse, &data) != NTDB_ERR_NOEXIST) /* i is 1000 after the loop: a key that was never stored */
+               return false;
+
+       /* Test error return from parse function. */
+       i = 0;
+       if (ntdb_parse_record(ntdb, key, parse_err, NULL) != 100)
+               return false;
+
+       return true;
+}
+
+int main(int argc, char *argv[])
+{ /* Runs test_records() once per open-flag combination, then checks that nothing was logged. */
+       unsigned int i;
+       struct ntdb_context *ntdb;
+       int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+                       NTDB_NOMMAP|NTDB_CONVERT };
+
+       plan_tests(sizeof(flags) / sizeof(flags[0]) * 2 + 1); /* open + test_records per flag, +1 for the final log check */
+       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+               ntdb = ntdb_open("api-21-parse_record.ntdb", flags[i],
+                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+               if (ok1(ntdb)) {
+                       ok1(test_records(ntdb));
+                       ntdb_close(ntdb); /* only close a handle that was actually opened */
+               }
+       }
+       ok1(tap_log_messages == 0);
+       return exit_status();
+}
diff --git a/lib/ntdb/test/api-55-transaction.c b/lib/ntdb/test/api-55-transaction.c
new file mode 100644 (file)
index 0000000..d51dd0b
--- /dev/null
@@ -0,0 +1,73 @@
+#include "private.h" // struct ntdb_context
+#include "ntdb.h"
+#include "tap-interface.h"
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include "logging.h"
+
+int main(int argc, char *argv[])
+{ /* Per open-flag combination: a cancelled transaction must leave no record, a committed one must leave it readable. */
+       unsigned int i;
+       struct ntdb_context *ntdb;
+       unsigned char *buffer;
+       int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; /* no NTDB_INTERNAL variants here; presumably transactions need a real file - TODO confirm */
+       NTDB_DATA key = ntdb_mkdata("key", 3);
+       NTDB_DATA data;
+
+       buffer = malloc(1000); /* NOTE(review): the result is not checked for NULL */
+       for (i = 0; i < 1000; i++)
+               buffer[i] = i; /* i is truncated to unsigned char, giving a repeating byte pattern */
+
+       plan_tests(sizeof(flags) / sizeof(flags[0]) * 20 + 1); /* 20 checks per flag, +1 for the final log check */
+
+       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+               ntdb = ntdb_open("run-55-transaction.ntdb", flags[i],
+                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+               ok1(ntdb);
+               if (!ntdb)
+                       continue;
+
+               ok1(ntdb_transaction_start(ntdb) == 0);
+               data.dptr = buffer;
+               data.dsize = 1000;
+               ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+               ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS); /* data is overwritten with the fetched record, freed below */
+               ok1(data.dsize == 1000);
+               ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
+               free(data.dptr);
+
+               /* Cancelling a transaction means no store */
+               ntdb_transaction_cancel(ntdb);
+               ok1(ntdb->file->allrecord_lock.count == 0
+                   && ntdb->file->num_lockrecs == 0); /* no lock may still be held after the cancel */
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+               ok1(ntdb_fetch(ntdb, key, &data) == NTDB_ERR_NOEXIST);
+
+               /* Commit the transaction. */
+               ok1(ntdb_transaction_start(ntdb) == 0);
+               data.dptr = buffer; /* point data back at the source buffer before storing again */
+               data.dsize = 1000;
+               ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+               ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS);
+               ok1(data.dsize == 1000);
+               ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
+               free(data.dptr);
+               ok1(ntdb_transaction_commit(ntdb) == 0);
+               ok1(ntdb->file->allrecord_lock.count == 0
+                   && ntdb->file->num_lockrecs == 0); /* no lock may still be held after the commit */
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+               ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS); /* the record must still be there after the commit */
+               ok1(data.dsize == 1000);
+               ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
+               free(data.dptr);
+
+               ntdb_close(ntdb);
+       }
+
+       ok1(tap_log_messages == 0);
+       free(buffer);
+       return exit_status();
+}
similarity index 54%
rename from lib/tdb2/test/api-80-tdb_fd.c
rename to lib/ntdb/test/api-80-tdb_fd.c
index 63967b8aa6b152161f249769d9f4a7408e22e805..39a9df414eb64f38cdf72729f51e7841ae69ee8b 100644 (file)
@@ -1,5 +1,5 @@
 #include "config.h"
-#include "tdb2.h"
+#include "ntdb.h"
 #include "tap-interface.h"
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -9,23 +9,23 @@
 int main(int argc, char *argv[])
 {
        unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT };
+       struct ntdb_context *ntdb;
+       int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+                       NTDB_NOMMAP|NTDB_CONVERT };
 
        plan_tests(sizeof(flags) / sizeof(flags[0]) * 3);
        for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("api-80-tdb_fd.tdb", flags[i],
+               ntdb = ntdb_open("api-80-ntdb_fd.ntdb", flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               if (!ok1(tdb))
+               if (!ok1(ntdb))
                        continue;
 
-               if (flags[i] & TDB_INTERNAL)
-                       ok1(tdb_fd(tdb) == -1);
+               if (flags[i] & NTDB_INTERNAL)
+                       ok1(ntdb_fd(ntdb) == -1);
                else
-                       ok1(tdb_fd(tdb) > 2);
-               tdb_close(tdb);
+                       ok1(ntdb_fd(ntdb) > 2);
+               ntdb_close(ntdb);
                ok1(tap_log_messages == 0);
        }
        return exit_status();
diff --git a/lib/ntdb/test/api-81-seqnum.c b/lib/ntdb/test/api-81-seqnum.c
new file mode 100644 (file)
index 0000000..93ad53a
--- /dev/null
@@ -0,0 +1,69 @@
+#include "config.h"
+#include "ntdb.h"
+#include "tap-interface.h"
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include "logging.h"
+
+int main(int argc, char *argv[])
+{ /* Per open-flag combination (always with NTDB_SEQNUM): check which operations advance ntdb_get_seqnum(). */
+       unsigned int i, seq; /* seq tracks the sequence number the test expects next */
+       struct ntdb_context *ntdb;
+       NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */
+       NTDB_DATA key = ntdb_mkdata("key", 3);
+       NTDB_DATA data = ntdb_mkdata("data", 4);
+       int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+                       NTDB_NOMMAP|NTDB_CONVERT };
+
+       plan_tests(sizeof(flags) / sizeof(flags[0]) * 15 + 4 * 13); /* 15 checks per flag, plus 13 transaction checks for each of the 4 non-INTERNAL flags */
+       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+               ntdb = ntdb_open("api-81-seqnum.ntdb", flags[i]|NTDB_SEQNUM,
+                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+               if (!ok1(ntdb))
+                       continue;
+
+               seq = 0;
+               ok1(ntdb_get_seqnum(ntdb) == seq);
+               ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+               ok1(ntdb_get_seqnum(ntdb) == ++seq);
+               /* Fetch doesn't change seqnum */
+               if (ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS))
+                       free(d.dptr);
+               ok1(ntdb_get_seqnum(ntdb) == seq);
+               ok1(ntdb_append(ntdb, key, data) == NTDB_SUCCESS);
+               ok1(ntdb_get_seqnum(ntdb) == ++seq);
+
+               ok1(ntdb_delete(ntdb, key) == NTDB_SUCCESS);
+               ok1(ntdb_get_seqnum(ntdb) == ++seq);
+               /* Empty append works */
+               ok1(ntdb_append(ntdb, key, data) == NTDB_SUCCESS); /* the key was deleted just above, so nothing exists to append to */
+               ok1(ntdb_get_seqnum(ntdb) == ++seq);
+
+               ok1(ntdb_wipe_all(ntdb) == NTDB_SUCCESS);
+               ok1(ntdb_get_seqnum(ntdb) == ++seq);
+
+               if (!(flags[i] & NTDB_INTERNAL)) { /* transaction checks are skipped for NTDB_INTERNAL databases */
+                       ok1(ntdb_transaction_start(ntdb) == NTDB_SUCCESS);
+                       ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+                       ok1(ntdb_get_seqnum(ntdb) == ++seq);
+                       ok1(ntdb_append(ntdb, key, data) == NTDB_SUCCESS);
+                       ok1(ntdb_get_seqnum(ntdb) == ++seq);
+                       ok1(ntdb_delete(ntdb, key) == NTDB_SUCCESS);
+                       ok1(ntdb_get_seqnum(ntdb) == ++seq);
+                       ok1(ntdb_transaction_commit(ntdb) == NTDB_SUCCESS);
+                       ok1(ntdb_get_seqnum(ntdb) == seq); /* the commit itself is expected to leave the number unchanged */
+
+                       ok1(ntdb_transaction_start(ntdb) == NTDB_SUCCESS);
+                       ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+                       ok1(ntdb_get_seqnum(ntdb) == seq + 1); /* seq is deliberately not advanced: the cancel below must undo this */
+                       ntdb_transaction_cancel(ntdb);
+                       ok1(ntdb_get_seqnum(ntdb) == seq);
+               }
+               ntdb_close(ntdb);
+               ok1(tap_log_messages == 0);
+       }
+       return exit_status();
+}
similarity index 59%
rename from lib/tdb2/test/api-82-lockattr.c
rename to lib/ntdb/test/api-82-lockattr.c
index b229eab83cd0232f2402827bc2cd7b904bb18958..51bb939f590946870fa8d4e7df253795ba4408bd 100644 (file)
@@ -1,5 +1,5 @@
-#include "private.h" // for tdb_fcntl_unlock
-#include "tdb2.h"
+#include "private.h" // for ntdb_fcntl_unlock
+#include "ntdb.h"
 #include "tap-interface.h"
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -35,7 +35,7 @@ static int mylock(int fd, int rw, off_t off, off_t len, bool waitflag,
 }
 
 static int trav_err;
-static int trav(struct tdb_context *tdb, TDB_DATA k, TDB_DATA d, int *terr)
+static int trav(struct ntdb_context *ntdb, NTDB_DATA k, NTDB_DATA d, int *terr)
 {
        *terr = trav_err;
        return 0;
@@ -44,193 +44,193 @@ static int trav(struct tdb_context *tdb, TDB_DATA k, TDB_DATA d, int *terr)
 int main(int argc, char *argv[])
 {
        unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
-       union tdb_attribute lock_attr;
-       struct tdb_data key = tdb_mkdata("key", 3);
-       struct tdb_data data = tdb_mkdata("data", 4);
+       struct ntdb_context *ntdb;
+       int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
+       union ntdb_attribute lock_attr;
+       NTDB_DATA key = ntdb_mkdata("key", 3);
+       NTDB_DATA data = ntdb_mkdata("data", 4);
        int lock_err;
 
-       lock_attr.base.attr = TDB_ATTRIBUTE_FLOCK;
+       lock_attr.base.attr = NTDB_ATTRIBUTE_FLOCK;
        lock_attr.base.next = &tap_log_attr;
        lock_attr.flock.lock = mylock;
-       lock_attr.flock.unlock = tdb_fcntl_unlock;
+       lock_attr.flock.unlock = ntdb_fcntl_unlock;
        lock_attr.flock.data = &lock_err;
 
        plan_tests(sizeof(flags) / sizeof(flags[0]) * 80);
 
        for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               struct tdb_data d;
+               NTDB_DATA d;
 
                /* Nonblocking open; expect no error message. */
                lock_err = EAGAIN;
-               tdb = tdb_open("run-82-lockattr.tdb", flags[i],
+               ntdb = ntdb_open("run-82-lockattr.ntdb", flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr);
                ok(errno == lock_err, "Errno is %u", errno);
-               ok1(!tdb);
+               ok1(!ntdb);
                ok1(tap_log_messages == 0);
 
                lock_err = EINTR;
-               tdb = tdb_open("run-82-lockattr.tdb", flags[i],
+               ntdb = ntdb_open("run-82-lockattr.ntdb", flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr);
                ok(errno == lock_err, "Errno is %u", errno);
-               ok1(!tdb);
+               ok1(!ntdb);
                ok1(tap_log_messages == 0);
 
                /* Forced fail open. */
                lock_err = ENOMEM;
-               tdb = tdb_open("run-82-lockattr.tdb", flags[i],
+               ntdb = ntdb_open("run-82-lockattr.ntdb", flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr);
                ok1(errno == lock_err);
-               ok1(!tdb);
+               ok1(!ntdb);
                ok1(tap_log_messages == 1);
                tap_log_messages = 0;
 
                lock_err = 0;
-               tdb = tdb_open("run-82-lockattr.tdb", flags[i],
+               ntdb = ntdb_open("run-82-lockattr.ntdb", flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr);
-               if (!ok1(tdb))
+               if (!ok1(ntdb))
                        continue;
                ok1(tap_log_messages == 0);
 
                /* Nonblocking store. */
                lock_err = EAGAIN;
-               ok1(tdb_store(tdb, key, data, TDB_REPLACE) == TDB_ERR_LOCK);
+               ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == NTDB_ERR_LOCK);
                ok1(tap_log_messages == 0);
                lock_err = EINTR;
-               ok1(tdb_store(tdb, key, data, TDB_REPLACE) == TDB_ERR_LOCK);
+               ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == NTDB_ERR_LOCK);
                ok1(tap_log_messages == 0);
                lock_err = ENOMEM;
-               ok1(tdb_store(tdb, key, data, TDB_REPLACE) == TDB_ERR_LOCK);
+               ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == NTDB_ERR_LOCK);
                ok1(tap_log_messages == 1);
                tap_log_messages = 0;
 
                /* Nonblocking fetch. */
                lock_err = EAGAIN;
-               ok1(!tdb_exists(tdb, key));
+               ok1(!ntdb_exists(ntdb, key));
                ok1(tap_log_messages == 0);
                lock_err = EINTR;
-               ok1(!tdb_exists(tdb, key));
+               ok1(!ntdb_exists(ntdb, key));
                ok1(tap_log_messages == 0);
                lock_err = ENOMEM;
-               ok1(!tdb_exists(tdb, key));
+               ok1(!ntdb_exists(ntdb, key));
                ok1(tap_log_messages == 1);
                tap_log_messages = 0;
 
                lock_err = EAGAIN;
-               ok1(tdb_fetch(tdb, key, &d) == TDB_ERR_LOCK);
+               ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_LOCK);
                ok1(tap_log_messages == 0);
                lock_err = EINTR;
-               ok1(tdb_fetch(tdb, key, &d) == TDB_ERR_LOCK);
+               ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_LOCK);
                ok1(tap_log_messages == 0);
                lock_err = ENOMEM;
-               ok1(tdb_fetch(tdb, key, &d) == TDB_ERR_LOCK);
+               ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_LOCK);
                ok1(tap_log_messages == 1);
                tap_log_messages = 0;
 
                /* Nonblocking delete. */
                lock_err = EAGAIN;
-               ok1(tdb_delete(tdb, key) == TDB_ERR_LOCK);
+               ok1(ntdb_delete(ntdb, key) == NTDB_ERR_LOCK);
                ok1(tap_log_messages == 0);
                lock_err = EINTR;
-               ok1(tdb_delete(tdb, key) == TDB_ERR_LOCK);
+               ok1(ntdb_delete(ntdb, key) == NTDB_ERR_LOCK);
                ok1(tap_log_messages == 0);
                lock_err = ENOMEM;
-               ok1(tdb_delete(tdb, key) == TDB_ERR_LOCK);
+               ok1(ntdb_delete(ntdb, key) == NTDB_ERR_LOCK);
                ok1(tap_log_messages == 1);
                tap_log_messages = 0;
 
                /* Nonblocking locks. */
                lock_err = EAGAIN;
-               ok1(tdb_chainlock(tdb, key) == TDB_ERR_LOCK);
+               ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_LOCK);
                ok1(tap_log_messages == 0);
                lock_err = EINTR;
-               ok1(tdb_chainlock(tdb, key) == TDB_ERR_LOCK);
+               ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_LOCK);
                ok1(tap_log_messages == 0);
                lock_err = ENOMEM;
-               ok1(tdb_chainlock(tdb, key) == TDB_ERR_LOCK);
+               ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_LOCK);
                ok1(tap_log_messages == 1);
                tap_log_messages = 0;
 
                lock_err = EAGAIN;
-               ok1(tdb_chainlock_read(tdb, key) == TDB_ERR_LOCK);
+               ok1(ntdb_chainlock_read(ntdb, key) == NTDB_ERR_LOCK);
                ok1(tap_log_messages == 0);
                lock_err = EINTR;
-               ok1(tdb_chainlock_read(tdb, key) == TDB_ERR_LOCK);
+               ok1(ntdb_chainlock_read(ntdb, key) == NTDB_ERR_LOCK);
                ok1(tap_log_messages == 0);
                lock_err = ENOMEM;
-               ok1(tdb_chainlock_read(tdb, key) == TDB_ERR_LOCK);
+               ok1(ntdb_chainlock_read(ntdb, key) == NTDB_ERR_LOCK);
                ok1(tap_log_messages == 1);
                tap_log_messages = 0;
 
                lock_err = EAGAIN;
-               ok1(tdb_lockall(tdb) == TDB_ERR_LOCK);
+               ok1(ntdb_lockall(ntdb) == NTDB_ERR_LOCK);
                ok1(tap_log_messages == 0);
                lock_err = EINTR;
-               ok1(tdb_lockall(tdb) == TDB_ERR_LOCK);
+               ok1(ntdb_lockall(ntdb) == NTDB_ERR_LOCK);
                ok1(tap_log_messages == 0);
                lock_err = ENOMEM;
-               ok1(tdb_lockall(tdb) == TDB_ERR_LOCK);
+               ok1(ntdb_lockall(ntdb) == NTDB_ERR_LOCK);
                /* This actually does divide and conquer. */
                ok1(tap_log_messages > 0);
                tap_log_messages = 0;
 
                lock_err = EAGAIN;
-               ok1(tdb_lockall_read(tdb) == TDB_ERR_LOCK);
+               ok1(ntdb_lockall_read(ntdb) == NTDB_ERR_LOCK);
                ok1(tap_log_messages == 0);
                lock_err = EINTR;
-               ok1(tdb_lockall_read(tdb) == TDB_ERR_LOCK);
+               ok1(ntdb_lockall_read(ntdb) == NTDB_ERR_LOCK);
                ok1(tap_log_messages == 0);
                lock_err = ENOMEM;
-               ok1(tdb_lockall_read(tdb) == TDB_ERR_LOCK);
+               ok1(ntdb_lockall_read(ntdb) == NTDB_ERR_LOCK);
                ok1(tap_log_messages > 0);
                tap_log_messages = 0;
 
                /* Nonblocking traverse; go nonblock partway through. */
                lock_err = 0;
-               ok1(tdb_store(tdb, key, data, TDB_REPLACE) == 0);
+               ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0);
                trav_err = EAGAIN;
-               ok1(tdb_traverse(tdb, trav, &lock_err) == TDB_ERR_LOCK);
+               ok1(ntdb_traverse(ntdb, trav, &lock_err) == NTDB_ERR_LOCK);
                ok1(tap_log_messages == 0);
                trav_err = EINTR;
                lock_err = 0;
-               ok1(tdb_traverse(tdb, trav, &lock_err) == TDB_ERR_LOCK);
+               ok1(ntdb_traverse(ntdb, trav, &lock_err) == NTDB_ERR_LOCK);
                ok1(tap_log_messages == 0);
                trav_err = ENOMEM;
                lock_err = 0;
-               ok1(tdb_traverse(tdb, trav, &lock_err) == TDB_ERR_LOCK);
+               ok1(ntdb_traverse(ntdb, trav, &lock_err) == NTDB_ERR_LOCK);
                ok1(tap_log_messages == 1);
                tap_log_messages = 0;
 
                /* Nonblocking transactions. */
                lock_err = EAGAIN;
-               ok1(tdb_transaction_start(tdb) == TDB_ERR_LOCK);
+               ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_LOCK);
                ok1(tap_log_messages == 0);
                lock_err = EINTR;
-               ok1(tdb_transaction_start(tdb) == TDB_ERR_LOCK);
+               ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_LOCK);
                ok1(tap_log_messages == 0);
                lock_err = ENOMEM;
-               ok1(tdb_transaction_start(tdb) == TDB_ERR_LOCK);
+               ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_LOCK);
                ok1(tap_log_messages == 1);
                tap_log_messages = 0;
 
                /* Nonblocking transaction prepare. */
                lock_err = 0;
-               ok1(tdb_transaction_start(tdb) == 0);
-               ok1(tdb_delete(tdb, key) == 0);
+               ok1(ntdb_transaction_start(ntdb) == 0);
+               ok1(ntdb_delete(ntdb, key) == 0);
 
                lock_err = EAGAIN;
-               ok1(tdb_transaction_prepare_commit(tdb) == TDB_ERR_LOCK);
+               ok1(ntdb_transaction_prepare_commit(ntdb) == NTDB_ERR_LOCK);
                ok1(tap_log_messages == 0);
 
                lock_err = 0;
-               ok1(tdb_transaction_prepare_commit(tdb) == 0);
-               ok1(tdb_transaction_commit(tdb) == 0);
+               ok1(ntdb_transaction_prepare_commit(ntdb) == 0);
+               ok1(ntdb_transaction_commit(ntdb) == 0);
 
                /* And the transaction was committed, right? */
-               ok1(!tdb_exists(tdb, key));
-               tdb_close(tdb);
+               ok1(!ntdb_exists(ntdb, key));
+               ntdb_close(ntdb);
                ok1(tap_log_messages == 0);
        }
        return exit_status();
similarity index 65%
rename from lib/tdb2/test/api-83-openhook.c
rename to lib/ntdb/test/api-83-openhook.c
index 191cf068c10209206db33ce576aa4d4b45e188c8..9f474c9ab8b87675f0d28b97893fdbbe20be52a9 100644 (file)
@@ -1,5 +1,5 @@
 #include "config.h"
-#include "tdb2.h"
+#include "ntdb.h"
 #include "tap-interface.h"
 #include <sys/types.h>
 #include <sys/stat.h>
 #include "external-agent.h"
 #include "logging.h"
 
-static enum TDB_ERROR clear_if_first(int fd, void *arg)
+static enum NTDB_ERROR clear_if_first(int fd, void *arg)
 {
 /* We hold a lock offset 4 always, so we can tell if anyone is holding it.
- * (This is compatible with tdb1's TDB_CLEAR_IF_FIRST flag).  */
+ * (This is compatible with tdb's TDB_CLEAR_IF_FIRST flag).  */
        struct flock fl;
 
        if (arg != clear_if_first)
-               return TDB_ERR_CORRUPT;
+               return NTDB_ERR_CORRUPT;
 
        fl.l_type = F_WRLCK;
        fl.l_whence = SEEK_SET;
@@ -29,27 +29,27 @@ static enum TDB_ERROR clear_if_first(int fd, void *arg)
                /* We must be first ones to open it! */
                diag("truncating file!");
                if (ftruncate(fd, 0) != 0) {
-                       return TDB_ERR_IO;
+                       return NTDB_ERR_IO;
                }
        }
        fl.l_type = F_RDLCK;
        if (fcntl(fd, F_SETLKW, &fl) != 0) {
-               return TDB_ERR_IO;
+               return NTDB_ERR_IO;
        }
-       return TDB_SUCCESS;
+       return NTDB_SUCCESS;
 }
 
 int main(int argc, char *argv[])
 {
        unsigned int i;
-       struct tdb_context *tdb;
+       struct ntdb_context *ntdb;
        struct agent *agent;
-       union tdb_attribute cif;
-       struct tdb_data key = tdb_mkdata("key", 3);
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
+       union ntdb_attribute cif;
+       NTDB_DATA key = ntdb_mkdata("key", 3);
+       int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
 
-       cif.openhook.base.attr = TDB_ATTRIBUTE_OPENHOOK;
+       cif.openhook.base.attr = NTDB_ATTRIBUTE_OPENHOOK;
        cif.openhook.base.next = &tap_log_attr;
        cif.openhook.fn = clear_if_first;
        cif.openhook.data = clear_if_first;
@@ -58,33 +58,33 @@ int main(int argc, char *argv[])
        plan_tests(sizeof(flags) / sizeof(flags[0]) * 13);
        for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
                /* Create it */
-               tdb = tdb_open("run-83-openhook.tdb", flags[i],
+               ntdb = ntdb_open("run-83-openhook.ntdb", flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600, NULL);
-               ok1(tdb);
-               ok1(tdb_store(tdb, key, key, TDB_REPLACE) == 0);
-               tdb_close(tdb);
+               ok1(ntdb);
+               ok1(ntdb_store(ntdb, key, key, NTDB_REPLACE) == 0);
+               ntdb_close(ntdb);
 
                /* Now, open with CIF, should clear it. */
-               tdb = tdb_open("run-83-openhook.tdb", flags[i],
+               ntdb = ntdb_open("run-83-openhook.ntdb", flags[i],
                               O_RDWR, 0, &cif);
-               ok1(tdb);
-               ok1(!tdb_exists(tdb, key));
-               ok1(tdb_store(tdb, key, key, TDB_REPLACE) == 0);
+               ok1(ntdb);
+               ok1(!ntdb_exists(ntdb, key));
+               ok1(ntdb_store(ntdb, key, key, NTDB_REPLACE) == 0);
 
                /* Agent should not clear it, since it's still open. */
                ok1(external_agent_operation(agent, OPEN_WITH_HOOK,
-                                            "run-83-openhook.tdb") == SUCCESS);
+                                            "run-83-openhook.ntdb") == SUCCESS);
                ok1(external_agent_operation(agent, FETCH, "key") == SUCCESS);
                ok1(external_agent_operation(agent, CLOSE, "") == SUCCESS);
 
                /* Still exists for us too. */
-               ok1(tdb_exists(tdb, key));
+               ok1(ntdb_exists(ntdb, key));
 
                /* Close it, now agent should clear it. */
-               tdb_close(tdb);
+               ntdb_close(ntdb);
 
                ok1(external_agent_operation(agent, OPEN_WITH_HOOK,
-                                            "run-83-openhook.tdb") == SUCCESS);
+                                            "run-83-openhook.ntdb") == SUCCESS);
                ok1(external_agent_operation(agent, FETCH, "key") == FAILED);
                ok1(external_agent_operation(agent, CLOSE, "") == SUCCESS);
 
similarity index 57%
rename from lib/tdb2/test/api-91-get-stats.c
rename to lib/ntdb/test/api-91-get-stats.c
index 395db3fb18166490c6b222f7c22c3dda7c4208aa..786885b44c5fdd9d0272581b6166ad24152707bf 100644 (file)
@@ -1,5 +1,5 @@
 #include "config.h"
-#include "tdb2.h"
+#include "ntdb.h"
 #include "tap-interface.h"
 #include <sys/types.h>
 #include <sys/stat.h>
 int main(int argc, char *argv[])
 {
        unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
+       struct ntdb_context *ntdb;
+       int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
 
        plan_tests(sizeof(flags) / sizeof(flags[0]) * 11);
 
        for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               union tdb_attribute *attr;
-               struct tdb_data key = tdb_mkdata("key", 3);
+               union ntdb_attribute *attr;
+               NTDB_DATA key = ntdb_mkdata("key", 3);
 
-               tdb = tdb_open("run-91-get-stats.tdb", flags[i],
+               ntdb = ntdb_open("run-91-get-stats.ntdb", flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               ok1(tdb_store(tdb, key, key, TDB_REPLACE) == 0);
+               ok1(ntdb);
+               ok1(ntdb_store(ntdb, key, key, NTDB_REPLACE) == 0);
 
                /* Use malloc so valgrind will catch overruns. */
                attr = malloc(sizeof *attr);
-               attr->stats.base.attr = TDB_ATTRIBUTE_STATS;
+               attr->stats.base.attr = NTDB_ATTRIBUTE_STATS;
                attr->stats.size = sizeof(*attr);
 
-               ok1(tdb_get_attribute(tdb, attr) == 0);
+               ok1(ntdb_get_attribute(ntdb, attr) == 0);
                ok1(attr->stats.size == sizeof(*attr));
                ok1(attr->stats.allocs > 0);
                ok1(attr->stats.expands > 0);
@@ -39,18 +39,18 @@ int main(int argc, char *argv[])
                free(attr);
 
                /* Try short one. */
-               attr = malloc(offsetof(struct tdb_attribute_stats, allocs)
+               attr = malloc(offsetof(struct ntdb_attribute_stats, allocs)
                              + sizeof(attr->stats.allocs));
-               attr->stats.base.attr = TDB_ATTRIBUTE_STATS;
-               attr->stats.size = offsetof(struct tdb_attribute_stats, allocs)
+               attr->stats.base.attr = NTDB_ATTRIBUTE_STATS;
+               attr->stats.size = offsetof(struct ntdb_attribute_stats, allocs)
                        + sizeof(attr->stats.allocs);
-               ok1(tdb_get_attribute(tdb, attr) == 0);
+               ok1(ntdb_get_attribute(ntdb, attr) == 0);
                ok1(attr->stats.size == sizeof(*attr));
                ok1(attr->stats.allocs > 0);
                free(attr);
                ok1(tap_log_messages == 0);
 
-               tdb_close(tdb);
+               ntdb_close(ntdb);
 
        }
        return exit_status();
diff --git a/lib/ntdb/test/api-92-get-set-readonly.c b/lib/ntdb/test/api-92-get-set-readonly.c
new file mode 100644 (file)
index 0000000..7abd304
--- /dev/null
@@ -0,0 +1,105 @@
+#include "config.h"
+#include "ntdb.h"
+#include "tap-interface.h"
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "logging.h"
+
+int main(int argc, char *argv[])
+{
+       unsigned int i;
+       struct ntdb_context *ntdb;
+       NTDB_DATA key = ntdb_mkdata("key", 3);
+       NTDB_DATA data = ntdb_mkdata("data", 4);
+       int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
+
+       plan_tests(sizeof(flags) / sizeof(flags[0]) * 48); /* 48 ok1() checks per flag combination */
+
+       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+               /* RW -> RO: open read-write, then set NTDB_RDONLY on the live handle. */
+               ntdb = ntdb_open("run-92-get-set-readonly.ntdb", flags[i],
+                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+               ok1(ntdb);
+               ok1(!(ntdb_get_flags(ntdb) & NTDB_RDONLY));
+
+               ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == NTDB_SUCCESS);
+
+               ntdb_add_flag(ntdb, NTDB_RDONLY);
+               ok1(ntdb_get_flags(ntdb) & NTDB_RDONLY);
+
+               /* Can't store, append, delete: each must fail with NTDB_ERR_RDONLY and bump tap_log_messages by one. */
+               ok1(ntdb_store(ntdb, key, data, NTDB_MODIFY) == NTDB_ERR_RDONLY);
+               ok1(tap_log_messages == 1);
+               ok1(ntdb_append(ntdb, key, data) == NTDB_ERR_RDONLY);
+               ok1(tap_log_messages == 2);
+               ok1(ntdb_delete(ntdb, key) == NTDB_ERR_RDONLY);
+               ok1(tap_log_messages == 3);
+
+               /* Can't start a transaction, or take any write lock (chainlock, lockall, wipe_all). */
+               ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_RDONLY);
+               ok1(tap_log_messages == 4);
+               ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_RDONLY);
+               ok1(tap_log_messages == 5);
+               ok1(ntdb_lockall(ntdb) == NTDB_ERR_RDONLY);
+               ok1(tap_log_messages == 6);
+               ok1(ntdb_wipe_all(ntdb) == NTDB_ERR_RDONLY);
+               ok1(tap_log_messages == 7);
+
+               /* Back to RW: after clearing the flag every write path must succeed again. */
+               ntdb_remove_flag(ntdb, NTDB_RDONLY);
+               ok1(!(ntdb_get_flags(ntdb) & NTDB_RDONLY));
+
+               ok1(ntdb_store(ntdb, key, data, NTDB_MODIFY) == NTDB_SUCCESS);
+               ok1(ntdb_append(ntdb, key, data) == NTDB_SUCCESS);
+               ok1(ntdb_delete(ntdb, key) == NTDB_SUCCESS);
+
+               ok1(ntdb_transaction_start(ntdb) == NTDB_SUCCESS);
+               ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == NTDB_SUCCESS);
+               ok1(ntdb_transaction_commit(ntdb) == NTDB_SUCCESS);
+
+               ok1(ntdb_chainlock(ntdb, key) == NTDB_SUCCESS);
+               ntdb_chainunlock(ntdb, key);
+               ok1(ntdb_lockall(ntdb) == NTDB_SUCCESS);
+               ntdb_unlockall(ntdb);
+               ok1(ntdb_wipe_all(ntdb) == NTDB_SUCCESS);
+               ok1(tap_log_messages == 7); /* count unchanged: nothing was logged while read-write */
+
+               ntdb_close(ntdb);
+
+               /* RO -> RW: reopen the same file with O_RDONLY; NTDB_RDONLY must already be set. */
+               ntdb = ntdb_open("run-92-get-set-readonly.ntdb", flags[i],
+                              O_RDONLY, 0600, &tap_log_attr);
+               ok1(ntdb);
+               ok1(ntdb_get_flags(ntdb) & NTDB_RDONLY);
+
+               /* Can't store, append, delete (log count continues from 7). */
+               ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == NTDB_ERR_RDONLY);
+               ok1(tap_log_messages == 8);
+               ok1(ntdb_append(ntdb, key, data) == NTDB_ERR_RDONLY);
+               ok1(tap_log_messages == 9);
+               ok1(ntdb_delete(ntdb, key) == NTDB_ERR_RDONLY);
+               ok1(tap_log_messages == 10);
+
+               /* Can't start a transaction, or take any write lock. */
+               ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_RDONLY);
+               ok1(tap_log_messages == 11);
+               ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_RDONLY);
+               ok1(tap_log_messages == 12);
+               ok1(ntdb_lockall(ntdb) == NTDB_ERR_RDONLY);
+               ok1(tap_log_messages == 13);
+               ok1(ntdb_wipe_all(ntdb) == NTDB_ERR_RDONLY);
+               ok1(tap_log_messages == 14);
+
+               /* Can't remove NTDB_RDONLY since we opened with O_RDONLY: the attempt is logged and the flag stays set. */
+               ntdb_remove_flag(ntdb, NTDB_RDONLY);
+               ok1(tap_log_messages == 15);
+               ok1(ntdb_get_flags(ntdb) & NTDB_RDONLY);
+               ntdb_close(ntdb);
+
+               ok1(tap_log_messages == 15); /* closing must not log anything further */
+               tap_log_messages = 0; /* restart the count for the next flag combination */
+       }
+       return exit_status();
+}
similarity index 57%
rename from lib/tdb2/test/api-93-repack.c
rename to lib/ntdb/test/api-93-repack.c
index 910eb9b301effa4ac1ca0d1f15cb346a5d36c58a..168bc24c0a9470a57a71512480c82093ed57dc6e 100644 (file)
@@ -1,5 +1,5 @@
 #include "config.h"
-#include "tdb2.h"
+#include "ntdb.h"
 #include "tap-interface.h"
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -8,21 +8,21 @@
 
 #define NUM_TESTS 1000
 
-static bool store_all(struct tdb_context *tdb)
+static bool store_all(struct ntdb_context *ntdb)
 {
        unsigned int i;
-       struct tdb_data key = { (unsigned char *)&i, sizeof(i) };
-       struct tdb_data dbuf = { (unsigned char *)&i, sizeof(i) };
+       NTDB_DATA key = { (unsigned char *)&i, sizeof(i) };
+       NTDB_DATA dbuf = { (unsigned char *)&i, sizeof(i) };
 
        for (i = 0; i < NUM_TESTS; i++) {
-               if (tdb_store(tdb, key, dbuf, TDB_INSERT) != TDB_SUCCESS)
+               if (ntdb_store(ntdb, key, dbuf, NTDB_INSERT) != NTDB_SUCCESS)
                        return false;
        }
        return true;
 }
 
-static int mark_entry(struct tdb_context *tdb,
-                     TDB_DATA key, TDB_DATA data, bool found[])
+static int mark_entry(struct ntdb_context *ntdb,
+                     NTDB_DATA key, NTDB_DATA data, bool found[])
 {
        unsigned int num;
 
@@ -51,28 +51,28 @@ int main(int argc, char *argv[])
 {
        unsigned int i;
        bool found[NUM_TESTS];
-       struct tdb_context *tdb;
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT
+       struct ntdb_context *ntdb;
+       int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT
        };
 
        plan_tests(sizeof(flags) / sizeof(flags[0]) * 6 + 1);
 
        for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-93-repack.tdb", flags[i],
+               ntdb = ntdb_open("run-93-repack.ntdb", flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
+               ok1(ntdb);
+               if (!ntdb)
                        break;
 
-               ok1(store_all(tdb));
+               ok1(store_all(ntdb));
 
-               ok1(tdb_repack(tdb) == TDB_SUCCESS);
+               ok1(ntdb_repack(ntdb) == NTDB_SUCCESS);
                memset(found, 0, sizeof(found));
-               ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-               ok1(tdb_traverse(tdb, mark_entry, found) == NUM_TESTS);
+               ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
+               ok1(ntdb_traverse(ntdb, mark_entry, found) == NUM_TESTS);
                ok1(is_all_set(found, NUM_TESTS));
-               tdb_close(tdb);
+               ntdb_close(ntdb);
        }
 
        ok1(tap_log_messages == 0);
diff --git a/lib/ntdb/test/api-add-remove-flags.c b/lib/ntdb/test/api-add-remove-flags.c
new file mode 100644 (file)
index 0000000..4888c32
--- /dev/null
@@ -0,0 +1,89 @@
+#include "private.h" // for ntdb_context
+#include "ntdb.h"
+#include "tap-interface.h"
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "logging.h"
+
+int main(int argc, char *argv[])
+{
+       unsigned int i;
+       struct ntdb_context *ntdb;
+       int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+                       NTDB_NOMMAP|NTDB_CONVERT };
+
+       plan_tests(87); /* 2 NTDB_INTERNAL sets * 9 checks + 4 file-backed sets * 17 checks + 1 final check */
+       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+               ntdb = ntdb_open("run-add-remove-flags.ntdb", flags[i],
+                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+               ok1(ntdb);
+               if (!ntdb)
+                       continue;
+
+               ok1(ntdb_get_flags(ntdb) == ntdb->flags); /* accessor must mirror the private field (hence private.h) */
+               tap_log_messages = 0;
+               ntdb_add_flag(ntdb, NTDB_NOLOCK);
+               if (flags[i] & NTDB_INTERNAL)
+                       ok1(tap_log_messages == 1); /* on an NTDB_INTERNAL db the call is expected to log exactly once */
+               else {
+                       ok1(tap_log_messages == 0);
+                       ok1(ntdb_get_flags(ntdb) & NTDB_NOLOCK);
+               }
+
+               tap_log_messages = 0;
+               ntdb_add_flag(ntdb, NTDB_NOMMAP);
+               if (flags[i] & NTDB_INTERNAL)
+                       ok1(tap_log_messages == 1);
+               else {
+                       ok1(tap_log_messages == 0);
+                       ok1(ntdb_get_flags(ntdb) & NTDB_NOMMAP);
+                       ok1(ntdb->file->map_ptr == NULL); /* with NTDB_NOMMAP set there must be no mapping */
+               }
+
+               tap_log_messages = 0;
+               ntdb_add_flag(ntdb, NTDB_NOSYNC);
+               if (flags[i] & NTDB_INTERNAL)
+                       ok1(tap_log_messages == 1);
+               else {
+                       ok1(tap_log_messages == 0);
+                       ok1(ntdb_get_flags(ntdb) & NTDB_NOSYNC);
+               }
+
+               ok1(ntdb_get_flags(ntdb) == ntdb->flags); /* still in sync after the three add calls */
+
+               tap_log_messages = 0;
+               ntdb_remove_flag(ntdb, NTDB_NOLOCK);
+               if (flags[i] & NTDB_INTERNAL)
+                       ok1(tap_log_messages == 1); /* removal is expected to log once on NTDB_INTERNAL, same as adding */
+               else {
+                       ok1(tap_log_messages == 0);
+                       ok1(!(ntdb_get_flags(ntdb) & NTDB_NOLOCK));
+               }
+
+               tap_log_messages = 0;
+               ntdb_remove_flag(ntdb, NTDB_NOMMAP);
+               if (flags[i] & NTDB_INTERNAL)
+                       ok1(tap_log_messages == 1);
+               else {
+                       ok1(tap_log_messages == 0);
+                       ok1(!(ntdb_get_flags(ntdb) & NTDB_NOMMAP));
+                       ok1(ntdb->file->map_ptr != NULL); /* clearing NTDB_NOMMAP must leave the file mapped, even if it was opened NOMMAP */
+               }
+
+               tap_log_messages = 0;
+               ntdb_remove_flag(ntdb, NTDB_NOSYNC);
+               if (flags[i] & NTDB_INTERNAL)
+                       ok1(tap_log_messages == 1);
+               else {
+                       ok1(tap_log_messages == 0);
+                       ok1(!(ntdb_get_flags(ntdb) & NTDB_NOSYNC));
+               }
+
+               ntdb_close(ntdb);
+       }
+
+       ok1(tap_log_messages == 0); /* only holds when the last iteration is file-backed, as it is with this flags[] order */
+       return exit_status();
+}
similarity index 59%
rename from lib/tdb2/test/api-check-callback.c
rename to lib/ntdb/test/api-check-callback.c
index 96ef09f3bd09b9571f23c6da90ac0c15fb317f56..f74f04b59899e6af10d487b0ed737470b9a52917 100644 (file)
@@ -1,5 +1,5 @@
 #include "config.h"
-#include "tdb2.h"
+#include "ntdb.h"
 #include "tap-interface.h"
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -8,77 +8,77 @@
 
 #define NUM_RECORDS 1000
 
-static bool store_records(struct tdb_context *tdb)
+static bool store_records(struct ntdb_context *ntdb)
 {
        int i;
-       struct tdb_data key = { (unsigned char *)&i, sizeof(i) };
-       struct tdb_data data = { (unsigned char *)&i, sizeof(i) };
+       NTDB_DATA key = { (unsigned char *)&i, sizeof(i) };
+       NTDB_DATA data = { (unsigned char *)&i, sizeof(i) };
 
        for (i = 0; i < NUM_RECORDS; i++)
-               if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
+               if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0)
                        return false;
        return true;
 }
 
-static enum TDB_ERROR check(struct tdb_data key,
-                           struct tdb_data data,
+static enum NTDB_ERROR check(NTDB_DATA key,
+                           NTDB_DATA data,
                            bool *array)
 {
        int val;
 
        if (key.dsize != sizeof(val)) {
                diag("Wrong key size: %u\n", key.dsize);
-               return TDB_ERR_CORRUPT;
+               return NTDB_ERR_CORRUPT;
        }
 
        if (key.dsize != data.dsize
            || memcmp(key.dptr, data.dptr, sizeof(val)) != 0) {
                diag("Key and data differ\n");
-               return TDB_ERR_CORRUPT;
+               return NTDB_ERR_CORRUPT;
        }
 
        memcpy(&val, key.dptr, sizeof(val));
        if (val >= NUM_RECORDS || val < 0) {
                diag("check value %i\n", val);
-               return TDB_ERR_CORRUPT;
+               return NTDB_ERR_CORRUPT;
        }
 
        if (array[val]) {
                diag("Value %i already seen\n", val);
-               return TDB_ERR_CORRUPT;
+               return NTDB_ERR_CORRUPT;
        }
 
        array[val] = true;
-       return TDB_SUCCESS;
+       return NTDB_SUCCESS;
 }
 
 int main(int argc, char *argv[])
 {
        unsigned int i, j;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT };
+       struct ntdb_context *ntdb;
+       int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+                       NTDB_NOMMAP|NTDB_CONVERT };
 
        plan_tests(sizeof(flags) / sizeof(flags[0]) * 4 + 1);
        for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
                bool array[NUM_RECORDS];
 
-               tdb = tdb_open("run-check-callback.tdb", flags[i],
+               ntdb = ntdb_open("run-check-callback.ntdb", flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
+               ok1(ntdb);
+               if (!ntdb)
                        continue;
 
-               ok1(store_records(tdb));
+               ok1(store_records(ntdb));
                for (j = 0; j < NUM_RECORDS; j++)
                        array[j] = false;
-               ok1(tdb_check(tdb, check, array) == TDB_SUCCESS);
+               ok1(ntdb_check(ntdb, check, array) == NTDB_SUCCESS);
                for (j = 0; j < NUM_RECORDS; j++)
                        if (!array[j])
                                break;
                ok1(j == NUM_RECORDS);
-               tdb_close(tdb);
+               ntdb_close(ntdb);
        }
 
        ok1(tap_log_messages == 0);
similarity index 56%
rename from lib/tdb2/test/api-firstkey-nextkey.c
rename to lib/ntdb/test/api-firstkey-nextkey.c
index e5a7c5f8b5a98ee22a6fcf5f66362a80bb4cd93c..da1a68043bbf5c5995c4d306987435025f02c8ae 100644 (file)
@@ -1,5 +1,5 @@
 #include "config.h"
-#include "tdb2.h"
+#include "ntdb.h"
 #include "tap-interface.h"
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -9,14 +9,14 @@
 
 #define NUM_RECORDS 1000
 
-static bool store_records(struct tdb_context *tdb)
+static bool store_records(struct ntdb_context *ntdb)
 {
        int i;
-       struct tdb_data key = { (unsigned char *)&i, sizeof(i) };
-       struct tdb_data data = { (unsigned char *)&i, sizeof(i) };
+       NTDB_DATA key = { (unsigned char *)&i, sizeof(i) };
+       NTDB_DATA data = { (unsigned char *)&i, sizeof(i) };
 
        for (i = 0; i < NUM_RECORDS; i++)
-               if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
+               if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0)
                        return false;
        return true;
 }
@@ -26,7 +26,7 @@ struct trav_data {
        unsigned int calls;
 };
 
-static int trav(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, void *p)
+static int trav(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *p)
 {
        struct trav_data *td = p;
        int val;
@@ -36,8 +36,8 @@ static int trav(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, void *p)
        return 0;
 }
 
-/* Since tdb_nextkey frees dptr, we need to clone it. */
-static TDB_DATA dup_key(TDB_DATA key)
+/* Since ntdb_nextkey frees dptr, we need to clone it. */
+static NTDB_DATA dup_key(NTDB_DATA key)
 {
        void *p = malloc(key.dsize);
        memcpy(p, key.dptr, key.dsize);
@@ -50,81 +50,81 @@ int main(int argc, char *argv[])
        unsigned int i, j;
        int num;
        struct trav_data td;
-       TDB_DATA k;
-       struct tdb_context *tdb;
-       union tdb_attribute seed_attr;
-       enum TDB_ERROR ecode;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT };
-
-       seed_attr.base.attr = TDB_ATTRIBUTE_SEED;
+       NTDB_DATA k;
+       struct ntdb_context *ntdb;
+       union ntdb_attribute seed_attr;
+       enum NTDB_ERROR ecode;
+       int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+                       NTDB_NOMMAP|NTDB_CONVERT };
+
+       seed_attr.base.attr = NTDB_ATTRIBUTE_SEED;
        seed_attr.base.next = &tap_log_attr;
        seed_attr.seed.seed = 6334326220117065685ULL;
 
        plan_tests(sizeof(flags) / sizeof(flags[0])
                   * (NUM_RECORDS*6 + (NUM_RECORDS-1)*3 + 22) + 1);
        for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("api-firstkey-nextkey.tdb", flags[i],
+               ntdb = ntdb_open("api-firstkey-nextkey.ntdb", flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600,
                               &seed_attr);
-               ok1(tdb);
-               if (!tdb)
+               ok1(ntdb);
+               if (!ntdb)
                        continue;
 
-               ok1(tdb_firstkey(tdb, &k) == TDB_ERR_NOEXIST);
+               ok1(ntdb_firstkey(ntdb, &k) == NTDB_ERR_NOEXIST);
 
                /* One entry... */
                k.dptr = (unsigned char *)&num;
                k.dsize = sizeof(num);
                num = 0;
-               ok1(tdb_store(tdb, k, k, TDB_INSERT) == 0);
-               ok1(tdb_firstkey(tdb, &k) == TDB_SUCCESS);
+               ok1(ntdb_store(ntdb, k, k, NTDB_INSERT) == 0);
+               ok1(ntdb_firstkey(ntdb, &k) == NTDB_SUCCESS);
                ok1(k.dsize == sizeof(num));
                ok1(memcmp(k.dptr, &num, sizeof(num)) == 0);
-               ok1(tdb_nextkey(tdb, &k) == TDB_ERR_NOEXIST);
+               ok1(ntdb_nextkey(ntdb, &k) == NTDB_ERR_NOEXIST);
 
                /* Two entries. */
                k.dptr = (unsigned char *)&num;
                k.dsize = sizeof(num);
                num = 1;
-               ok1(tdb_store(tdb, k, k, TDB_INSERT) == 0);
-               ok1(tdb_firstkey(tdb, &k) == TDB_SUCCESS);
+               ok1(ntdb_store(ntdb, k, k, NTDB_INSERT) == 0);
+               ok1(ntdb_firstkey(ntdb, &k) == NTDB_SUCCESS);
                ok1(k.dsize == sizeof(num));
                memcpy(&num, k.dptr, sizeof(num));
                ok1(num == 0 || num == 1);
-               ok1(tdb_nextkey(tdb, &k) == TDB_SUCCESS);
+               ok1(ntdb_nextkey(ntdb, &k) == NTDB_SUCCESS);
                ok1(k.dsize == sizeof(j));
                memcpy(&j, k.dptr, sizeof(j));
                ok1(j == 0 || j == 1);
                ok1(j != num);
-               ok1(tdb_nextkey(tdb, &k) == TDB_ERR_NOEXIST);
+               ok1(ntdb_nextkey(ntdb, &k) == NTDB_ERR_NOEXIST);
 
                /* Clean up. */
                k.dptr = (unsigned char *)&num;
                k.dsize = sizeof(num);
                num = 0;
-               ok1(tdb_delete(tdb, k) == 0);
+               ok1(ntdb_delete(ntdb, k) == 0);
                num = 1;
-               ok1(tdb_delete(tdb, k) == 0);
+               ok1(ntdb_delete(ntdb, k) == 0);
 
                /* Now lots of records. */
-               ok1(store_records(tdb));
+               ok1(store_records(ntdb));
                td.calls = 0;
 
-               num = tdb_traverse(tdb, trav, &td);
+               num = ntdb_traverse(ntdb, trav, &td);
                ok1(num == NUM_RECORDS);
                ok1(td.calls == NUM_RECORDS);
 
-               /* Simple loop should match tdb_traverse */
-               for (j = 0, ecode = tdb_firstkey(tdb, &k); j < td.calls; j++) {
+               /* Simple loop should match ntdb_traverse */
+               for (j = 0, ecode = ntdb_firstkey(ntdb, &k); j < td.calls; j++) {
                        int val;
 
-                       ok1(ecode == TDB_SUCCESS);
+                       ok1(ecode == NTDB_SUCCESS);
                        ok1(k.dsize == sizeof(val));
                        memcpy(&val, k.dptr, k.dsize);
                        ok1(td.records[j] == val);
-                       ecode = tdb_nextkey(tdb, &k);
+                       ecode = ntdb_nextkey(ntdb, &k);
                }
 
                /* But arbitrary orderings should work too. */
@@ -132,26 +132,26 @@ int main(int argc, char *argv[])
                        k.dptr = (unsigned char *)&td.records[j-1];
                        k.dsize = sizeof(td.records[j-1]);
                        k = dup_key(k);
-                       ok1(tdb_nextkey(tdb, &k) == TDB_SUCCESS);
+                       ok1(ntdb_nextkey(ntdb, &k) == NTDB_SUCCESS);
                        ok1(k.dsize == sizeof(td.records[j]));
                        ok1(memcmp(k.dptr, &td.records[j], k.dsize) == 0);
                        free(k.dptr);
                }
 
                /* Even delete should work. */
-               for (j = 0, ecode = tdb_firstkey(tdb, &k);
-                    ecode != TDB_ERR_NOEXIST;
+               for (j = 0, ecode = ntdb_firstkey(ntdb, &k);
+                    ecode != NTDB_ERR_NOEXIST;
                     j++) {
-                       ok1(ecode == TDB_SUCCESS);
+                       ok1(ecode == NTDB_SUCCESS);
                        ok1(k.dsize == 4);
-                       ok1(tdb_delete(tdb, k) == 0);
-                       ecode = tdb_nextkey(tdb, &k);
+                       ok1(ntdb_delete(ntdb, k) == 0);
+                       ecode = ntdb_nextkey(ntdb, &k);
                }
 
                diag("delete using first/nextkey gave %u of %u records",
                     j, NUM_RECORDS);
                ok1(j == NUM_RECORDS);
-               tdb_close(tdb);
+               ntdb_close(ntdb);
        }
 
        ok1(tap_log_messages == 0);
similarity index 53%
rename from lib/tdb2/test/api-fork-test.c
rename to lib/ntdb/test/api-fork-test.c
index 934c71cbe8c4a47e059d7e0ef924df0e276e9ec4..57bd6862823ab0be7ec1862eeab65d2473aacdf0 100644 (file)
@@ -1,17 +1,17 @@
 /* Test forking while holding lock.
  *
  * There are only five ways to do this currently:
- * (1) grab a tdb_chainlock, then fork.
- * (2) grab a tdb_lockall, then fork.
- * (3) grab a tdb_lockall_read, then fork.
+ * (1) grab a ntdb_chainlock, then fork.
+ * (2) grab a ntdb_lockall, then fork.
+ * (3) grab a ntdb_lockall_read, then fork.
  * (4) start a transaction, then fork.
- * (5) fork from inside a tdb_parse() callback.
+ * (5) fork from inside a ntdb_parse() callback.
  *
- * Note that we don't hold a lock across tdb_traverse callbacks, so
+ * Note that we don't hold a lock across ntdb_traverse callbacks, so
  * that doesn't matter.
  */
 #include "config.h"
-#include "tdb2.h"
+#include "ntdb.h"
 #include "tap-interface.h"
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <stdlib.h>
 #include "logging.h"
 
-static enum TDB_ERROR fork_in_parse(TDB_DATA key, TDB_DATA data,
-                                   struct tdb_context *tdb)
+static enum NTDB_ERROR fork_in_parse(NTDB_DATA key, NTDB_DATA data,
+                                   struct ntdb_context *ntdb)
 {
        int status;
 
        if (fork() == 0) {
                /* We expect this to fail. */
-               if (tdb_store(tdb, key, data, TDB_REPLACE) != TDB_ERR_LOCK)
+               if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != NTDB_ERR_LOCK)
                        exit(1);
 
-               if (tdb_fetch(tdb, key, &data) != TDB_ERR_LOCK)
+               if (ntdb_fetch(ntdb, key, &data) != NTDB_ERR_LOCK)
                        exit(1);
 
                if (tap_log_messages != 2)
                        exit(2);
 
-               tdb_close(tdb);
+               ntdb_close(ntdb);
                if (tap_log_messages != 2)
                        exit(3);
                exit(0);
        }
        wait(&status);
        ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0);
-       return TDB_SUCCESS;
+       return NTDB_SUCCESS;
 }
 
 int main(int argc, char *argv[])
 {
        unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
-       struct tdb_data key = tdb_mkdata("key", 3);
-       struct tdb_data data = tdb_mkdata("data", 4);
+       struct ntdb_context *ntdb;
+       int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
+       NTDB_DATA key = ntdb_mkdata("key", 3);
+       NTDB_DATA data = ntdb_mkdata("data", 4);
 
        plan_tests(sizeof(flags) / sizeof(flags[0]) * 14);
        for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
@@ -63,116 +63,116 @@ int main(int argc, char *argv[])
 
                tap_log_messages = 0;
 
-               tdb = tdb_open("run-fork-test.tdb", flags[i],
+               ntdb = ntdb_open("run-fork-test.ntdb", flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               if (!ok1(tdb))
+               if (!ok1(ntdb))
                        continue;
 
                /* Put a record in here. */
-               ok1(tdb_store(tdb, key, data, TDB_REPLACE) == TDB_SUCCESS);
+               ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == NTDB_SUCCESS);
 
-               ok1(tdb_chainlock(tdb, key) == TDB_SUCCESS);
+               ok1(ntdb_chainlock(ntdb, key) == NTDB_SUCCESS);
                if (fork() == 0) {
                        /* We expect this to fail. */
-                       if (tdb_store(tdb, key, data, TDB_REPLACE) != TDB_ERR_LOCK)
+                       if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != NTDB_ERR_LOCK)
                                return 1;
 
-                       if (tdb_fetch(tdb, key, &data) != TDB_ERR_LOCK)
+                       if (ntdb_fetch(ntdb, key, &data) != NTDB_ERR_LOCK)
                                return 1;
 
                        if (tap_log_messages != 2)
                                return 2;
 
-                       tdb_chainunlock(tdb, key);
+                       ntdb_chainunlock(ntdb, key);
                        if (tap_log_messages != 3)
                                return 3;
-                       tdb_close(tdb);
+                       ntdb_close(ntdb);
                        if (tap_log_messages != 3)
                                return 4;
                        return 0;
                }
                wait(&status);
                ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0);
-               tdb_chainunlock(tdb, key);
+               ntdb_chainunlock(ntdb, key);
 
-               ok1(tdb_lockall(tdb) == TDB_SUCCESS);
+               ok1(ntdb_lockall(ntdb) == NTDB_SUCCESS);
                if (fork() == 0) {
                        /* We expect this to fail. */
-                       if (tdb_store(tdb, key, data, TDB_REPLACE) != TDB_ERR_LOCK)
+                       if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != NTDB_ERR_LOCK)
                                return 1;
 
-                       if (tdb_fetch(tdb, key, &data) != TDB_ERR_LOCK)
+                       if (ntdb_fetch(ntdb, key, &data) != NTDB_ERR_LOCK)
                                return 1;
 
                        if (tap_log_messages != 2)
                                return 2;
 
-                       tdb_unlockall(tdb);
+                       ntdb_unlockall(ntdb);
                        if (tap_log_messages != 2)
                                return 3;
-                       tdb_close(tdb);
+                       ntdb_close(ntdb);
                        if (tap_log_messages != 2)
                                return 4;
                        return 0;
                }
                wait(&status);
                ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0);
-               tdb_unlockall(tdb);
+               ntdb_unlockall(ntdb);
 
-               ok1(tdb_lockall_read(tdb) == TDB_SUCCESS);
+               ok1(ntdb_lockall_read(ntdb) == NTDB_SUCCESS);
                if (fork() == 0) {
                        /* We expect this to fail. */
                        /* This would always fail anyway... */
-                       if (tdb_store(tdb, key, data, TDB_REPLACE) != TDB_ERR_LOCK)
+                       if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != NTDB_ERR_LOCK)
                                return 1;
 
-                       if (tdb_fetch(tdb, key, &data) != TDB_ERR_LOCK)
+                       if (ntdb_fetch(ntdb, key, &data) != NTDB_ERR_LOCK)
                                return 1;
 
                        if (tap_log_messages != 2)
                                return 2;
 
-                       tdb_unlockall_read(tdb);
+                       ntdb_unlockall_read(ntdb);
                        if (tap_log_messages != 2)
                                return 3;
-                       tdb_close(tdb);
+                       ntdb_close(ntdb);
                        if (tap_log_messages != 2)
                                return 4;
                        return 0;
                }
                wait(&status);
                ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0);
-               tdb_unlockall_read(tdb);
+               ntdb_unlockall_read(ntdb);
 
-               ok1(tdb_transaction_start(tdb) == TDB_SUCCESS);
+               ok1(ntdb_transaction_start(ntdb) == NTDB_SUCCESS);
                /* If transactions is empty, noop "commit" succeeds. */
-               ok1(tdb_delete(tdb, key) == TDB_SUCCESS);
+               ok1(ntdb_delete(ntdb, key) == NTDB_SUCCESS);
                if (fork() == 0) {
                        /* We expect this to fail. */
-                       if (tdb_store(tdb, key, data, TDB_REPLACE) != TDB_ERR_LOCK)
+                       if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != NTDB_ERR_LOCK)
                                return 1;
 
-                       if (tdb_fetch(tdb, key, &data) != TDB_ERR_LOCK)
+                       if (ntdb_fetch(ntdb, key, &data) != NTDB_ERR_LOCK)
                                return 1;
 
                        if (tap_log_messages != 2)
                                return 2;
 
-                       if (tdb_transaction_commit(tdb) != TDB_ERR_LOCK)
+                       if (ntdb_transaction_commit(ntdb) != NTDB_ERR_LOCK)
                                return 3;
 
-                       tdb_close(tdb);
+                       ntdb_close(ntdb);
                        if (tap_log_messages < 3)
                                return 4;
                        return 0;
                }
                wait(&status);
                ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0);
-               tdb_transaction_cancel(tdb);
+               ntdb_transaction_cancel(ntdb);
 
-               ok1(tdb_parse_record(tdb, key, fork_in_parse, tdb)
-                   == TDB_SUCCESS);
-               tdb_close(tdb);
+               ok1(ntdb_parse_record(ntdb, key, fork_in_parse, ntdb)
+                   == NTDB_SUCCESS);
+               ntdb_close(ntdb);
                ok1(tap_log_messages == 0);
        }
        return exit_status();
similarity index 74%
rename from lib/tdb2/test/api-locktimeout.c
rename to lib/ntdb/test/api-locktimeout.c
index dabe262f2506e8f07c25a407bf89cf94bc10c7c9..cafe067d0bb8e1a02ed18200c279d6bcf0c8c739 100644 (file)
@@ -1,5 +1,5 @@
 #include "config.h"
-#include "tdb2.h"
+#include "ntdb.h"
 #include "tap-interface.h"
 #include "system/wait.h"
 #include <sys/types.h>
@@ -94,35 +94,35 @@ static int timeout_lock(int fd, int rw, off_t off, off_t len, bool waitflag,
        return ret;
 }
 
-static int tdb_chainlock_with_timeout_internal(struct tdb_context *tdb,
-                                              TDB_DATA key,
+static int ntdb_chainlock_with_timeout_internal(struct ntdb_context *ntdb,
+                                              NTDB_DATA key,
                                               unsigned int timeout,
                                               int rw_type)
 {
-       union tdb_attribute locking;
-       enum TDB_ERROR ecode;
+       union ntdb_attribute locking;
+       enum NTDB_ERROR ecode;
 
        if (timeout) {
-               locking.base.attr = TDB_ATTRIBUTE_FLOCK;
-               ecode = tdb_get_attribute(tdb, &locking);
-               if (ecode != TDB_SUCCESS)
+               locking.base.attr = NTDB_ATTRIBUTE_FLOCK;
+               ecode = ntdb_get_attribute(ntdb, &locking);
+               if (ecode != NTDB_SUCCESS)
                        return ecode;
 
                /* Replace locking function with our own. */
                locking.flock.data = &timeout;
                locking.flock.lock = timeout_lock;
 
-               ecode = tdb_set_attribute(tdb, &locking);
-               if (ecode != TDB_SUCCESS)
+               ecode = ntdb_set_attribute(ntdb, &locking);
+               if (ecode != NTDB_SUCCESS)
                        return ecode;
        }
        if (rw_type == F_RDLCK)
-               ecode = tdb_chainlock_read(tdb, key);
+               ecode = ntdb_chainlock_read(ntdb, key);
        else
-               ecode = tdb_chainlock(tdb, key);
+               ecode = ntdb_chainlock(ntdb, key);
 
        if (timeout) {
-               tdb_unset_attribute(tdb, TDB_ATTRIBUTE_FLOCK);
+               ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_FLOCK);
        }
        return ecode;
 }
@@ -130,10 +130,10 @@ static int tdb_chainlock_with_timeout_internal(struct tdb_context *tdb,
 int main(int argc, char *argv[])
 {
        unsigned int i;
-       struct tdb_context *tdb;
-       TDB_DATA key = tdb_mkdata("hello", 5);
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
+       struct ntdb_context *ntdb;
+       NTDB_DATA key = ntdb_mkdata("hello", 5);
+       int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
        struct agent *agent;
 
        plan_tests(sizeof(flags) / sizeof(flags[0]) * 15);
@@ -141,52 +141,52 @@ int main(int argc, char *argv[])
        agent = prepare_external_agent();
 
        for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               enum TDB_ERROR ecode;
-               tdb = tdb_open("run-locktimeout.tdb", flags[i],
+               enum NTDB_ERROR ecode;
+               ntdb = ntdb_open("run-locktimeout.ntdb", flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               if (!ok1(tdb))
+               if (!ok1(ntdb))
                        break;
 
                /* Simple cases: should succeed. */
-               ecode = tdb_chainlock_with_timeout_internal(tdb, key, 20,
+               ecode = ntdb_chainlock_with_timeout_internal(ntdb, key, 20,
                                                            F_RDLCK);
-               ok1(ecode == TDB_SUCCESS);
+               ok1(ecode == NTDB_SUCCESS);
                ok1(tap_log_messages == 0);
 
-               tdb_chainunlock_read(tdb, key);
+               ntdb_chainunlock_read(ntdb, key);
                ok1(tap_log_messages == 0);
 
-               ecode = tdb_chainlock_with_timeout_internal(tdb, key, 20,
+               ecode = ntdb_chainlock_with_timeout_internal(ntdb, key, 20,
                                                            F_WRLCK);
-               ok1(ecode == TDB_SUCCESS);
+               ok1(ecode == NTDB_SUCCESS);
                ok1(tap_log_messages == 0);
 
-               tdb_chainunlock(tdb, key);
+               ntdb_chainunlock(ntdb, key);
                ok1(tap_log_messages == 0);
 
                /* OK, get agent to start transaction, then we should time out. */
-               ok1(external_agent_operation(agent, OPEN, "run-locktimeout.tdb")
+               ok1(external_agent_operation(agent, OPEN, "run-locktimeout.ntdb")
                    == SUCCESS);
                ok1(external_agent_operation(agent, TRANSACTION_START, "")
                    == SUCCESS);
-               ecode = tdb_chainlock_with_timeout_internal(tdb, key, 20,
+               ecode = ntdb_chainlock_with_timeout_internal(ntdb, key, 20,
                                                            F_WRLCK);
-               ok1(ecode == TDB_ERR_LOCK);
+               ok1(ecode == NTDB_ERR_LOCK);
                ok1(tap_log_messages == 0);
 
                /* Even if we get a different signal, should be fine. */
                CatchSignal(SIGUSR1, do_nothing);
                external_agent_operation(agent, SEND_SIGNAL, "");
-               ecode = tdb_chainlock_with_timeout_internal(tdb, key, 20,
+               ecode = ntdb_chainlock_with_timeout_internal(ntdb, key, 20,
                                                            F_WRLCK);
-               ok1(ecode == TDB_ERR_LOCK);
+               ok1(ecode == NTDB_ERR_LOCK);
                ok1(tap_log_messages == 0);
 
                ok1(external_agent_operation(agent, TRANSACTION_COMMIT, "")
                    == SUCCESS);
                ok1(external_agent_operation(agent, CLOSE, "")
                    == SUCCESS);
-               tdb_close(tdb);
+               ntdb_close(ntdb);
        }
        free_external_agent(agent);
        return exit_status();
similarity index 64%
rename from lib/tdb2/test/api-missing-entries.c
rename to lib/ntdb/test/api-missing-entries.c
index c81839bc054df055bd61017821ecb7f59d2c6c74..1c8064f945ae00e1cb3d81a2238563d4a0e51b42 100644 (file)
@@ -1,6 +1,6 @@
 /* Another test revealed that we lost an entry.  This reproduces it. */
 #include "config.h"
-#include "tdb2.h"
+#include "ntdb.h"
 #include <ccan/hash/hash.h>
 #include "tap-interface.h"
 #include <sys/types.h>
@@ -20,23 +20,23 @@ static uint64_t failhash(const void *key, size_t len, uint64_t seed, void *p)
 int main(int argc, char *argv[])
 {
        int i;
-       struct tdb_context *tdb;
-       struct tdb_data key = { (unsigned char *)&i, sizeof(i) };
-       struct tdb_data data = { (unsigned char *)&i, sizeof(i) };
-       union tdb_attribute hattr = { .hash = { .base = { TDB_ATTRIBUTE_HASH },
+       struct ntdb_context *ntdb;
+       NTDB_DATA key = { (unsigned char *)&i, sizeof(i) };
+       NTDB_DATA data = { (unsigned char *)&i, sizeof(i) };
+       union ntdb_attribute hattr = { .hash = { .base = { NTDB_ATTRIBUTE_HASH },
                                                .fn = failhash } };
 
        hattr.base.next = &tap_log_attr;
        plan_tests(1 + NUM_RECORDS + 2);
 
-       tdb = tdb_open("run-missing-entries.tdb", TDB_INTERNAL,
+       ntdb = ntdb_open("run-missing-entries.ntdb", NTDB_INTERNAL,
                       O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
-       if (ok1(tdb)) {
+       if (ok1(ntdb)) {
                for (i = 0; i < NUM_RECORDS; i++) {
-                       ok1(tdb_store(tdb, key, data, TDB_REPLACE) == 0);
+                       ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0);
                }
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               tdb_close(tdb);
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+               ntdb_close(ntdb);
        }
 
        ok1(tap_log_messages == 0);
diff --git a/lib/ntdb/test/api-open-multiple-times.c b/lib/ntdb/test/api-open-multiple-times.c
new file mode 100644 (file)
index 0000000..70bad00
--- /dev/null
@@ -0,0 +1,83 @@
+#include "config.h"
+#include "ntdb.h"
+#include "tap-interface.h"
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include "logging.h"
+
+int main(int argc, char *argv[])
+{
+       unsigned int i;
+       struct ntdb_context *ntdb, *ntdb2;
+       NTDB_DATA key = { (unsigned char *)&i, sizeof(i) };
+       NTDB_DATA data = { (unsigned char *)&i, sizeof(i) };
+       NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */
+       int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
+
+       plan_tests(sizeof(flags) / sizeof(flags[0]) * 28);
+       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+               ntdb = ntdb_open("run-open-multiple-times.ntdb", flags[i],
+                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+               ok1(ntdb);
+               if (!ntdb)
+                       continue;
+
+               ntdb2 = ntdb_open("run-open-multiple-times.ntdb", flags[i],
+                               O_RDWR|O_CREAT, 0600, &tap_log_attr);
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+               ok1(ntdb_check(ntdb2, NULL, NULL) == 0);
+
+               /* Store in one, fetch in the other. */
+               ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0);
+               ok1(ntdb_fetch(ntdb2, key, &d) == NTDB_SUCCESS);
+               ok1(ntdb_deq(d, data));
+               free(d.dptr);
+
+               /* Vice versa, with delete. */
+               ok1(ntdb_delete(ntdb2, key) == 0);
+               ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_NOEXIST);
+
+               /* OK, now close first one, check second still good. */
+               ok1(ntdb_close(ntdb) == 0);
+
+               ok1(ntdb_store(ntdb2, key, data, NTDB_REPLACE) == 0);
+               ok1(ntdb_fetch(ntdb2, key, &d) == NTDB_SUCCESS);
+               ok1(ntdb_deq(d, data));
+               free(d.dptr);
+
+               /* Reopen */
+               ntdb = ntdb_open("run-open-multiple-times.ntdb", flags[i],
+                              O_RDWR|O_CREAT, 0600, &tap_log_attr);
+               ok1(ntdb);
+
+               ok1(ntdb_transaction_start(ntdb2) == 0);
+
+               /* Anything in the other one should fail. */
+               ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_LOCK);
+               ok1(tap_log_messages == 1);
+               ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == NTDB_ERR_LOCK);
+               ok1(tap_log_messages == 2);
+               ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_LOCK);
+               ok1(tap_log_messages == 3);
+               ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_LOCK);
+               ok1(tap_log_messages == 4);
+
+               /* Transaction should work as normal. */
+               ok1(ntdb_store(ntdb2, key, data, NTDB_REPLACE) == NTDB_SUCCESS);
+
+               /* Now... try closing with locks held. */
+               ok1(ntdb_close(ntdb2) == 0);
+
+               ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
+               ok1(ntdb_deq(d, data));
+               free(d.dptr);
+               ok1(ntdb_close(ntdb) == 0);
+               ok1(tap_log_messages == 4);
+               tap_log_messages = 0;
+       }
+
+       return exit_status();
+}
similarity index 59%
rename from lib/tdb2/test/api-record-expand.c
rename to lib/ntdb/test/api-record-expand.c
index 34799ebe5e59bfe670032e0ca6140aace683287d..cea5a10bfbba83131ac3398e3edba5c960e4ec48 100644 (file)
@@ -1,5 +1,5 @@
 #include "config.h"
-#include "tdb2.h"
+#include "ntdb.h"
 #include "tap-interface.h"
 #include <sys/types.h>
 #include <sys/stat.h>
 int main(int argc, char *argv[])
 {
        unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT };
-       struct tdb_data key = tdb_mkdata("key", 3);
-       struct tdb_data data;
+       struct ntdb_context *ntdb;
+       int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+                       NTDB_NOMMAP|NTDB_CONVERT };
+       NTDB_DATA key = ntdb_mkdata("key", 3);
+       NTDB_DATA data;
 
        data.dptr = malloc(MAX_SIZE);
        memset(data.dptr, 0x24, MAX_SIZE);
@@ -26,23 +26,23 @@ int main(int argc, char *argv[])
        plan_tests(sizeof(flags) / sizeof(flags[0])
                   * (3 + (1 + (MAX_SIZE/SIZE_STEP)) * 2) + 1);
        for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-record-expand.tdb", flags[i],
+               ntdb = ntdb_open("run-record-expand.ntdb", flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
+               ok1(ntdb);
+               if (!ntdb)
                        continue;
 
                data.dsize = 0;
-               ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
+               ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
                for (data.dsize = 0;
                     data.dsize < MAX_SIZE;
                     data.dsize += SIZE_STEP) {
                        memset(data.dptr, data.dsize, data.dsize);
-                       ok1(tdb_store(tdb, key, data, TDB_MODIFY) == 0);
-                       ok1(tdb_check(tdb, NULL, NULL) == 0);
+                       ok1(ntdb_store(ntdb, key, data, NTDB_MODIFY) == 0);
+                       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
                }
-               tdb_close(tdb);
+               ntdb_close(ntdb);
        }
        ok1(tap_log_messages == 0);
        free(data.dptr);
diff --git a/lib/ntdb/test/api-simple-delete.c b/lib/ntdb/test/api-simple-delete.c
new file mode 100644 (file)
index 0000000..2b20e19
--- /dev/null
@@ -0,0 +1,39 @@
+#include "config.h"
+#include "ntdb.h"
+#include "tap-interface.h"
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "logging.h"
+
+int main(int argc, char *argv[])
+{
+       unsigned int i;
+       struct ntdb_context *ntdb;
+       int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+                       NTDB_NOMMAP|NTDB_CONVERT };
+       NTDB_DATA key = ntdb_mkdata("key", 3);
+       NTDB_DATA data = ntdb_mkdata("data", 4);
+
+       plan_tests(sizeof(flags) / sizeof(flags[0]) * 7 + 1);
+       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+               ntdb = ntdb_open("run-simple-delete.ntdb", flags[i],
+                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+               ok1(ntdb);
+               if (ntdb) {
+                       /* Delete should fail. */
+                       ok1(ntdb_delete(ntdb, key) == NTDB_ERR_NOEXIST);
+                       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+                       /* Insert should succeed. */
+                       ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+                       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+                       /* Delete should now work. */
+                       ok1(ntdb_delete(ntdb, key) == 0);
+                       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+                       ntdb_close(ntdb);
+               }
+       }
+       ok1(tap_log_messages == 0);
+       return exit_status();
+}
similarity index 61%
rename from lib/tdb2/test/api-summary.c
rename to lib/ntdb/test/api-summary.c
index e9dfd270e953ec568419a6c526739b649cbb4775..8060ef29be7b521d77a81baf0b4d91d9ae2bb60f 100644 (file)
@@ -1,5 +1,5 @@
 #include "config.h"
-#include "tdb2.h"
+#include "ntdb.h"
 #include "tap-interface.h"
 #include <sys/types.h>
 #include <sys/stat.h>
 int main(int argc, char *argv[])
 {
        unsigned int i, j;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT };
-       struct tdb_data key = { (unsigned char *)&j, sizeof(j) };
-       struct tdb_data data = { (unsigned char *)&j, sizeof(j) };
+       struct ntdb_context *ntdb;
+       int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+                       NTDB_NOMMAP|NTDB_CONVERT };
+       NTDB_DATA key = { (unsigned char *)&j, sizeof(j) };
+       NTDB_DATA data = { (unsigned char *)&j, sizeof(j) };
        char *summary;
 
        plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 2 * 5) + 1);
        for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-summary.tdb", flags[i],
+               ntdb = ntdb_open("run-summary.ntdb", flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
+               ok1(ntdb);
+               if (!ntdb)
                        continue;
 
                /* Put some stuff in there. */
                for (j = 0; j < 500; j++) {
                        /* Make sure padding varies so we get some graphs! */
                        data.dsize = j % (sizeof(j) + 1);
-                       if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
-                               fail("Storing in tdb");
+                       if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0)
+                               fail("Storing in ntdb");
                }
 
                for (j = 0;
-                    j <= TDB_SUMMARY_HISTOGRAMS;
-                    j += TDB_SUMMARY_HISTOGRAMS) {
-                       ok1(tdb_summary(tdb, j, &summary) == TDB_SUCCESS);
+                    j <= NTDB_SUMMARY_HISTOGRAMS;
+                    j += NTDB_SUMMARY_HISTOGRAMS) {
+                       ok1(ntdb_summary(ntdb, j, &summary) == NTDB_SUCCESS);
                        ok1(strstr(summary, "Number of records: 500\n"));
                        ok1(strstr(summary, "Smallest/average/largest keys: 4/4/4\n"));
                        ok1(strstr(summary, "Smallest/average/largest data: 0/2/4\n"));
-                       if (j == TDB_SUMMARY_HISTOGRAMS) {
+                       if (j == NTDB_SUMMARY_HISTOGRAMS) {
                                ok1(strstr(summary, "|")
                                    && strstr(summary, "*"));
                        } else {
@@ -50,7 +50,7 @@ int main(int argc, char *argv[])
                        }
                        free(summary);
                }
-               tdb_close(tdb);
+               ntdb_close(ntdb);
        }
 
        ok1(tap_log_messages == 0);
similarity index 75%
rename from lib/tdb2/test/external-agent.c
rename to lib/ntdb/test/external-agent.c
index e8cff957280597d9e861d25afef70422a6550686..098d0cb5959f9fd109267be315f53d6969cf7098 100644 (file)
 #include <stdio.h>
 #include <stdarg.h>
 
-static struct tdb_context *tdb;
+static struct ntdb_context *ntdb;
 
 void (*external_agent_free)(void *) = free;
 
-static enum TDB_ERROR clear_if_first(int fd, void *arg)
+static enum NTDB_ERROR clear_if_first(int fd, void *arg)
 {
 /* We hold a lock offset 4 always, so we can tell if anyone is holding it.
- * (This is compatible with tdb1's TDB_CLEAR_IF_FIRST flag).  */
+ * (This is compatible with tdb's TDB_CLEAR_IF_FIRST flag).  */
        struct flock fl;
 
        fl.l_type = F_WRLCK;
@@ -33,73 +33,73 @@ static enum TDB_ERROR clear_if_first(int fd, void *arg)
                /* We must be first ones to open it! */
                diag("agent truncating file!");
                if (ftruncate(fd, 0) != 0) {
-                       return TDB_ERR_IO;
+                       return NTDB_ERR_IO;
                }
        }
        fl.l_type = F_RDLCK;
        if (fcntl(fd, F_SETLKW, &fl) != 0) {
-               return TDB_ERR_IO;
+               return NTDB_ERR_IO;
        }
-       return TDB_SUCCESS;
+       return NTDB_SUCCESS;
 }
 
 static enum agent_return do_operation(enum operation op, const char *name)
 {
-       TDB_DATA k;
+       NTDB_DATA k;
        enum agent_return ret;
-       TDB_DATA data;
-       enum TDB_ERROR ecode;
-       union tdb_attribute cif;
+       NTDB_DATA data;
+       enum NTDB_ERROR ecode;
+       union ntdb_attribute cif;
 
-       if (op != OPEN && op != OPEN_WITH_HOOK && !tdb) {
-               diag("external: No tdb open!");
+       if (op != OPEN && op != OPEN_WITH_HOOK && !ntdb) {
+               diag("external: No ntdb open!");
                return OTHER_FAILURE;
        }
 
        diag("external: %s", operation_name(op));
 
-       k = tdb_mkdata(name, strlen(name));
+       k = ntdb_mkdata(name, strlen(name));
 
        locking_would_block = 0;
        switch (op) {
        case OPEN:
-               if (tdb) {
-                       diag("Already have tdb %s open", tdb_name(tdb));
+               if (ntdb) {
+                       diag("Already have ntdb %s open", ntdb_name(ntdb));
                        return OTHER_FAILURE;
                }
-               tdb = tdb_open(name, TDB_DEFAULT, O_RDWR, 0, &tap_log_attr);
-               if (!tdb) {
+               ntdb = ntdb_open(name, NTDB_DEFAULT, O_RDWR, 0, &tap_log_attr);
+               if (!ntdb) {
                        if (!locking_would_block)
-                               diag("Opening tdb gave %s", strerror(errno));
+                               diag("Opening ntdb gave %s", strerror(errno));
                        forget_locking();
                        ret = OTHER_FAILURE;
                } else
                        ret = SUCCESS;
                break;
        case OPEN_WITH_HOOK:
-               if (tdb) {
-                       diag("Already have tdb %s open", tdb_name(tdb));
+               if (ntdb) {
+                       diag("Already have ntdb %s open", ntdb_name(ntdb));
                        return OTHER_FAILURE;
                }
-               cif.openhook.base.attr = TDB_ATTRIBUTE_OPENHOOK;
+               cif.openhook.base.attr = NTDB_ATTRIBUTE_OPENHOOK;
                cif.openhook.base.next = &tap_log_attr;
                cif.openhook.fn = clear_if_first;
-               tdb = tdb_open(name, TDB_DEFAULT, O_RDWR, 0, &cif);
-               if (!tdb) {
+               ntdb = ntdb_open(name, NTDB_DEFAULT, O_RDWR, 0, &cif);
+               if (!ntdb) {
                        if (!locking_would_block)
-                               diag("Opening tdb gave %s", strerror(errno));
+                               diag("Opening ntdb gave %s", strerror(errno));
                        forget_locking();
                        ret = OTHER_FAILURE;
                } else
                        ret = SUCCESS;
                break;
        case FETCH:
-               ecode = tdb_fetch(tdb, k, &data);
-               if (ecode == TDB_ERR_NOEXIST) {
+               ecode = ntdb_fetch(ntdb, k, &data);
+               if (ecode == NTDB_ERR_NOEXIST) {
                        ret = FAILED;
                } else if (ecode < 0) {
                        ret = OTHER_FAILURE;
-               } else if (!tdb_deq(data, k)) {
+               } else if (!ntdb_deq(data, k)) {
                        ret = OTHER_FAILURE;
                        external_agent_free(data.dptr);
                } else {
@@ -108,23 +108,23 @@ static enum agent_return do_operation(enum operation op, const char *name)
                }
                break;
        case STORE:
-               ret = tdb_store(tdb, k, k, 0) == 0 ? SUCCESS : OTHER_FAILURE;
+               ret = ntdb_store(ntdb, k, k, 0) == 0 ? SUCCESS : OTHER_FAILURE;
                break;
        case TRANSACTION_START:
-               ret = tdb_transaction_start(tdb) == 0 ? SUCCESS : OTHER_FAILURE;
+               ret = ntdb_transaction_start(ntdb) == 0 ? SUCCESS : OTHER_FAILURE;
                break;
        case TRANSACTION_COMMIT:
-               ret = tdb_transaction_commit(tdb)==0 ? SUCCESS : OTHER_FAILURE;
+               ret = ntdb_transaction_commit(ntdb)==0 ? SUCCESS : OTHER_FAILURE;
                break;
        case NEEDS_RECOVERY:
-               ret = external_agent_needs_rec(tdb);
+               ret = external_agent_needs_rec(ntdb);
                break;
        case CHECK:
-               ret = tdb_check(tdb, NULL, NULL) == 0 ? SUCCESS : OTHER_FAILURE;
+               ret = ntdb_check(ntdb, NULL, NULL) == 0 ? SUCCESS : OTHER_FAILURE;
                break;
        case CLOSE:
-               ret = tdb_close(tdb) == 0 ? SUCCESS : OTHER_FAILURE;
-               tdb = NULL;
+               ret = ntdb_close(ntdb) == 0 ? SUCCESS : OTHER_FAILURE;
+               ntdb = NULL;
                break;
        case SEND_SIGNAL:
                /* We do this async */
@@ -144,7 +144,7 @@ struct agent {
        int cmdfd, responsefd;
 };
 
-/* Do this before doing any tdb stuff.  Return handle, or NULL. */
+/* Do this before doing any ntdb stuff.  Return handle, or NULL. */
 struct agent *prepare_external_agent(void)
 {
        int pid, ret;
similarity index 70%
rename from lib/tdb2/test/external-agent.h
rename to lib/ntdb/test/external-agent.h
index c4cd2b148d0f5a4fd293a8ba6b3d1d54f90c907f..c6b83d5b497e221f71d12bdb37497766916f2dfc 100644 (file)
@@ -1,5 +1,5 @@
-#ifndef TDB2_TEST_EXTERNAL_AGENT_H
-#define TDB2_TEST_EXTERNAL_AGENT_H
+#ifndef NTDB_TEST_EXTERNAL_AGENT_H
+#define NTDB_TEST_EXTERNAL_AGENT_H
 
 /* For locking tests, we need a different process to try things at
  * various times. */
@@ -16,7 +16,7 @@ enum operation {
        CLOSE,
 };
 
-/* Do this before doing any tdb stuff.  Return handle, or -1. */
+/* Do this before doing any ntdb stuff.  Return handle, or NULL. */
 struct agent *prepare_external_agent(void);
 
 enum agent_return {
@@ -28,14 +28,14 @@ enum agent_return {
 };
 
 /* Ask the external agent to try to do an operation.
- * name == tdb name for OPEN/OPEN_WITH_CLEAR_IF_FIRST,
+ * name == ntdb name for OPEN/OPEN_WITH_HOOK,
  * record name for FETCH/STORE (store stores name as data too)
  */
 enum agent_return external_agent_operation(struct agent *handle,
                                           enum operation op,
                                           const char *name);
 
-/* Hook into free() on tdb_data in external agent. */
+/* Hook into free() on ntdb_data in external agent. */
 extern void (*external_agent_free)(void *);
 
 /* Mapping enum -> string. */
@@ -45,7 +45,7 @@ const char *operation_name(enum operation op);
 void free_external_agent(struct agent *agent);
 
 /* Internal use: */
-struct tdb_context;
-enum agent_return external_agent_needs_rec(struct tdb_context *tdb);
+struct ntdb_context;
+enum agent_return external_agent_needs_rec(struct ntdb_context *ntdb);
 
-#endif /* TDB2_TEST_EXTERNAL_AGENT_H */
+#endif /* NTDB_TEST_EXTERNAL_AGENT_H */
similarity index 94%
rename from lib/tdb2/test/failtest_helper.c
rename to lib/ntdb/test/failtest_helper.c
index 386f1c2379279cdd65ebe200fcd01cee73fcfb0d..cc110919c3c21f2a66e8042df4649237d33b8607 100644 (file)
@@ -51,8 +51,8 @@ bool exit_check_log(struct tlist_calls *history)
                if (failmatch(i, URANDOM_READ))
                        continue;
 
-               /* Initial allocation of tdb doesn't log. */
-               if (failmatch(i, INITIAL_TDB_MALLOC))
+               /* Initial allocation of ntdb doesn't log. */
+               if (failmatch(i, INITIAL_NTDB_MALLOC))
                        continue;
 
                /* We don't block "failures" on non-blocking locks. */
@@ -77,7 +77,7 @@ block_repeat_failures(struct tlist_calls *history)
        if (failtest_suppress)
                return FAIL_DONT_FAIL;
 
-       if (failmatch(last, INITIAL_TDB_MALLOC)
+       if (failmatch(last, INITIAL_NTDB_MALLOC)
            || failmatch(last, URANDOM_OPEN)
            || failmatch(last, URANDOM_READ)) {
                return FAIL_PROBE;
similarity index 75%
rename from lib/tdb2/test/failtest_helper.h
rename to lib/ntdb/test/failtest_helper.h
index 3c509e7c38b9c4cce8cef806d7476ce8ec071b55..e754636402eee766f76fdbfcef359b68c95dbd12 100644 (file)
@@ -1,10 +1,10 @@
-#ifndef TDB2_TEST_FAILTEST_HELPER_H
-#define TDB2_TEST_FAILTEST_HELPER_H
+#ifndef NTDB_TEST_FAILTEST_HELPER_H
+#define NTDB_TEST_FAILTEST_HELPER_H
 #include <ccan/failtest/failtest.h>
 #include <stdbool.h>
 
 /* FIXME: Check these! */
-#define INITIAL_TDB_MALLOC     "open.c", 403, FAILTEST_MALLOC
+#define INITIAL_NTDB_MALLOC    "open.c", 403, FAILTEST_MALLOC
 #define URANDOM_OPEN           "open.c", 62, FAILTEST_OPEN
 #define URANDOM_READ           "open.c", 42, FAILTEST_READ
 
@@ -16,4 +16,4 @@ enum failtest_result block_repeat_failures(struct tlist_calls *history);
 /* Set this to suppress failure. */
 extern bool failtest_suppress;
 
-#endif /* TDB2_TEST_LOGGING_H */
+#endif /* NTDB_TEST_FAILTEST_HELPER_H */
diff --git a/lib/ntdb/test/helpapi-external-agent.c b/lib/ntdb/test/helpapi-external-agent.c
new file mode 100644 (file)
index 0000000..eb81399
--- /dev/null
@@ -0,0 +1,7 @@
+#include "external-agent.h"
+
+/* This isn't possible via the ntdb API, but this makes it link. */
+enum agent_return external_agent_needs_rec(struct ntdb_context *ntdb)
+{
+       return FAILED;
+}
diff --git a/lib/ntdb/test/helprun-external-agent.c b/lib/ntdb/test/helprun-external-agent.c
new file mode 100644 (file)
index 0000000..81a3fe8
--- /dev/null
@@ -0,0 +1,7 @@
+#include "external-agent.h"
+#include "private.h"
+
+enum agent_return external_agent_needs_rec(struct ntdb_context *ntdb)
+{
+       return ntdb_needs_recovery(ntdb) ? SUCCESS : FAILED;
+}
diff --git a/lib/ntdb/test/helprun-layout.c b/lib/ntdb/test/helprun-layout.c
new file mode 100644 (file)
index 0000000..c8f1fd0
--- /dev/null
@@ -0,0 +1,402 @@
+/* NTDB tools to create various canned database layouts. */
+#include "layout.h"
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <ccan/err/err.h>
+#include "logging.h"
+
+struct ntdb_layout *new_ntdb_layout(void)
+{
+       struct ntdb_layout *layout = malloc(sizeof(*layout));
+       layout->num_elems = 0;
+       layout->elem = NULL;
+       return layout;
+}
+
+static void add(struct ntdb_layout *layout, union ntdb_layout_elem elem)
+{
+       layout->elem = realloc(layout->elem,
+                              sizeof(layout->elem[0])
+                              * (layout->num_elems+1));
+       layout->elem[layout->num_elems++] = elem;
+}
+
+void ntdb_layout_add_freetable(struct ntdb_layout *layout)
+{
+       union ntdb_layout_elem elem;
+       elem.base.type = FREETABLE;
+       add(layout, elem);
+}
+
+void ntdb_layout_add_free(struct ntdb_layout *layout, ntdb_len_t len,
+                        unsigned ftable)
+{
+       union ntdb_layout_elem elem;
+       elem.base.type = FREE;
+       elem.free.len = len;
+       elem.free.ftable_num = ftable;
+       add(layout, elem);
+}
+
+void ntdb_layout_add_capability(struct ntdb_layout *layout,
+                              uint64_t type,
+                              bool write_breaks,
+                              bool check_breaks,
+                              bool open_breaks,
+                              ntdb_len_t extra)
+{
+       union ntdb_layout_elem elem;
+       elem.base.type = CAPABILITY;
+       elem.capability.type = type;
+       if (write_breaks)
+               elem.capability.type |= NTDB_CAP_NOWRITE;
+       if (open_breaks)
+               elem.capability.type |= NTDB_CAP_NOOPEN;
+       if (check_breaks)
+               elem.capability.type |= NTDB_CAP_NOCHECK;
+       elem.capability.extra = extra;
+       add(layout, elem);
+}
+
+static NTDB_DATA dup_key(NTDB_DATA key)
+{
+       NTDB_DATA ret;
+       ret.dsize = key.dsize;
+       ret.dptr = malloc(ret.dsize);
+       memcpy(ret.dptr, key.dptr, ret.dsize);
+       return ret;
+}
+
+void ntdb_layout_add_used(struct ntdb_layout *layout,
+                        NTDB_DATA key, NTDB_DATA data,
+                        ntdb_len_t extra)
+{
+       union ntdb_layout_elem elem;
+       elem.base.type = DATA;
+       elem.used.key = dup_key(key);
+       elem.used.data = dup_key(data);
+       elem.used.extra = extra;
+       add(layout, elem);
+}
+
+static ntdb_len_t free_record_len(ntdb_len_t len)
+{
+       return sizeof(struct ntdb_used_record) + len;
+}
+
+static ntdb_len_t data_record_len(struct tle_used *used)
+{
+       ntdb_len_t len;
+       len = sizeof(struct ntdb_used_record)
+               + used->key.dsize + used->data.dsize + used->extra;
+       assert(len >= sizeof(struct ntdb_free_record));
+       return len;
+}
+
+static ntdb_len_t hashtable_len(struct tle_hashtable *htable)
+{
+       return sizeof(struct ntdb_used_record)
+               + (sizeof(ntdb_off_t) << NTDB_SUBLEVEL_HASH_BITS)
+               + htable->extra;
+}
+
+static ntdb_len_t capability_len(struct tle_capability *cap)
+{
+       return sizeof(struct ntdb_capability) + cap->extra;
+}
+
+static ntdb_len_t freetable_len(struct tle_freetable *ftable)
+{
+       return sizeof(struct ntdb_freetable);
+}
+
+static void set_free_record(void *mem, ntdb_len_t len)
+{
+       /* We do all the work in add_to_freetable */
+}
+
+static void add_zero_pad(struct ntdb_used_record *u, size_t len, size_t extra)
+{
+       if (extra)
+               ((char *)(u + 1))[len] = '\0';
+}
+
+static void set_data_record(void *mem, struct ntdb_context *ntdb,
+                           struct tle_used *used)
+{
+       struct ntdb_used_record *u = mem;
+
+       set_header(ntdb, u, NTDB_USED_MAGIC, used->key.dsize, used->data.dsize,
+                  used->key.dsize + used->data.dsize + used->extra,
+                  ntdb_hash(ntdb, used->key.dptr, used->key.dsize));
+       memcpy(u + 1, used->key.dptr, used->key.dsize);
+       memcpy((char *)(u + 1) + used->key.dsize,
+              used->data.dptr, used->data.dsize);
+       add_zero_pad(u, used->key.dsize + used->data.dsize, used->extra);
+}
+
+static void set_hashtable(void *mem, struct ntdb_context *ntdb,
+                         struct tle_hashtable *htable)
+{
+       struct ntdb_used_record *u = mem;
+       ntdb_len_t len = sizeof(ntdb_off_t) << NTDB_SUBLEVEL_HASH_BITS;
+
+       set_header(ntdb, u, NTDB_HTABLE_MAGIC, 0, len, len + htable->extra, 0);
+       memset(u + 1, 0, len);
+       add_zero_pad(u, len, htable->extra);
+}
+
+static void set_capability(void *mem, struct ntdb_context *ntdb,
+                          struct tle_capability *cap, struct ntdb_header *hdr,
+                          ntdb_off_t last_cap)
+{
+       struct ntdb_capability *c = mem;
+       ntdb_len_t len = sizeof(*c) - sizeof(struct ntdb_used_record) + cap->extra;
+
+       c->type = cap->type;
+       c->next = 0;
+       set_header(ntdb, &c->hdr, NTDB_CAP_MAGIC, 0, len, len, 0);
+
+       /* Append to capability list. */
+       if (!last_cap) {
+               hdr->capabilities = cap->base.off;
+       } else {
+               c = (struct ntdb_capability *)((char *)hdr + last_cap);
+               c->next = cap->base.off;
+       }
+}
+
+static void set_freetable(void *mem, struct ntdb_context *ntdb,
+                        struct tle_freetable *freetable, struct ntdb_header *hdr,
+                        ntdb_off_t last_ftable)
+{
+       struct ntdb_freetable *ftable = mem;
+       memset(ftable, 0, sizeof(*ftable));
+       set_header(ntdb, &ftable->hdr, NTDB_FTABLE_MAGIC, 0,
+                       sizeof(*ftable) - sizeof(ftable->hdr),
+                       sizeof(*ftable) - sizeof(ftable->hdr), 0);
+
+       if (last_ftable) {
+               ftable = (struct ntdb_freetable *)((char *)hdr + last_ftable);
+               ftable->next = freetable->base.off;
+       } else {
+               hdr->free_table = freetable->base.off;
+       }
+}
+
+static void add_to_freetable(struct ntdb_context *ntdb,
+                            ntdb_off_t eoff,
+                            ntdb_off_t elen,
+                            unsigned ftable,
+                            struct tle_freetable *freetable)
+{
+       ntdb->ftable_off = freetable->base.off;
+       ntdb->ftable = ftable;
+       add_free_record(ntdb, eoff, sizeof(struct ntdb_used_record) + elen,
+                       NTDB_LOCK_WAIT, false);
+}
+
+static ntdb_off_t hbucket_off(ntdb_off_t group_start, unsigned ingroup)
+{
+       return group_start
+               + (ingroup % (1 << NTDB_HASH_GROUP_BITS)) * sizeof(ntdb_off_t);
+}
+
+/* Get bits from a value. */
+static uint32_t bits(uint64_t val, unsigned start, unsigned num)
+{
+       assert(num <= 32);
+       return (val >> start) & ((1U << num) - 1);
+}
+
+/* We take bits from the top: that way we can lock whole sections of the hash
+ * by using lock ranges. */
+static uint32_t use_bits(uint64_t h, unsigned num, unsigned *used)
+{
+       *used += num;
+       return bits(h, 64 - *used, num);
+}
+
+static ntdb_off_t encode_offset(ntdb_off_t new_off, unsigned bucket,
+                              uint64_t h)
+{
+       return bucket
+               | new_off
+               | ((uint64_t)bits(h, 64 - NTDB_OFF_UPPER_STEAL_EXTRA,
+                                 NTDB_OFF_UPPER_STEAL_EXTRA)
+                  << NTDB_OFF_HASH_EXTRA_BIT);
+}
+
+/* FIXME: Our hash table handling here is primitive: we don't expand! */
+static void add_to_hashtable(struct ntdb_context *ntdb,
+                            ntdb_off_t eoff,
+                            NTDB_DATA key)
+{
+       uint64_t h = ntdb_hash(ntdb, key.dptr, key.dsize);
+       ntdb_off_t b_off, group_start;
+       unsigned i, group, in_group;
+       unsigned used = 0;
+
+       group = use_bits(h, NTDB_TOPLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS, &used);
+       in_group = use_bits(h, NTDB_HASH_GROUP_BITS, &used);
+
+       group_start = offsetof(struct ntdb_header, hashtable)
+               + group * (sizeof(ntdb_off_t) << NTDB_HASH_GROUP_BITS);
+
+       for (i = 0; i < (1 << NTDB_HASH_GROUP_BITS); i++) {
+               unsigned bucket = (in_group + i) % (1 << NTDB_HASH_GROUP_BITS);
+
+               b_off = hbucket_off(group_start, bucket);
+               if (ntdb_read_off(ntdb, b_off) == 0) {
+                       ntdb_write_off(ntdb, b_off,
+                                     encode_offset(eoff, in_group, h));
+                       return;
+               }
+       }
+       abort();
+}
+
+static struct tle_freetable *find_ftable(struct ntdb_layout *layout, unsigned num)
+{
+       unsigned i;
+
+       for (i = 0; i < layout->num_elems; i++) {
+               if (layout->elem[i].base.type != FREETABLE)
+                       continue;
+               if (num == 0)
+                       return &layout->elem[i].ftable;
+               num--;
+       }
+       abort();
+}
+
+/* FIXME: Support NTDB_CONVERT */
+struct ntdb_context *ntdb_layout_get(struct ntdb_layout *layout,
+                                  void (*freefn)(void *),
+                                  union ntdb_attribute *attr)
+{
+       unsigned int i;
+       ntdb_off_t off, len, last_ftable, last_cap;
+       char *mem;
+       struct ntdb_context *ntdb;
+
+       off = sizeof(struct ntdb_header);
+
+       /* First pass of layout: calc lengths */
+       for (i = 0; i < layout->num_elems; i++) {
+               union ntdb_layout_elem *e = &layout->elem[i];
+               e->base.off = off;
+               switch (e->base.type) {
+               case FREETABLE:
+                       len = freetable_len(&e->ftable);
+                       break;
+               case FREE:
+                       len = free_record_len(e->free.len);
+                       break;
+               case DATA:
+                       len = data_record_len(&e->used);
+                       break;
+               case HASHTABLE:
+                       len = hashtable_len(&e->hashtable);
+                       break;
+               case CAPABILITY:
+                       len = capability_len(&e->capability);
+                       break;
+               default:
+                       abort();
+               }
+               off += len;
+       }
+
+       mem = malloc(off);
+       /* Fill with some weird pattern. */
+       memset(mem, 0x99, off);
+       /* Now populate our header, cribbing from a real NTDB header. */
+       ntdb = ntdb_open(NULL, NTDB_INTERNAL, O_RDWR, 0, attr);
+       memcpy(mem, ntdb->file->map_ptr, sizeof(struct ntdb_header));
+
+       /* Mug the ntdb we have to make it use this. */
+       freefn(ntdb->file->map_ptr);
+       ntdb->file->map_ptr = mem;
+       ntdb->file->map_size = off;
+
+       last_ftable = 0;
+       last_cap = 0;
+       for (i = 0; i < layout->num_elems; i++) {
+               union ntdb_layout_elem *e = &layout->elem[i];
+               switch (e->base.type) {
+               case FREETABLE:
+                       set_freetable(mem + e->base.off, ntdb, &e->ftable,
+                                    (struct ntdb_header *)mem, last_ftable);
+                       last_ftable = e->base.off;
+                       break;
+               case FREE:
+                       set_free_record(mem + e->base.off, e->free.len);
+                       break;
+               case DATA:
+                       set_data_record(mem + e->base.off, ntdb, &e->used);
+                       break;
+               case HASHTABLE:
+                       set_hashtable(mem + e->base.off, ntdb, &e->hashtable);
+                       break;
+               case CAPABILITY:
+                       set_capability(mem + e->base.off, ntdb, &e->capability,
+                                      (struct ntdb_header *)mem, last_cap);
+                       last_cap = e->base.off;
+                       break;
+               }
+       }
+       /* Must have a free table! */
+       assert(last_ftable);
+
+       /* Now fill the free and hash tables. */
+       for (i = 0; i < layout->num_elems; i++) {
+               union ntdb_layout_elem *e = &layout->elem[i];
+               switch (e->base.type) {
+               case FREE:
+                       add_to_freetable(ntdb, e->base.off, e->free.len,
+                                        e->free.ftable_num,
+                                        find_ftable(layout, e->free.ftable_num));
+                       break;
+               case DATA:
+                       add_to_hashtable(ntdb, e->base.off, e->used.key);
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       ntdb->ftable_off = find_ftable(layout, 0)->base.off;
+       return ntdb;
+}
+
+void ntdb_layout_write(struct ntdb_layout *layout, void (*freefn)(void *),
+                      union ntdb_attribute *attr, const char *filename)
+{
+       struct ntdb_context *ntdb = ntdb_layout_get(layout, freefn, attr);
+       int fd;
+
+       fd = open(filename, O_WRONLY|O_TRUNC|O_CREAT,  0600);
+       if (fd < 0)
+               err(1, "opening %s for writing", filename);
+       if (write(fd, ntdb->file->map_ptr, ntdb->file->map_size)
+           != ntdb->file->map_size)
+               err(1, "writing %s", filename);
+       close(fd);
+       ntdb_close(ntdb);
+}
+
+void ntdb_layout_free(struct ntdb_layout *layout)
+{
+       unsigned int i;
+
+       for (i = 0; i < layout->num_elems; i++) {
+               if (layout->elem[i].base.type == DATA) {
+                       free(layout->elem[i].used.key.dptr);
+                       free(layout->elem[i].used.data.dptr);
+               }
+       }
+       free(layout->elem);
+       free(layout);
+}
similarity index 50%
rename from lib/tdb2/test/layout.h
rename to lib/ntdb/test/layout.h
index 3aadf20ee21132842e27778b57f728382b734fc4..bcd20b896513b710510a3746cedd4399637e61d4 100644 (file)
@@ -1,35 +1,35 @@
-#ifndef TDB2_TEST_LAYOUT_H
-#define TDB2_TEST_LAYOUT_H
+#ifndef NTDB_TEST_LAYOUT_H
+#define NTDB_TEST_LAYOUT_H
 #include "private.h"
 
-struct tdb_layout *new_tdb_layout(void);
-void tdb_layout_add_freetable(struct tdb_layout *layout);
-void tdb_layout_add_free(struct tdb_layout *layout, tdb_len_t len,
+struct ntdb_layout *new_ntdb_layout(void);
+void ntdb_layout_add_freetable(struct ntdb_layout *layout);
+void ntdb_layout_add_free(struct ntdb_layout *layout, ntdb_len_t len,
                         unsigned ftable);
-void tdb_layout_add_used(struct tdb_layout *layout,
-                        TDB_DATA key, TDB_DATA data,
-                        tdb_len_t extra);
-void tdb_layout_add_capability(struct tdb_layout *layout,
+void ntdb_layout_add_used(struct ntdb_layout *layout,
+                        NTDB_DATA key, NTDB_DATA data,
+                        ntdb_len_t extra);
+void ntdb_layout_add_capability(struct ntdb_layout *layout,
                               uint64_t type,
                               bool write_breaks,
                               bool check_breaks,
                               bool open_breaks,
-                              tdb_len_t extra);
+                              ntdb_len_t extra);
 
 #if 0 /* FIXME: Allow allocation of subtables */
-void tdb_layout_add_hashtable(struct tdb_layout *layout,
+void ntdb_layout_add_hashtable(struct ntdb_layout *layout,
                              int htable_parent, /* -1 == toplevel */
                              unsigned int bucket,
-                             tdb_len_t extra);
+                             ntdb_len_t extra);
 #endif
 /* freefn is needed if we're using failtest_free. */
-struct tdb_context *tdb_layout_get(struct tdb_layout *layout,
+struct ntdb_context *ntdb_layout_get(struct ntdb_layout *layout,
                                   void (*freefn)(void *),
-                                  union tdb_attribute *attr);
-void tdb_layout_write(struct tdb_layout *layout, void (*freefn)(void *),
-                      union tdb_attribute *attr, const char *filename);
+                                  union ntdb_attribute *attr);
+void ntdb_layout_write(struct ntdb_layout *layout, void (*freefn)(void *),
+                      union ntdb_attribute *attr, const char *filename);
 
-void tdb_layout_free(struct tdb_layout *layout);
+void ntdb_layout_free(struct ntdb_layout *layout);
 
 enum layout_type {
        FREETABLE, FREE, DATA, HASHTABLE, CAPABILITY
@@ -38,7 +38,7 @@ enum layout_type {
 /* Shared by all union members. */
 struct tle_base {
        enum layout_type type;
-       tdb_off_t off;
+       ntdb_off_t off;
 };
 
 struct tle_freetable {
@@ -47,31 +47,31 @@ struct tle_freetable {
 
 struct tle_free {
        struct tle_base base;
-       tdb_len_t len;
+       ntdb_len_t len;
        unsigned ftable_num;
 };
 
 struct tle_used {
        struct tle_base base;
-       TDB_DATA key;
-       TDB_DATA data;
-       tdb_len_t extra;
+       NTDB_DATA key;
+       NTDB_DATA data;
+       ntdb_len_t extra;
 };
 
 struct tle_hashtable {
        struct tle_base base;
        int parent;
        unsigned int bucket;
-       tdb_len_t extra;
+       ntdb_len_t extra;
 };
 
 struct tle_capability {
        struct tle_base base;
        uint64_t type;
-       tdb_len_t extra;
+       ntdb_len_t extra;
 };
 
-union tdb_layout_elem {
+union ntdb_layout_elem {
        struct tle_base base;
        struct tle_freetable ftable;
        struct tle_free free;
@@ -80,8 +80,8 @@ union tdb_layout_elem {
        struct tle_capability capability;
 };
 
-struct tdb_layout {
+struct ntdb_layout {
        unsigned int num_elems;
-       union tdb_layout_elem *elem;
+       union ntdb_layout_elem *elem;
 };
-#endif /* TDB2_TEST_LAYOUT_H */
+#endif /* NTDB_TEST_LAYOUT_H */
similarity index 92%
rename from lib/tdb2/test/lock-tracking.c
rename to lib/ntdb/test/lock-tracking.c
index c7387ead994f5e10377f23f03b50304d48bdce96..525a5c4ca7f6467d3138a6417786604a9c84e1b4 100644 (file)
@@ -1,5 +1,5 @@
 /* We save the locks so we can reaquire them. */
-#include "private.h" /* For TDB_HASH_LOCK_START, etc. */
+#include "private.h" /* For NTDB_HASH_LOCK_START, etc. */
 #include <unistd.h>
 #include <fcntl.h>
 #include <stdarg.h>
@@ -88,8 +88,8 @@ int fcntl_with_lockcheck(int fd, int cmd, ... /* arg */ )
                        if (fl_end > i->off && fl_end < i_end)
                                break;
 
-                       /* tdb_allrecord_lock does this, handle adjacent: */
-                       if (fl->l_start > TDB_HASH_LOCK_START
+                       /* ntdb_allrecord_lock does this, handle adjacent: */
+                       if (fl->l_start > NTDB_HASH_LOCK_START
                            && fl->l_start == i_end && fl->l_type == i->type) {
                                if (ret == 0) {
                                        i->len = fl->l_len
@@ -102,8 +102,8 @@ int fcntl_with_lockcheck(int fd, int cmd, ... /* arg */ )
                if (i) {
                        /* Special case: upgrade of allrecord lock. */
                        if (i->type == F_RDLCK && fl->l_type == F_WRLCK
-                           && i->off == TDB_HASH_LOCK_START
-                           && fl->l_start == TDB_HASH_LOCK_START
+                           && i->off == NTDB_HASH_LOCK_START
+                           && fl->l_start == NTDB_HASH_LOCK_START
                            && i->len == 0
                            && fl->l_len == 0) {
                                if (ret == 0)
similarity index 57%
rename from lib/tdb2/test/logging.c
rename to lib/ntdb/test/logging.c
index 86fc152bab309d03abafed6bc86e0c1f142d3398..2819dd7cad0b04f14888ff0f6bbe9a05259cf01e 100644 (file)
@@ -8,21 +8,21 @@ const char *log_prefix = "";
 char *log_last = NULL;
 bool suppress_logging;
 
-union tdb_attribute tap_log_attr = {
-       .log = { .base = { .attr = TDB_ATTRIBUTE_LOG },
+union ntdb_attribute tap_log_attr = {
+       .log = { .base = { .attr = NTDB_ATTRIBUTE_LOG },
                 .fn = tap_log_fn }
 };
 
-void tap_log_fn(struct tdb_context *tdb,
-               enum tdb_log_level level,
-               enum TDB_ERROR ecode,
+void tap_log_fn(struct ntdb_context *ntdb,
+               enum ntdb_log_level level,
+               enum NTDB_ERROR ecode,
                const char *message, void *priv)
 {
        if (suppress_logging)
                return;
 
-       diag("tdb log level %u: %s: %s%s",
-            level, tdb_errorstr(ecode), log_prefix, message);
+       diag("ntdb log level %u: %s: %s%s",
+            level, ntdb_errorstr(ecode), log_prefix, message);
        if (log_last)
                free(log_last);
        log_last = strdup(message);
diff --git a/lib/ntdb/test/logging.h b/lib/ntdb/test/logging.h
new file mode 100644 (file)
index 0000000..0336cca
--- /dev/null
@@ -0,0 +1,17 @@
+#ifndef NTDB_TEST_LOGGING_H
+#define NTDB_TEST_LOGGING_H
+#include "ntdb.h"
+#include <stdbool.h>
+#include <string.h>
+
+extern bool suppress_logging;
+extern const char *log_prefix;
+extern unsigned tap_log_messages;
+extern union ntdb_attribute tap_log_attr;
+extern char *log_last;
+
+void tap_log_fn(struct ntdb_context *ntdb,
+               enum ntdb_log_level level,
+               enum NTDB_ERROR ecode,
+               const char *message, void *priv);
+#endif /* NTDB_TEST_LOGGING_H */
similarity index 91%
rename from lib/tdb2/test/tdb2-source.h
rename to lib/ntdb/test/ntdb-source.h
index d13d8b868cc8292fdd2e49d5ada8022aa92d00c6..52268440d24c5d1e649a75b288f54e0b3f9304f6 100644 (file)
@@ -6,6 +6,6 @@
 #include "lock.c"
 #include "open.c"
 #include "summary.c"
-#include "tdb.c"
+#include "ntdb.c"
 #include "transaction.c"
 #include "traverse.c"
similarity index 68%
rename from lib/tdb2/test/run-001-encode.c
rename to lib/ntdb/test/run-001-encode.c
index 9657eb79d005942bc2abc8e6c1553c7ffbdbb65e..12965676a26396ec3d106f9298a1b97b6923881a 100644 (file)
@@ -1,24 +1,24 @@
-#include "tdb2-source.h"
+#include "ntdb-source.h"
 #include "tap-interface.h"
 #include "logging.h"
 
 int main(int argc, char *argv[])
 {
        unsigned int i;
-       struct tdb_used_record rec;
-       struct tdb_context tdb = { .log_fn = tap_log_fn };
+       struct ntdb_used_record rec;
+       struct ntdb_context ntdb = { .log_fn = tap_log_fn };
 
        plan_tests(64 + 32 + 48*6 + 1);
 
        /* We should be able to encode any data value. */
        for (i = 0; i < 64; i++)
-               ok1(set_header(&tdb, &rec, TDB_USED_MAGIC, 0, 1ULL << i,
+               ok1(set_header(&ntdb, &rec, NTDB_USED_MAGIC, 0, 1ULL << i,
                               1ULL << i, 0) == 0);
 
        /* And any key and data with < 64 bits between them. */
        for (i = 0; i < 32; i++) {
-               tdb_len_t dlen = 1ULL >> (63 - i), klen = 1ULL << i;
-               ok1(set_header(&tdb, &rec, TDB_USED_MAGIC, klen, dlen,
+               ntdb_len_t dlen = 1ULL >> (63 - i), klen = 1ULL << i;
+               ok1(set_header(&ntdb, &rec, NTDB_USED_MAGIC, klen, dlen,
                               klen + dlen, 0)  == 0);
        }
 
@@ -28,13 +28,13 @@ int main(int argc, char *argv[])
                uint64_t klen = 1ULL << (i < 16 ? i : 15);
                uint64_t dlen = 1ULL << i;
                uint64_t xlen = 1ULL << (i < 32 ? i : 31);
-               ok1(set_header(&tdb, &rec, TDB_USED_MAGIC, klen, dlen,
+               ok1(set_header(&ntdb, &rec, NTDB_USED_MAGIC, klen, dlen,
                               klen+dlen+xlen, h) == 0);
                ok1(rec_key_length(&rec) == klen);
                ok1(rec_data_length(&rec) == dlen);
                ok1(rec_extra_padding(&rec) == xlen);
                ok1((uint64_t)rec_hash(&rec) == h);
-               ok1(rec_magic(&rec) == TDB_USED_MAGIC);
+               ok1(rec_magic(&rec) == NTDB_USED_MAGIC);
        }
        ok1(tap_log_messages == 0);
        return exit_status();
similarity index 95%
rename from lib/tdb2/test/run-001-fls.c
rename to lib/ntdb/test/run-001-fls.c
index 792adbf655331313af45d2912abcde7ff58b4439..ec61294c6f32b21db970503fb3d0356a1afa1b91 100644 (file)
@@ -1,4 +1,4 @@
-#include "tdb2-source.h"
+#include "ntdb-source.h"
 #include "tap-interface.h"
 
 static unsigned int dumb_fls(uint64_t num)
similarity index 67%
rename from lib/tdb2/test/run-01-new_database.c
rename to lib/ntdb/test/run-01-new_database.c
index 00c15140df92ee30291320a601458e3b4e077df5..ae70e86e0728328d8144ed26d15f4f21c6f20986 100644 (file)
@@ -1,5 +1,5 @@
 #include <ccan/failtest/failtest_override.h>
-#include "tdb2-source.h"
+#include "ntdb-source.h"
 #include "tap-interface.h"
 #include <ccan/failtest/failtest.h>
 #include "logging.h"
@@ -8,25 +8,25 @@
 int main(int argc, char *argv[])
 {
        unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT };
+       struct ntdb_context *ntdb;
+       int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+                       NTDB_NOMMAP|NTDB_CONVERT };
 
        failtest_init(argc, argv);
        failtest_hook = block_repeat_failures;
        failtest_exit_check = exit_check_log;
        plan_tests(sizeof(flags) / sizeof(flags[0]) * 3);
        for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-new_database.tdb", flags[i],
+               ntdb = ntdb_open("run-new_database.ntdb", flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               if (!ok1(tdb))
+               if (!ok1(ntdb))
                        failtest_exit(exit_status());
 
                failtest_suppress = true;
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
                failtest_suppress = false;
-               tdb_close(tdb);
+               ntdb_close(ntdb);
                if (!ok1(tap_log_messages == 0))
                        break;
        }
diff --git a/lib/ntdb/test/run-02-expand.c b/lib/ntdb/test/run-02-expand.c
new file mode 100644 (file)
index 0000000..abf1569
--- /dev/null
@@ -0,0 +1,62 @@
+#include <ccan/failtest/failtest_override.h>
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include <ccan/failtest/failtest.h>
+#include "logging.h"
+#include "failtest_helper.h"
+
+int main(int argc, char *argv[])
+{
+       unsigned int i;
+       uint64_t val;
+       struct ntdb_context *ntdb;
+       int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+                       NTDB_NOMMAP|NTDB_CONVERT };
+
+       plan_tests(sizeof(flags) / sizeof(flags[0]) * 11 + 1);
+
+       failtest_init(argc, argv);
+       failtest_hook = block_repeat_failures;
+       failtest_exit_check = exit_check_log;
+
+       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+               failtest_suppress = true;
+               ntdb = ntdb_open("run-expand.ntdb", flags[i],
+                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+               if (!ok1(ntdb))
+                       break;
+
+               val = ntdb->file->map_size;
+               /* Need some hash lock for expand. */
+               ok1(ntdb_lock_hashes(ntdb, 0, 1, F_WRLCK, NTDB_LOCK_WAIT) == 0);
+               failtest_suppress = false;
+               if (!ok1(ntdb_expand(ntdb, 1) == 0)) {
+                       failtest_suppress = true;
+                       ntdb_close(ntdb);
+                       break;
+               }
+               failtest_suppress = true;
+
+               ok1(ntdb->file->map_size >= val + 1 * NTDB_EXTENSION_FACTOR);
+               ok1(ntdb_unlock_hashes(ntdb, 0, 1, F_WRLCK) == 0);
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+               val = ntdb->file->map_size;
+               ok1(ntdb_lock_hashes(ntdb, 0, 1, F_WRLCK, NTDB_LOCK_WAIT) == 0);
+               failtest_suppress = false;
+               if (!ok1(ntdb_expand(ntdb, 1024) == 0)) {
+                       failtest_suppress = true;
+                       ntdb_close(ntdb);
+                       break;
+               }
+               failtest_suppress = true;
+               ok1(ntdb_unlock_hashes(ntdb, 0, 1, F_WRLCK) == 0);
+               ok1(ntdb->file->map_size >= val + 1024 * NTDB_EXTENSION_FACTOR);
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+               ntdb_close(ntdb);
+       }
+
+       ok1(tap_log_messages == 0);
+       failtest_exit(exit_status());
+}
diff --git a/lib/ntdb/test/run-03-coalesce.c b/lib/ntdb/test/run-03-coalesce.c
new file mode 100644 (file)
index 0000000..f93b33a
--- /dev/null
@@ -0,0 +1,178 @@
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "layout.h"
+
+static ntdb_len_t free_record_length(struct ntdb_context *ntdb, ntdb_off_t off)
+{
+       struct ntdb_free_record f;
+       enum NTDB_ERROR ecode;
+
+       ecode = ntdb_read_convert(ntdb, off, &f, sizeof(f));
+       if (ecode != NTDB_SUCCESS)
+               return ecode;
+       if (frec_magic(&f) != NTDB_FREE_MAGIC)
+               return NTDB_ERR_CORRUPT;
+       return frec_len(&f);
+}
+
+int main(int argc, char *argv[])
+{
+       ntdb_off_t b_off, test;
+       struct ntdb_context *ntdb;
+       struct ntdb_layout *layout;
+       NTDB_DATA data, key;
+       ntdb_len_t len;
+
+       /* FIXME: Test NTDB_CONVERT */
+       /* FIXME: Test lock order fail. */
+
+       plan_tests(42);
+       data = ntdb_mkdata("world", 5);
+       key = ntdb_mkdata("hello", 5);
+
+       /* No coalescing can be done due to EOF */
+       layout = new_ntdb_layout();
+       ntdb_layout_add_freetable(layout);
+       len = 1024;
+       ntdb_layout_add_free(layout, len, 0);
+       ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb");
+       /* NOMMAP is for lockcheck. */
+       ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP, O_RDWR, 0,
+                      &tap_log_attr);
+       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+       ok1(free_record_length(ntdb, layout->elem[1].base.off) == len);
+
+       /* Figure out which bucket free entry is. */
+       b_off = bucket_off(ntdb->ftable_off, size_to_bucket(len));
+       /* Lock and fail to coalesce. */
+       ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0);
+       test = layout->elem[1].base.off;
+       ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, len, &test)
+           == 0);
+       ntdb_unlock_free_bucket(ntdb, b_off);
+       ok1(free_record_length(ntdb, layout->elem[1].base.off) == len);
+       ok1(test == layout->elem[1].base.off);
+       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+       ntdb_close(ntdb);
+       ntdb_layout_free(layout);
+
+       /* No coalescing can be done due to used record */
+       layout = new_ntdb_layout();
+       ntdb_layout_add_freetable(layout);
+       ntdb_layout_add_free(layout, 1024, 0);
+       ntdb_layout_add_used(layout, key, data, 6);
+       ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb");
+       /* NOMMAP is for lockcheck. */
+       ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP, O_RDWR, 0,
+                      &tap_log_attr);
+       ok1(free_record_length(ntdb, layout->elem[1].base.off) == 1024);
+       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+       /* Figure out which bucket free entry is. */
+       b_off = bucket_off(ntdb->ftable_off, size_to_bucket(1024));
+       /* Lock and fail to coalesce. */
+       ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0);
+       test = layout->elem[1].base.off;
+       ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 1024, &test)
+           == 0);
+       ntdb_unlock_free_bucket(ntdb, b_off);
+       ok1(free_record_length(ntdb, layout->elem[1].base.off) == 1024);
+       ok1(test == layout->elem[1].base.off);
+       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+       ntdb_close(ntdb);
+       ntdb_layout_free(layout);
+
+       /* Coalescing can be done due to two free records, then EOF */
+       layout = new_ntdb_layout();
+       ntdb_layout_add_freetable(layout);
+       ntdb_layout_add_free(layout, 1024, 0);
+       ntdb_layout_add_free(layout, 2048, 0);
+       ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb");
+       /* NOMMAP is for lockcheck. */
+       ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP, O_RDWR, 0,
+                      &tap_log_attr);
+       ok1(free_record_length(ntdb, layout->elem[1].base.off) == 1024);
+       ok1(free_record_length(ntdb, layout->elem[2].base.off) == 2048);
+       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+       /* Figure out which bucket (first) free entry is. */
+       b_off = bucket_off(ntdb->ftable_off, size_to_bucket(1024));
+       /* Lock and coalesce. */
+       ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0);
+       test = layout->elem[2].base.off;
+       ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 1024, &test)
+           == 1024 + sizeof(struct ntdb_used_record) + 2048);
+       /* Should tell us it's erased this one... */
+       ok1(test == NTDB_ERR_NOEXIST);
+       ok1(ntdb->file->allrecord_lock.count == 0 && ntdb->file->num_lockrecs == 0);
+       ok1(free_record_length(ntdb, layout->elem[1].base.off)
+           == 1024 + sizeof(struct ntdb_used_record) + 2048);
+       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+       ntdb_close(ntdb);
+       ntdb_layout_free(layout);
+
+       /* Coalescing can be done due to two free records, then data */
+       layout = new_ntdb_layout();
+       ntdb_layout_add_freetable(layout);
+       ntdb_layout_add_free(layout, 1024, 0);
+       ntdb_layout_add_free(layout, 512, 0);
+       ntdb_layout_add_used(layout, key, data, 6);
+       ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb");
+       /* NOMMAP is for lockcheck. */
+       ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP, O_RDWR, 0,
+                      &tap_log_attr);
+       ok1(free_record_length(ntdb, layout->elem[1].base.off) == 1024);
+       ok1(free_record_length(ntdb, layout->elem[2].base.off) == 512);
+       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+       /* Figure out which bucket free entry is. */
+       b_off = bucket_off(ntdb->ftable_off, size_to_bucket(1024));
+       /* Lock and coalesce. */
+       ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0);
+       test = layout->elem[2].base.off;
+       ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 1024, &test)
+           == 1024 + sizeof(struct ntdb_used_record) + 512);
+       ok1(ntdb->file->allrecord_lock.count == 0 && ntdb->file->num_lockrecs == 0);
+       ok1(free_record_length(ntdb, layout->elem[1].base.off)
+           == 1024 + sizeof(struct ntdb_used_record) + 512);
+       ok1(test == NTDB_ERR_NOEXIST);
+       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+       ntdb_close(ntdb);
+       ntdb_layout_free(layout);
+
+       /* Coalescing can be done due to three free records, then EOF */
+       layout = new_ntdb_layout();
+       ntdb_layout_add_freetable(layout);
+       ntdb_layout_add_free(layout, 1024, 0);
+       ntdb_layout_add_free(layout, 512, 0);
+       ntdb_layout_add_free(layout, 256, 0);
+       ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb");
+       /* NOMMAP is for lockcheck. */
+       ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP, O_RDWR, 0,
+                      &tap_log_attr);
+       ok1(free_record_length(ntdb, layout->elem[1].base.off) == 1024);
+       ok1(free_record_length(ntdb, layout->elem[2].base.off) == 512);
+       ok1(free_record_length(ntdb, layout->elem[3].base.off) == 256);
+       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+       /* Figure out which bucket free entry is. */
+       b_off = bucket_off(ntdb->ftable_off, size_to_bucket(1024));
+       /* Lock and coalesce. */
+       ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0);
+       test = layout->elem[2].base.off;
+       ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 1024, &test)
+           == 1024 + sizeof(struct ntdb_used_record) + 512
+           + sizeof(struct ntdb_used_record) + 256);
+       ok1(ntdb->file->allrecord_lock.count == 0
+           && ntdb->file->num_lockrecs == 0);
+       ok1(free_record_length(ntdb, layout->elem[1].base.off)
+           == 1024 + sizeof(struct ntdb_used_record) + 512
+           + sizeof(struct ntdb_used_record) + 256);
+       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+       ntdb_close(ntdb);
+       ntdb_layout_free(layout);
+
+       ok1(tap_log_messages == 0);
+       return exit_status();
+}
diff --git a/lib/ntdb/test/run-04-basichash.c b/lib/ntdb/test/run-04-basichash.c
new file mode 100644 (file)
index 0000000..6e3bdc0
--- /dev/null
@@ -0,0 +1,260 @@
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include "logging.h"
+
+/* We rig the hash so adjacent-numbered records always clash. */
+static uint64_t clash(const void *key, size_t len, uint64_t seed, void *priv)
+{
+       return ((uint64_t)*(const unsigned int *)key)
+               << (64 - NTDB_TOPLEVEL_HASH_BITS - 1);
+}
+
+int main(int argc, char *argv[])
+{
+       unsigned int i, j;
+       struct ntdb_context *ntdb;
+       unsigned int v;
+       struct ntdb_used_record rec;
+       NTDB_DATA key = { (unsigned char *)&v, sizeof(v) };
+       NTDB_DATA dbuf = { (unsigned char *)&v, sizeof(v) };
+       union ntdb_attribute hattr = { .hash = { .base = { NTDB_ATTRIBUTE_HASH },
+                                               .fn = clash } };
+       int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+                       NTDB_NOMMAP|NTDB_CONVERT,
+       };
+
+       hattr.base.next = &tap_log_attr;
+
+       plan_tests(sizeof(flags) / sizeof(flags[0])
+                  * (91 + (2 * ((1 << NTDB_HASH_GROUP_BITS) - 1))) + 1);
+       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+               struct hash_info h;
+               ntdb_off_t new_off, off, subhash;
+
+               ntdb = ntdb_open("run-04-basichash.ntdb", flags[i],
+                              O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
+               ok1(ntdb);
+               if (!ntdb)
+                       continue;
+
+               v = 0;
+               /* Should not find it. */
+               ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0);
+               /* Should have created correct hash. */
+               ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
+               /* Should have located space in group 0, bucket 0. */
+               ok1(h.group_start == offsetof(struct ntdb_header, hashtable));
+               ok1(h.home_bucket == 0);
+               ok1(h.found_bucket == 0);
+               ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS);
+
+               /* Should have lock on bucket 0 */
+               ok1(h.hlock_start == 0);
+               ok1(h.hlock_range ==
+                   1ULL << (64-(NTDB_TOPLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS)));
+               ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
+               ok1((ntdb->flags & NTDB_NOLOCK)
+                   || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
+               /* FIXME: Check lock length */
+
+               /* Allocate a new record. */
+               new_off = alloc(ntdb, key.dsize, dbuf.dsize, h.h,
+                               NTDB_USED_MAGIC, false);
+               ok1(!NTDB_OFF_IS_ERR(new_off));
+
+               /* We should be able to add it now. */
+               ok1(add_to_hash(ntdb, &h, new_off) == 0);
+
+               /* Make sure we fill it in for later finding. */
+               off = new_off + sizeof(struct ntdb_used_record);
+               ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize));
+               off += key.dsize;
+               ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize));
+
+               /* We should be able to unlock that OK. */
+               ok1(ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range,
+                                     F_WRLCK) == 0);
+
+               /* Database should be consistent. */
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+               /* Now, this should give a successful lookup. */
+               ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL)
+                   == new_off);
+               /* Should have created correct hash. */
+               ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
+               /* Should have located space in group 0, bucket 0. */
+               ok1(h.group_start == offsetof(struct ntdb_header, hashtable));
+               ok1(h.home_bucket == 0);
+               ok1(h.found_bucket == 0);
+               ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS);
+
+               /* Should have lock on bucket 0 */
+               ok1(h.hlock_start == 0);
+               ok1(h.hlock_range ==
+                   1ULL << (64-(NTDB_TOPLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS)));
+               ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
+               ok1((ntdb->flags & NTDB_NOLOCK)
+                   || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
+               /* FIXME: Check lock length */
+
+               ok1(ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range,
+                                     F_WRLCK) == 0);
+
+               /* Database should be consistent. */
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+               /* Test expansion. */
+               v = 1;
+               ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0);
+               /* Should have created correct hash. */
+               ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
+               /* Should have located space in group 0, bucket 1. */
+               ok1(h.group_start == offsetof(struct ntdb_header, hashtable));
+               ok1(h.home_bucket == 0);
+               ok1(h.found_bucket == 1);
+               ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS);
+
+               /* Should have lock on bucket 0 */
+               ok1(h.hlock_start == 0);
+               ok1(h.hlock_range ==
+                   1ULL << (64-(NTDB_TOPLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS)));
+               ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
+               ok1((ntdb->flags & NTDB_NOLOCK)
+                   || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
+               /* FIXME: Check lock length */
+
+               /* Make it expand 0'th bucket. */
+               ok1(expand_group(ntdb, &h) == 0);
+               /* First one should be subhash, next should be empty. */
+               ok1(is_subhash(h.group[0]));
+               subhash = (h.group[0] & NTDB_OFF_MASK);
+               for (j = 1; j < (1 << NTDB_HASH_GROUP_BITS); j++)
+                       ok1(h.group[j] == 0);
+
+               ok1(ntdb_write_convert(ntdb, h.group_start,
+                                     h.group, sizeof(h.group)) == 0);
+               ok1(ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range,
+                                     F_WRLCK) == 0);
+
+               /* Should be happy with expansion. */
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+               /* Should be able to find it. */
+               v = 0;
+               ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL)
+                   == new_off);
+               /* Should have created correct hash. */
+               ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
+               /* Should have located space in expanded group 0, bucket 0. */
+               ok1(h.group_start == subhash + sizeof(struct ntdb_used_record));
+               ok1(h.home_bucket == 0);
+               ok1(h.found_bucket == 0);
+               ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS
+                   + NTDB_SUBLEVEL_HASH_BITS);
+
+               /* Should have lock on bucket 0 */
+               ok1(h.hlock_start == 0);
+               ok1(h.hlock_range ==
+                   1ULL << (64-(NTDB_TOPLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS)));
+               ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
+               ok1((ntdb->flags & NTDB_NOLOCK)
+                   || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
+               /* FIXME: Check lock length */
+
+               /* Simple delete should work. */
+               ok1(delete_from_hash(ntdb, &h) == 0);
+               ok1(add_free_record(ntdb, new_off,
+                                   sizeof(struct ntdb_used_record)
+                                   + rec_key_length(&rec)
+                                   + rec_data_length(&rec)
+                                   + rec_extra_padding(&rec),
+                                   NTDB_LOCK_NOWAIT, false) == 0);
+               ok1(ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range,
+                                     F_WRLCK) == 0);
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+               /* Test second-level expansion: should expand 0th bucket. */
+               v = 0;
+               ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0);
+               /* Should have created correct hash. */
+               ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
+               /* Should have located space in group 0, bucket 0. */
+               ok1(h.group_start == subhash + sizeof(struct ntdb_used_record));
+               ok1(h.home_bucket == 0);
+               ok1(h.found_bucket == 0);
+               ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS+NTDB_SUBLEVEL_HASH_BITS);
+
+               /* Should have lock on bucket 0 */
+               ok1(h.hlock_start == 0);
+               ok1(h.hlock_range ==
+                   1ULL << (64-(NTDB_TOPLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS)));
+               ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
+               ok1((ntdb->flags & NTDB_NOLOCK)
+                   || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
+               /* FIXME: Check lock length */
+
+               ok1(expand_group(ntdb, &h) == 0);
+               /* First one should be subhash, next should be empty. */
+               ok1(is_subhash(h.group[0]));
+               subhash = (h.group[0] & NTDB_OFF_MASK);
+               for (j = 1; j < (1 << NTDB_HASH_GROUP_BITS); j++)
+                       ok1(h.group[j] == 0);
+               ok1(ntdb_write_convert(ntdb, h.group_start,
+                                     h.group, sizeof(h.group)) == 0);
+               ok1(ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range,
+                                     F_WRLCK) == 0);
+
+               /* Should be happy with expansion. */
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+               ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0);
+               /* Should have created correct hash. */
+               ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
+               /* Should have located space in group 0, bucket 0. */
+               ok1(h.group_start == subhash + sizeof(struct ntdb_used_record));
+               ok1(h.home_bucket == 0);
+               ok1(h.found_bucket == 0);
+               ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS
+                   + NTDB_SUBLEVEL_HASH_BITS * 2);
+
+               /* We should be able to add it now. */
+               /* Allocate a new record. */
+               new_off = alloc(ntdb, key.dsize, dbuf.dsize, h.h,
+                               NTDB_USED_MAGIC, false);
+               ok1(!NTDB_OFF_IS_ERR(new_off));
+               ok1(add_to_hash(ntdb, &h, new_off) == 0);
+
+               /* Make sure we fill it in for later finding. */
+               off = new_off + sizeof(struct ntdb_used_record);
+               ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize));
+               off += key.dsize;
+               ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize));
+
+               /* We should be able to unlock that OK. */
+               ok1(ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range,
+                                     F_WRLCK) == 0);
+
+               /* Database should be consistent. */
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+               /* Should be able to find it. */
+               v = 0;
+               ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL)
+                   == new_off);
+               /* Should have created correct hash. */
+               ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
+               /* Should have located space in expanded group 0, bucket 0. */
+               ok1(h.group_start == subhash + sizeof(struct ntdb_used_record));
+               ok1(h.home_bucket == 0);
+               ok1(h.found_bucket == 0);
+               ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS
+                   + NTDB_SUBLEVEL_HASH_BITS * 2);
+
+               ntdb_close(ntdb);
+       }
+
+       ok1(tap_log_messages == 0);
+       return exit_status();
+}
similarity index 57%
rename from lib/tdb2/test/run-05-readonly-open.c
rename to lib/ntdb/test/run-05-readonly-open.c
index 1046a8b47ebc4148e3cd9756e5060e3ee50f36eb..dd5aa26d0d11f3500feef1d2e7f4d9d26e969dc9 100644 (file)
@@ -1,5 +1,5 @@
 #include <ccan/failtest/failtest_override.h>
-#include "tdb2-source.h"
+#include "ntdb-source.h"
 #include "tap-interface.h"
 #include <ccan/failtest/failtest.h>
 #include "logging.h"
@@ -8,55 +8,55 @@
 int main(int argc, char *argv[])
 {
        unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
-       struct tdb_data key = tdb_mkdata("key", 3);
-       struct tdb_data data = tdb_mkdata("data", 4), d;
-       union tdb_attribute seed_attr;
+       struct ntdb_context *ntdb;
+       int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
+       NTDB_DATA key = ntdb_mkdata("key", 3);
+       NTDB_DATA data = ntdb_mkdata("data", 4), d;
+       union ntdb_attribute seed_attr;
        unsigned int msgs = 0;
 
        failtest_init(argc, argv);
        failtest_hook = block_repeat_failures;
        failtest_exit_check = exit_check_log;
 
-       seed_attr.base.attr = TDB_ATTRIBUTE_SEED;
+       seed_attr.base.attr = NTDB_ATTRIBUTE_SEED;
        seed_attr.base.next = &tap_log_attr;
        seed_attr.seed.seed = 0;
 
        failtest_suppress = true;
        plan_tests(sizeof(flags) / sizeof(flags[0]) * 11);
        for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-05-readonly-open.tdb", flags[i],
+               ntdb = ntdb_open("run-05-readonly-open.ntdb", flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600,
                               &seed_attr);
-               ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-               tdb_close(tdb);
+               ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+               ntdb_close(ntdb);
 
                failtest_suppress = false;
-               tdb = tdb_open("run-05-readonly-open.tdb", flags[i],
+               ntdb = ntdb_open("run-05-readonly-open.ntdb", flags[i],
                               O_RDONLY, 0600, &tap_log_attr);
-               if (!ok1(tdb))
+               if (!ok1(ntdb))
                        break;
                ok1(tap_log_messages == msgs);
                /* Fetch should succeed, stores should fail. */
-               if (!ok1(tdb_fetch(tdb, key, &d) == 0))
+               if (!ok1(ntdb_fetch(ntdb, key, &d) == 0))
                        goto fail;
-               ok1(tdb_deq(d, data));
+               ok1(ntdb_deq(d, data));
                free(d.dptr);
-               if (!ok1(tdb_store(tdb, key, data, TDB_MODIFY)
-                        == TDB_ERR_RDONLY))
+               if (!ok1(ntdb_store(ntdb, key, data, NTDB_MODIFY)
+                        == NTDB_ERR_RDONLY))
                        goto fail;
                ok1(tap_log_messages == ++msgs);
-               if (!ok1(tdb_store(tdb, key, data, TDB_INSERT)
-                        == TDB_ERR_RDONLY))
+               if (!ok1(ntdb_store(ntdb, key, data, NTDB_INSERT)
+                        == NTDB_ERR_RDONLY))
                        goto fail;
                ok1(tap_log_messages == ++msgs);
                failtest_suppress = true;
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               tdb_close(tdb);
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+               ntdb_close(ntdb);
                ok1(tap_log_messages == msgs);
-               /* SIGH: failtest bug, it doesn't save the tdb file because
+               /* SIGH: failtest bug, it doesn't save the ntdb file because
                 * we have it read-only.  If we go around again, it gets
                 * changed underneath us and things get screwy. */
                if (failtest_has_failed())
@@ -66,6 +66,6 @@ int main(int argc, char *argv[])
 
 fail:
        failtest_suppress = true;
-       tdb_close(tdb);
+       ntdb_close(ntdb);
        failtest_exit(exit_status());
 }
similarity index 57%
rename from lib/tdb2/test/run-10-simple-store.c
rename to lib/ntdb/test/run-10-simple-store.c
index 66bf6a6a51746402793205699b3f911908985af4..6e718bf61f5379276a5fa9e79ec89c5e4dbc53ec 100644 (file)
@@ -1,5 +1,5 @@
 #include <ccan/failtest/failtest_override.h>
-#include "tdb2-source.h"
+#include "ntdb-source.h"
 #include "tap-interface.h"
 #include <ccan/failtest/failtest.h>
 #include "logging.h"
@@ -8,12 +8,12 @@
 int main(int argc, char *argv[])
 {
        unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT };
-       struct tdb_data key = tdb_mkdata("key", 3);
-       struct tdb_data data = tdb_mkdata("data", 4);
+       struct ntdb_context *ntdb;
+       int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+                       NTDB_NOMMAP|NTDB_CONVERT };
+       NTDB_DATA key = ntdb_mkdata("key", 3);
+       NTDB_DATA data = ntdb_mkdata("data", 4);
 
        failtest_init(argc, argv);
        failtest_hook = block_repeat_failures;
@@ -22,37 +22,37 @@ int main(int argc, char *argv[])
        failtest_suppress = true;
        plan_tests(sizeof(flags) / sizeof(flags[0]) * 7 + 1);
        for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-10-simple-store.tdb", flags[i],
+               ntdb = ntdb_open("run-10-simple-store.ntdb", flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               if (!ok1(tdb))
+               if (!ok1(ntdb))
                        break;
                /* Modify should fail. */
                failtest_suppress = false;
-               if (!ok1(tdb_store(tdb, key, data, TDB_MODIFY)
-                        == TDB_ERR_NOEXIST))
+               if (!ok1(ntdb_store(ntdb, key, data, NTDB_MODIFY)
+                        == NTDB_ERR_NOEXIST))
                        goto fail;
                failtest_suppress = true;
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
                /* Insert should succeed. */
                failtest_suppress = false;
-               if (!ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0))
+               if (!ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0))
                        goto fail;
                failtest_suppress = true;
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
                /* Second insert should fail. */
                failtest_suppress = false;
-               if (!ok1(tdb_store(tdb, key, data, TDB_INSERT)
-                        == TDB_ERR_EXISTS))
+               if (!ok1(ntdb_store(ntdb, key, data, NTDB_INSERT)
+                        == NTDB_ERR_EXISTS))
                        goto fail;
                failtest_suppress = true;
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               tdb_close(tdb);
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+               ntdb_close(ntdb);
        }
        ok1(tap_log_messages == 0);
        failtest_exit(exit_status());
 
 fail:
        failtest_suppress = true;
-       tdb_close(tdb);
+       ntdb_close(ntdb);
        failtest_exit(exit_status());
 }
similarity index 53%
rename from lib/tdb2/test/run-11-simple-fetch.c
rename to lib/ntdb/test/run-11-simple-fetch.c
index 4c41ceec6d01200c8cf2a313ce3e82d9f9a4e065..525cf46444c01832e26e9db24a1cd210dfda5d8f 100644 (file)
@@ -1,5 +1,5 @@
 #include <ccan/failtest/failtest_override.h>
-#include "tdb2-source.h"
+#include "ntdb-source.h"
 #include "tap-interface.h"
 #include <ccan/failtest/failtest.h>
 #include "logging.h"
@@ -8,12 +8,12 @@
 int main(int argc, char *argv[])
 {
        unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT };
-       struct tdb_data key = tdb_mkdata("key", 3);
-       struct tdb_data data = tdb_mkdata("data", 4);
+       struct ntdb_context *ntdb;
+       int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+                       NTDB_NOMMAP|NTDB_CONVERT };
+       NTDB_DATA key = ntdb_mkdata("key", 3);
+       NTDB_DATA data = ntdb_mkdata("data", 4);
 
        failtest_init(argc, argv);
        failtest_hook = block_repeat_failures;
@@ -22,30 +22,30 @@ int main(int argc, char *argv[])
        failtest_suppress = true;
        plan_tests(sizeof(flags) / sizeof(flags[0]) * 8 + 1);
        for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-11-simple-fetch.tdb", flags[i],
+               ntdb = ntdb_open("run-11-simple-fetch.ntdb", flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (tdb) {
-                       struct tdb_data d = { NULL, 0 }; /* Bogus GCC warning */
+               ok1(ntdb);
+               if (ntdb) {
+                       NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */
 
                        /* fetch should fail. */
                        failtest_suppress = false;
-                       if (!ok1(tdb_fetch(tdb, key, &d) == TDB_ERR_NOEXIST))
+                       if (!ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_NOEXIST))
                                goto fail;
                        failtest_suppress = true;
-                       ok1(tdb_check(tdb, NULL, NULL) == 0);
+                       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
                        /* Insert should succeed. */
-                       ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-                       ok1(tdb_check(tdb, NULL, NULL) == 0);
+                       ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+                       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
                        /* Fetch should now work. */
                        failtest_suppress = false;
-                       if (!ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS))
+                       if (!ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS))
                                goto fail;
                        failtest_suppress = true;
-                       ok1(tdb_deq(d, data));
+                       ok1(ntdb_deq(d, data));
                        free(d.dptr);
-                       ok1(tdb_check(tdb, NULL, NULL) == 0);
-                       tdb_close(tdb);
+                       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+                       ntdb_close(ntdb);
                }
        }
        ok1(tap_log_messages == 0);
@@ -53,6 +53,6 @@ int main(int argc, char *argv[])
 
 fail:
        failtest_suppress = true;
-       tdb_close(tdb);
+       ntdb_close(ntdb);
        failtest_exit(exit_status());
 }
similarity index 60%
rename from lib/tdb2/test/run-12-check.c
rename to lib/ntdb/test/run-12-check.c
index cc57726f930b30e2cc21ac45d261ed72926cdf74..604063704861ca02ea1e192b3cf4189ef6d571c4 100644 (file)
@@ -1,6 +1,6 @@
 #include "private.h"
 #include <ccan/failtest/failtest_override.h>
-#include "tdb2-source.h"
+#include "ntdb-source.h"
 #include "tap-interface.h"
 #include <ccan/failtest/failtest.h>
 #include "logging.h"
@@ -9,12 +9,12 @@
 int main(int argc, char *argv[])
 {
        unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL,
-                       TDB_INTERNAL|TDB_CONVERT,
-                       TDB_CONVERT };
-       struct tdb_data key = tdb_mkdata("key", 3);
-       struct tdb_data data = tdb_mkdata("data", 4);
+       struct ntdb_context *ntdb;
+       int flags[] = { NTDB_INTERNAL,
+                       NTDB_INTERNAL|NTDB_CONVERT,
+                       NTDB_CONVERT };
+       NTDB_DATA key = ntdb_mkdata("key", 3);
+       NTDB_DATA data = ntdb_mkdata("data", 4);
 
        failtest_init(argc, argv);
        failtest_hook = block_repeat_failures;
@@ -23,24 +23,24 @@ int main(int argc, char *argv[])
        failtest_suppress = true;
        plan_tests(sizeof(flags) / sizeof(flags[0]) * 3 + 1);
        for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-12-check.tdb", flags[i],
+               ntdb = ntdb_open("run-12-check.ntdb", flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
+               ok1(ntdb);
+               ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
 
-               /* This is what we really want to test: tdb_check(). */
+               /* This is what we really want to test: ntdb_check(). */
                failtest_suppress = false;
-               if (!ok1(tdb_check(tdb, NULL, NULL) == 0))
+               if (!ok1(ntdb_check(ntdb, NULL, NULL) == 0))
                        goto fail;
                failtest_suppress = true;
 
-               tdb_close(tdb);
+               ntdb_close(ntdb);
        }
        ok1(tap_log_messages == 0);
        failtest_exit(exit_status());
 
 fail:
        failtest_suppress = true;
-       tdb_close(tdb);
+       ntdb_close(ntdb);
        failtest_exit(exit_status());
 }
similarity index 52%
rename from lib/tdb2/test/run-15-append.c
rename to lib/ntdb/test/run-15-append.c
index 6578b7073402126861e3122d7a84b4cd47ba51fd..3c208137f2798395e9966447c8aaba47eb0fcca3 100644 (file)
@@ -1,4 +1,4 @@
-#include "tdb2-source.h"
+#include "ntdb-source.h"
 #include "tap-interface.h"
 #include <ccan/ilog/ilog.h>
 #include "logging.h"
@@ -6,30 +6,30 @@
 #define MAX_SIZE 13100
 #define SIZE_STEP 131
 
-static tdb_off_t tdb_offset(struct tdb_context *tdb, struct tdb_data key)
+static ntdb_off_t ntdb_offset(struct ntdb_context *ntdb, NTDB_DATA key)
 {
-       tdb_off_t off;
-       struct tdb_used_record urec;
+       ntdb_off_t off;
+       struct ntdb_used_record urec;
        struct hash_info h;
 
-       off = find_and_lock(tdb, key, F_RDLCK, &h, &urec, NULL);
-       if (TDB_OFF_IS_ERR(off))
+       off = find_and_lock(ntdb, key, F_RDLCK, &h, &urec, NULL);
+       if (NTDB_OFF_IS_ERR(off))
                return 0;
-       tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
+       ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, F_RDLCK);
        return off;
 }
 
 int main(int argc, char *argv[])
 {
        unsigned int i, j, moves;
-       struct tdb_context *tdb;
+       struct ntdb_context *ntdb;
        unsigned char *buffer;
-       tdb_off_t oldoff = 0, newoff;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT };
-       struct tdb_data key = tdb_mkdata("key", 3);
-       struct tdb_data data;
+       ntdb_off_t oldoff = 0, newoff;
+       int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+                       NTDB_NOMMAP|NTDB_CONVERT };
+       NTDB_DATA key = ntdb_mkdata("key", 3);
+       NTDB_DATA data;
 
        buffer = malloc(MAX_SIZE);
        for (i = 0; i < MAX_SIZE; i++)
@@ -39,89 +39,89 @@ int main(int argc, char *argv[])
                   * ((3 + MAX_SIZE/SIZE_STEP * 5) * 2 + 7)
                   + 1);
 
-       /* Using tdb_store. */
+       /* Using ntdb_store. */
        for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-append.tdb", flags[i],
+               ntdb = ntdb_open("run-append.ntdb", flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
+               ok1(ntdb);
+               if (!ntdb)
                        continue;
 
                moves = 0;
                for (j = 0; j < MAX_SIZE; j += SIZE_STEP) {
                        data.dptr = buffer;
                        data.dsize = j;
-                       ok1(tdb_store(tdb, key, data, TDB_REPLACE) == 0);
-                       ok1(tdb_check(tdb, NULL, NULL) == 0);
-                       ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS);
+                       ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0);
+                       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+                       ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS);
                        ok1(data.dsize == j);
                        ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
                        free(data.dptr);
-                       newoff = tdb_offset(tdb, key);
+                       newoff = ntdb_offset(ntdb, key);
                        if (newoff != oldoff)
                                moves++;
                        oldoff = newoff;
                }
-               ok1(!tdb->file || (tdb->file->allrecord_lock.count == 0
-                                  && tdb->file->num_lockrecs == 0));
+               ok1(!ntdb->file || (ntdb->file->allrecord_lock.count == 0
+                                  && ntdb->file->num_lockrecs == 0));
                /* We should increase by 50% each time... */
                ok(moves <= ilog64(j / SIZE_STEP)*2,
                   "Moved %u times", moves);
-               tdb_close(tdb);
+               ntdb_close(ntdb);
        }
 
-       /* Using tdb_append. */
+       /* Using ntdb_append. */
        for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
                size_t prev_len = 0;
-               tdb = tdb_open("run-append.tdb", flags[i],
+               ntdb = ntdb_open("run-append.ntdb", flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
+               ok1(ntdb);
+               if (!ntdb)
                        continue;
 
                moves = 0;
                for (j = 0; j < MAX_SIZE; j += SIZE_STEP) {
                        data.dptr = buffer + prev_len;
                        data.dsize = j - prev_len;
-                       ok1(tdb_append(tdb, key, data) == 0);
-                       ok1(tdb_check(tdb, NULL, NULL) == 0);
-                       ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS);
+                       ok1(ntdb_append(ntdb, key, data) == 0);
+                       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+                       ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS);
                        ok1(data.dsize == j);
                        ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
                        free(data.dptr);
                        prev_len = data.dsize;
-                       newoff = tdb_offset(tdb, key);
+                       newoff = ntdb_offset(ntdb, key);
                        if (newoff != oldoff)
                                moves++;
                        oldoff = newoff;
                }
-               ok1(!tdb->file || (tdb->file->allrecord_lock.count == 0
-                                  && tdb->file->num_lockrecs == 0));
+               ok1(!ntdb->file || (ntdb->file->allrecord_lock.count == 0
+                                  && ntdb->file->num_lockrecs == 0));
                /* We should increase by 50% each time... */
                ok(moves <= ilog64(j / SIZE_STEP)*2,
                   "Moved %u times", moves);
-               tdb_close(tdb);
+               ntdb_close(ntdb);
        }
 
        for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-append.tdb", flags[i],
+               ntdb = ntdb_open("run-append.ntdb", flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
+               ok1(ntdb);
+               if (!ntdb)
                        continue;
 
                /* Huge initial store. */
                data.dptr = buffer;
                data.dsize = MAX_SIZE;
-               ok1(tdb_append(tdb, key, data) == 0);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS);
+               ok1(ntdb_append(ntdb, key, data) == 0);
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+               ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS);
                ok1(data.dsize == MAX_SIZE);
                ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
                free(data.dptr);
-               ok1(!tdb->file || (tdb->file->allrecord_lock.count == 0
-                                  && tdb->file->num_lockrecs == 0));
-               tdb_close(tdb);
+               ok1(!ntdb->file || (ntdb->file->allrecord_lock.count == 0
+                                  && ntdb->file->num_lockrecs == 0));
+               ntdb_close(ntdb);
        }
 
        ok1(tap_log_messages == 0);
diff --git a/lib/ntdb/test/run-20-growhash.c b/lib/ntdb/test/run-20-growhash.c
new file mode 100644 (file)
index 0000000..5559370
--- /dev/null
@@ -0,0 +1,137 @@
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include "logging.h"
+
+static uint64_t myhash(const void *key, size_t len, uint64_t seed, void *priv)
+{
+       return *(const uint64_t *)key;
+}
+
+static void add_bits(uint64_t *val, unsigned new, unsigned new_bits,
+                    unsigned *done)
+{
+       *done += new_bits;
+       *val |= ((uint64_t)new << (64 - *done));
+}
+
+static uint64_t make_key(unsigned topgroup, unsigned topbucket,
+                        unsigned subgroup1, unsigned subbucket1,
+                        unsigned subgroup2, unsigned subbucket2)
+{
+       uint64_t key = 0;
+       unsigned done = 0;
+
+       add_bits(&key, topgroup, NTDB_TOPLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS,
+                &done);
+       add_bits(&key, topbucket, NTDB_HASH_GROUP_BITS, &done);
+       add_bits(&key, subgroup1, NTDB_SUBLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS,
+                &done);
+       add_bits(&key, subbucket1, NTDB_HASH_GROUP_BITS, &done);
+       add_bits(&key, subgroup2, NTDB_SUBLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS,
+                &done);
+       add_bits(&key, subbucket2, NTDB_HASH_GROUP_BITS, &done);
+       return key;
+}
+
+int main(int argc, char *argv[])
+{
+       unsigned int i, j;
+       struct ntdb_context *ntdb;
+       uint64_t kdata;
+       struct ntdb_used_record rec;
+       NTDB_DATA key = { (unsigned char *)&kdata, sizeof(kdata) };
+       NTDB_DATA dbuf = { (unsigned char *)&kdata, sizeof(kdata) };
+       union ntdb_attribute hattr = { .hash = { .base = { NTDB_ATTRIBUTE_HASH },
+                                               .fn = myhash } };
+       int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+                       NTDB_NOMMAP|NTDB_CONVERT,
+       };
+
+       hattr.base.next = &tap_log_attr;
+
+       plan_tests(sizeof(flags) / sizeof(flags[0])
+                  * (9 + (20 + 2 * ((1 << NTDB_HASH_GROUP_BITS) - 2))
+                     * (1 << NTDB_HASH_GROUP_BITS)) + 1);
+       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+               struct hash_info h;
+
+               ntdb = ntdb_open("run-20-growhash.ntdb", flags[i],
+                              O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
+               ok1(ntdb);
+               if (!ntdb)
+                       continue;
+
+               /* Fill a group. */
+               for (j = 0; j < (1 << NTDB_HASH_GROUP_BITS); j++) {
+                       kdata = make_key(0, j, 0, 0, 0, 0);
+                       ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0);
+               }
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+               /* Check first still exists. */
+               kdata = make_key(0, 0, 0, 0, 0, 0);
+               ok1(find_and_lock(ntdb, key, F_RDLCK, &h, &rec, NULL) != 0);
+               /* Should have created correct hash. */
+               ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
+               /* Should have located space in group 0, bucket 0. */
+               ok1(h.group_start == offsetof(struct ntdb_header, hashtable));
+               ok1(h.home_bucket == 0);
+               ok1(h.found_bucket == 0);
+               ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS);
+               /* Entire group should be full! */
+               for (j = 0; j < (1 << NTDB_HASH_GROUP_BITS); j++)
+                       ok1(h.group[j] != 0);
+
+               ok1(ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range,
+                                     F_RDLCK) == 0);
+
+               /* Now, add one more to each should expand (that) bucket. */
+               for (j = 0; j < (1 << NTDB_HASH_GROUP_BITS); j++) {
+                       unsigned int k;
+                       kdata = make_key(0, j, 0, 1, 0, 0);
+                       ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0);
+                       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+                       ok1(find_and_lock(ntdb, key, F_RDLCK, &h, &rec, NULL));
+                       /* Should have created correct hash. */
+                       ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
+                       /* Should have moved to subhash */
+                       ok1(h.group_start >= sizeof(struct ntdb_header));
+                       ok1(h.home_bucket == 1);
+                       ok1(h.found_bucket == 1);
+                       ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS
+                           + NTDB_SUBLEVEL_HASH_BITS);
+                       ok1(ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range,
+                                             F_RDLCK) == 0);
+
+                       /* Keep adding, make it expand again. */
+                       for (k = 2; k < (1 << NTDB_HASH_GROUP_BITS); k++) {
+                               kdata = make_key(0, j, 0, k, 0, 0);
+                               ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0);
+                               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+                       }
+
+                       /* This should tip it over to sub-sub-hash. */
+                       kdata = make_key(0, j, 0, 0, 0, 1);
+                       ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0);
+                       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+                       ok1(find_and_lock(ntdb, key, F_RDLCK, &h, &rec, NULL));
+                       /* Should have created correct hash. */
+                       ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
+                       /* Should have moved to subhash */
+                       ok1(h.group_start >= sizeof(struct ntdb_header));
+                       ok1(h.home_bucket == 1);
+                       ok1(h.found_bucket == 1);
+                       ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS
+                           + NTDB_SUBLEVEL_HASH_BITS + NTDB_SUBLEVEL_HASH_BITS);
+                       ok1(ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range,
+                                             F_RDLCK) == 0);
+               }
+               ntdb_close(ntdb);
+       }
+
+       ok1(tap_log_messages == 0);
+       return exit_status();
+}
diff --git a/lib/ntdb/test/run-25-hashoverload.c b/lib/ntdb/test/run-25-hashoverload.c
new file mode 100644 (file)
index 0000000..611eb71
--- /dev/null
@@ -0,0 +1,113 @@
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include "logging.h"
+
+static uint64_t badhash(const void *key, size_t len, uint64_t seed, void *priv)
+{
+       return 0;
+}
+
+static int trav(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *p)
+{
+       if (p)
+               return ntdb_delete(ntdb, key);
+       return 0;
+}
+
+int main(int argc, char *argv[])
+{
+       unsigned int i, j;
+       struct ntdb_context *ntdb;
+       NTDB_DATA key = { (unsigned char *)&j, sizeof(j) };
+       NTDB_DATA dbuf = { (unsigned char *)&j, sizeof(j) };
+       union ntdb_attribute hattr = { .hash = { .base = { NTDB_ATTRIBUTE_HASH },
+                                               .fn = badhash } };
+       int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+                       NTDB_NOMMAP|NTDB_CONVERT,
+       };
+
+       hattr.base.next = &tap_log_attr;
+
+       plan_tests(6883);
+       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+               NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */
+
+               ntdb = ntdb_open("run-25-hashoverload.ntdb", flags[i],
+                              O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
+               ok1(ntdb);
+               if (!ntdb)
+                       continue;
+
+               /* Fill a group. */
+               for (j = 0; j < (1 << NTDB_HASH_GROUP_BITS); j++) {
+                       ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0);
+               }
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+               /* Now store one last value: should form chain. */
+               ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0);
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+               /* Check we can find them all. */
+               for (j = 0; j < (1 << NTDB_HASH_GROUP_BITS) + 1; j++) {
+                       ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
+                       ok1(d.dsize == sizeof(j));
+                       ok1(d.dptr != NULL);
+                       ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0);
+                       free(d.dptr);
+               }
+
+               /* Now add a *lot* more. */
+               for (j = (1 << NTDB_HASH_GROUP_BITS) + 1;
+                    j < (16 << NTDB_HASH_GROUP_BITS);
+                    j++) {
+                       ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0);
+                       ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
+                       ok1(d.dsize == sizeof(j));
+                       ok1(d.dptr != NULL);
+                       ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0);
+                       free(d.dptr);
+               }
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+               /* Traverse through them. */
+               ok1(ntdb_traverse(ntdb, trav, NULL) == j);
+
+               /* Empty the first chain-worth. */
+               for (j = 0; j < (1 << NTDB_HASH_GROUP_BITS); j++)
+                       ok1(ntdb_delete(ntdb, key) == 0);
+
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+               for (j = (1 << NTDB_HASH_GROUP_BITS);
+                    j < (16 << NTDB_HASH_GROUP_BITS);
+                    j++) {
+                       ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
+                       ok1(d.dsize == sizeof(j));
+                       ok1(d.dptr != NULL);
+                       ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0);
+                       free(d.dptr);
+               }
+
+               /* Traverse through them. */
+               ok1(ntdb_traverse(ntdb, trav, NULL)
+                   == (15 << NTDB_HASH_GROUP_BITS));
+
+               /* Re-add */
+               for (j = 0; j < (1 << NTDB_HASH_GROUP_BITS); j++) {
+                       ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0);
+               }
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+               /* Now try deleting as we go. */
+               ok1(ntdb_traverse(ntdb, trav, trav)
+                   == (16 << NTDB_HASH_GROUP_BITS));
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+               ok1(ntdb_traverse(ntdb, trav, NULL) == 0);
+               ntdb_close(ntdb);
+       }
+
+       ok1(tap_log_messages == 0);
+       return exit_status();
+}
diff --git a/lib/ntdb/test/run-30-exhaust-before-expand.c b/lib/ntdb/test/run-30-exhaust-before-expand.c
new file mode 100644 (file)
index 0000000..b94bc01
--- /dev/null
@@ -0,0 +1,71 @@
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include "logging.h"
+
+static bool empty_freetable(struct ntdb_context *ntdb)
+{      /* Returns true only if every bucket of the free table at ntdb->ftable_off is zero. */
+       struct ntdb_freetable ftab;
+       unsigned int i;
+
+       /* Now, free table should be completely exhausted in zone 0 */
+       if (ntdb_read_convert(ntdb, ntdb->ftable_off, &ftab, sizeof(ftab)) != 0)
+               abort(); /* A failed read of the free table aborts the whole test program. */
+
+       for (i = 0; i < sizeof(ftab.buckets)/sizeof(ftab.buckets[0]); i++) {
+               if (ftab.buckets[i])
+                       return false; /* Any non-zero bucket makes the table non-empty. */
+       }
+       return true;
+}
+
+
+int main(int argc, char *argv[])
+{      /* Checks that the free table is fully drained before the file is expanded. */
+       unsigned int i, j;
+       struct ntdb_context *ntdb;
+       int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+                       NTDB_NOMMAP|NTDB_CONVERT };
+
+       plan_tests(sizeof(flags) / sizeof(flags[0]) * 9 + 1); /* 9 checks per flag set + final log check */
+
+       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+               NTDB_DATA k;
+               uint64_t size;
+               bool was_empty = false;
+
+               k.dptr = (void *)&j; /* Key aliases the counter j, so every store uses a fresh key. */
+               k.dsize = sizeof(j);
+
+               ntdb = ntdb_open("run-30-exhaust-before-expand.ntdb", flags[i],
+                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+               ok1(ntdb);
+               if (!ntdb)
+                       continue;
+
+               ok1(empty_freetable(ntdb));
+               /* Need some hash lock for expand. */
+               ok1(ntdb_lock_hashes(ntdb, 0, 1, F_WRLCK, NTDB_LOCK_WAIT) == 0);
+               /* Create some free space. */
+               ok1(ntdb_expand(ntdb, 1) == 0);
+               ok1(ntdb_unlock_hashes(ntdb, 0, 1, F_WRLCK) == 0);
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+               ok1(!empty_freetable(ntdb));
+
+               size = ntdb->file->map_size;
+               /* Insert minimal-length records until we expand. */
+               for (j = 0; ntdb->file->map_size == size; j++) {
+                       was_empty = empty_freetable(ntdb); /* State just before this store. */
+                       if (ntdb_store(ntdb, k, k, NTDB_INSERT) != 0)
+                               err(1, "Failed to store record %u", j); /* j is unsigned: %u, not %i */
+               }
+
+               /* Would have been empty before expansion, but no longer. */
+               ok1(was_empty);
+               ok1(!empty_freetable(ntdb));
+               ntdb_close(ntdb);
+       }
+
+       ok1(tap_log_messages == 0);
+       return exit_status();
+}
similarity index 52%
rename from lib/tdb2/test/run-35-convert.c
rename to lib/ntdb/test/run-35-convert.c
index ac7939591bd0622641f7dceea779416af9cdd011..6a38d425cbaf03fa6665c77aeac2816c7db60676 100644 (file)
@@ -1,6 +1,6 @@
 #include "private.h"
 #include <ccan/failtest/failtest_override.h>
-#include "tdb2-source.h"
+#include "ntdb-source.h"
 #include "tap-interface.h"
 #include <ccan/failtest/failtest.h>
 #include "logging.h"
@@ -9,46 +9,46 @@
 int main(int argc, char *argv[])
 {
        unsigned int i, messages = 0;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
+       struct ntdb_context *ntdb;
+       int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
 
        failtest_init(argc, argv);
        failtest_hook = block_repeat_failures;
        failtest_exit_check = exit_check_log;
        plan_tests(sizeof(flags) / sizeof(flags[0]) * 4);
        for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-35-convert.tdb", flags[i],
+               ntdb = ntdb_open("run-35-convert.ntdb", flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               if (!ok1(tdb))
+               if (!ok1(ntdb))
                        failtest_exit(exit_status());
 
-               tdb_close(tdb);
-               /* If we say TDB_CONVERT, it must be converted */
-               tdb = tdb_open("run-35-convert.tdb",
-                              flags[i]|TDB_CONVERT,
+               ntdb_close(ntdb);
+               /* If we say NTDB_CONVERT, it must be converted */
+               ntdb = ntdb_open("run-35-convert.ntdb",
+                              flags[i]|NTDB_CONVERT,
                               O_RDWR, 0600, &tap_log_attr);
-               if (flags[i] & TDB_CONVERT) {
-                       if (!tdb)
+               if (flags[i] & NTDB_CONVERT) {
+                       if (!ntdb)
                                failtest_exit(exit_status());
-                       ok1(tdb_get_flags(tdb) & TDB_CONVERT);
-                       tdb_close(tdb);
+                       ok1(ntdb_get_flags(ntdb) & NTDB_CONVERT);
+                       ntdb_close(ntdb);
                } else {
-                       if (!ok1(!tdb && errno == EIO))
+                       if (!ok1(!ntdb && errno == EIO))
                                failtest_exit(exit_status());
                        ok1(tap_log_messages == ++messages);
-                       if (!ok1(log_last && strstr(log_last, "TDB_CONVERT")))
+                       if (!ok1(log_last && strstr(log_last, "NTDB_CONVERT")))
                                failtest_exit(exit_status());
                }
 
-               /* If don't say TDB_CONVERT, it *may* be converted */
-               tdb = tdb_open("run-35-convert.tdb",
-                              flags[i] & ~TDB_CONVERT,
+               /* If don't say NTDB_CONVERT, it *may* be converted */
+               ntdb = ntdb_open("run-35-convert.ntdb",
+                              flags[i] & ~NTDB_CONVERT,
                               O_RDWR, 0600, &tap_log_attr);
-               if (!tdb)
+               if (!ntdb)
                        failtest_exit(exit_status());
-               ok1(tdb_get_flags(tdb) == flags[i]);
-               tdb_close(tdb);
+               ok1(ntdb_get_flags(ntdb) == flags[i]);
+               ntdb_close(ntdb);
        }
        failtest_exit(exit_status());
 }
diff --git a/lib/ntdb/test/run-50-multiple-freelists.c b/lib/ntdb/test/run-50-multiple-freelists.c
new file mode 100644 (file)
index 0000000..962462e
--- /dev/null
@@ -0,0 +1,70 @@
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "layout.h"
+
+int main(int argc, char *argv[])
+{      /* Exercises get_free() against a hand-built layout with three free tables. */
+       ntdb_off_t off;
+       struct ntdb_context *ntdb;
+       struct ntdb_layout *layout;
+       NTDB_DATA key, data;
+       union ntdb_attribute seed;
+
+       /* This seed value previously tickled a layout.c bug. */
+       seed.base.attr = NTDB_ATTRIBUTE_SEED;
+       seed.seed.seed = 0xb1142bc054d035b4ULL;
+       seed.base.next = &tap_log_attr; /* Chain the logging attribute after the seed. */
+
+       plan_tests(11); /* 1 check + 4 * 2 get_free checks + 1 failure check + 1 log check */
+       key = ntdb_mkdata("Hello", 5);
+       data = ntdb_mkdata("world", 5);
+
+       /* Create a NTDB with three free tables. */
+       layout = new_ntdb_layout();
+       ntdb_layout_add_freetable(layout);
+       ntdb_layout_add_freetable(layout);
+       ntdb_layout_add_freetable(layout);
+       ntdb_layout_add_free(layout, 80, 0); /* presumably (length, free table index) - confirm in layout.h */
+       /* A used record prevents coalescing. */
+       ntdb_layout_add_used(layout, key, data, 6);
+       ntdb_layout_add_free(layout, 160, 1);
+       key.dsize--; /* Key becomes "Hell", so the next used record has a different key. */
+       ntdb_layout_add_used(layout, key, data, 7);
+       ntdb_layout_add_free(layout, 320, 2);
+       key.dsize--; /* Key becomes "Hel". */
+       ntdb_layout_add_used(layout, key, data, 8);
+       ntdb_layout_add_free(layout, 40, 0);
+       ntdb = ntdb_layout_get(layout, free, &seed); /* Passes libc free() and the seed -> log attribute chain. */
+       ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+       off = get_free(ntdb, 0, 80 - sizeof(struct ntdb_used_record), 0,
+                      NTDB_USED_MAGIC, 0);
+       ok1(off == layout->elem[3].base.off); /* 80-byte free record, assuming elem[] follows insertion order */
+       ok1(ntdb->ftable_off == layout->elem[0].base.off); /* first free table */
+
+       off = get_free(ntdb, 0, 160 - sizeof(struct ntdb_used_record), 0,
+                      NTDB_USED_MAGIC, 0);
+       ok1(off == layout->elem[5].base.off); /* 160-byte free record */
+       ok1(ntdb->ftable_off == layout->elem[1].base.off); /* second free table */
+
+       off = get_free(ntdb, 0, 320 - sizeof(struct ntdb_used_record), 0,
+                      NTDB_USED_MAGIC, 0);
+       ok1(off == layout->elem[7].base.off); /* 320-byte free record */
+       ok1(ntdb->ftable_off == layout->elem[2].base.off); /* third free table */
+
+       off = get_free(ntdb, 0, 40 - sizeof(struct ntdb_used_record), 0,
+                      NTDB_USED_MAGIC, 0);
+       ok1(off == layout->elem[9].base.off); /* 40-byte free record */
+       ok1(ntdb->ftable_off == layout->elem[0].base.off); /* back to the first free table */
+
+       /* Now we fail. */
+       off = get_free(ntdb, 0, 0, 1, NTDB_USED_MAGIC, 0); /* NOTE(review): size 0 and fourth argument 1 here - confirm meaning in get_free() */
+       ok1(off == 0);
+
+       ntdb_close(ntdb);
+       ntdb_layout_free(layout);
+
+       ok1(tap_log_messages == 0);
+       return exit_status();
+}
similarity index 81%
rename from lib/tdb2/test/run-56-open-during-transaction.c
rename to lib/ntdb/test/run-56-open-during-transaction.c
index c514caa92b9cd7694465b74b8e6cb26025fbba2b..f585aa13c818e05917d0e941b71ffff9d490f83c 100644 (file)
@@ -11,7 +11,7 @@ static int ftruncate_check(int fd, off_t length);
 #define fcntl fcntl_with_lockcheck
 #define ftruncate ftruncate_check
 
-#include "tdb2-source.h"
+#include "ntdb-source.h"
 #include "tap-interface.h"
 #include <stdlib.h>
 #include <stdbool.h>
@@ -22,7 +22,7 @@ static int ftruncate_check(int fd, off_t length);
 static struct agent *agent;
 static bool opened;
 static int errors = 0;
-#define TEST_DBNAME "run-56-open-during-transaction.tdb"
+#define TEST_DBNAME "run-56-open-during-transaction.ntdb"
 
 #undef write
 #undef pwrite
@@ -80,7 +80,7 @@ static void check_file_intact(int fd)
        if (ret == SUCCESS) {
                ret = external_agent_operation(agent, CLOSE, NULL);
                if (ret != SUCCESS) {
-                       diag("Agent failed to close tdb: %s",
+                       diag("Agent failed to close ntdb: %s",
                             agent_return_name(ret));
                        errors++;
                }
@@ -127,11 +127,11 @@ static int ftruncate_check(int fd, off_t length)
 
 int main(int argc, char *argv[])
 {
-       const int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
+       const int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
        int i;
-       struct tdb_context *tdb;
-       TDB_DATA key, data;
+       struct ntdb_context *ntdb;
+       NTDB_DATA key, data;
 
        plan_tests(sizeof(flags)/sizeof(flags[0]) * 5);
        agent = prepare_external_agent();
@@ -141,24 +141,24 @@ int main(int argc, char *argv[])
        unlock_callback = after_unlock;
        for (i = 0; i < sizeof(flags)/sizeof(flags[0]); i++) {
                diag("Test with %s and %s\n",
-                    (flags[i] & TDB_CONVERT) ? "CONVERT" : "DEFAULT",
-                    (flags[i] & TDB_NOMMAP) ? "no mmap" : "mmap");
+                    (flags[i] & NTDB_CONVERT) ? "CONVERT" : "DEFAULT",
+                    (flags[i] & NTDB_NOMMAP) ? "no mmap" : "mmap");
                unlink(TEST_DBNAME);
-               tdb = tdb_open(TEST_DBNAME, flags[i],
+               ntdb = ntdb_open(TEST_DBNAME, flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
+               ok1(ntdb);
 
                opened = true;
-               ok1(tdb_transaction_start(tdb) == 0);
-               key = tdb_mkdata("hi", strlen("hi"));
-               data = tdb_mkdata("world", strlen("world"));
+               ok1(ntdb_transaction_start(ntdb) == 0);
+               key = ntdb_mkdata("hi", strlen("hi"));
+               data = ntdb_mkdata("world", strlen("world"));
 
-               ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-               ok1(tdb_transaction_commit(tdb) == 0);
+               ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+               ok1(ntdb_transaction_commit(ntdb) == 0);
                ok(!errors, "We had %u open errors", errors);
 
                opened = false;
-               tdb_close(tdb);
+               ntdb_close(ntdb);
        }
 
        return exit_status();
similarity index 90%
rename from lib/tdb2/test/run-57-die-during-transaction.c
rename to lib/ntdb/test/run-57-die-during-transaction.c
index ee33a896fffa78d580eb32e2e24c3fbb64166ae8..98ec9dd63a7caf1d78b4458c621c417c6c026658 100644 (file)
@@ -81,7 +81,7 @@ static void free_all(void)
 #define free free_noleak
 #define realloc realloc_noleak
 
-#include "tdb2-source.h"
+#include "ntdb-source.h"
 
 #undef malloc
 #undef free
@@ -93,6 +93,7 @@ static void free_all(void)
 
 #include <stdbool.h>
 #include <stdarg.h>
+#include <ccan/err/err.h>
 #include <setjmp.h>
 #include "external-agent.h"
 #include "logging.h"
@@ -100,7 +101,7 @@ static void free_all(void)
 static bool in_transaction;
 static int target, current;
 static jmp_buf jmpbuf;
-#define TEST_DBNAME "run-57-die-during-transaction.tdb"
+#define TEST_DBNAME "run-57-die-during-transaction.ntdb"
 #define KEY_STRING "helloworld"
 
 static void maybe_die(int fd)
@@ -153,24 +154,24 @@ static int ftruncate_check(int fd, off_t length)
 
 static bool test_death(enum operation op, struct agent *agent)
 {
-       struct tdb_context *tdb = NULL;
-       TDB_DATA key;
+       struct ntdb_context *ntdb = NULL;
+       NTDB_DATA key;
        enum agent_return ret;
        int needed_recovery = 0;
 
        current = target = 0;
 reset:
        unlink(TEST_DBNAME);
-       tdb = tdb_open(TEST_DBNAME, TDB_NOMMAP,
+       ntdb = ntdb_open(TEST_DBNAME, NTDB_NOMMAP,
                       O_CREAT|O_TRUNC|O_RDWR, 0600, &tap_log_attr);
-       if (!tdb) {
-               diag("Failed opening TDB: %s", strerror(errno));
+       if (!ntdb) {
+               diag("Failed opening NTDB: %s", strerror(errno));
                return false;
        }
 
        if (setjmp(jmpbuf) != 0) {
                /* We're partway through.  Simulate our death. */
-               close(tdb->file->fd);
+               close(ntdb->file->fd);
                forget_locking();
                in_transaction = false;
 
@@ -215,7 +216,7 @@ reset:
                /* Suppress logging as this tries to use closed fd. */
                suppress_logging = true;
                suppress_lockcheck = true;
-               tdb_close(tdb);
+               ntdb_close(ntdb);
                suppress_logging = false;
                suppress_lockcheck = false;
                target++;
@@ -225,8 +226,8 @@ reset:
        }
 
        /* Put key for agent to fetch. */
-       key = tdb_mkdata(KEY_STRING, strlen(KEY_STRING));
-       if (tdb_store(tdb, key, key, TDB_INSERT) != 0)
+       key = ntdb_mkdata(KEY_STRING, strlen(KEY_STRING));
+       if (ntdb_store(ntdb, key, key, NTDB_INSERT) != 0)
                return false;
 
        /* This is the key we insert in transaction. */
@@ -241,20 +242,20 @@ reset:
                errx(1, "Agent failed find key: %s", agent_return_name(ret));
 
        in_transaction = true;
-       if (tdb_transaction_start(tdb) != 0)
+       if (ntdb_transaction_start(ntdb) != 0)
                return false;
 
-       if (tdb_store(tdb, key, key, TDB_INSERT) != 0)
+       if (ntdb_store(ntdb, key, key, NTDB_INSERT) != 0)
                return false;
 
-       if (tdb_transaction_commit(tdb) != 0)
+       if (ntdb_transaction_commit(ntdb) != 0)
                return false;
 
        in_transaction = false;
 
        /* We made it! */
        diag("Completed %u runs", current);
-       tdb_close(tdb);
+       ntdb_close(ntdb);
        ret = external_agent_operation(agent, CLOSE, "");
        if (ret != SUCCESS) {
                diag("Step %u close failed = %s", current,
diff --git a/lib/ntdb/test/run-64-bit-tdb.c b/lib/ntdb/test/run-64-bit-tdb.c
new file mode 100644 (file)
index 0000000..6a146cb
--- /dev/null
@@ -0,0 +1,72 @@
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include "logging.h"
+
+int main(int argc, char *argv[])
+{      /* Stores, fetches and deletes a record placed at or beyond offset 0xFFFFFFF0. */
+       unsigned int i;
+       struct ntdb_context *ntdb;
+       int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_CONVERT,
+                       NTDB_NOMMAP|NTDB_CONVERT };
+
+       if (sizeof(off_t) <= 4) { /* Offsets past 4G are not representable: report one pass and stop. */
+               plan_tests(1);
+               pass("No 64 bit off_t");
+               return exit_status();
+       }
+
+       plan_tests(sizeof(flags) / sizeof(flags[0]) * 14); /* 14 checks per flag set */
+       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+               off_t old_size;
+               NTDB_DATA k, d;
+               struct hash_info h;
+               struct ntdb_used_record rec;
+               ntdb_off_t off;
+
+               ntdb = ntdb_open("run-64-bit-ntdb.ntdb", flags[i],
+                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+               ok1(ntdb);
+               if (!ntdb)
+                       continue;
+
+               old_size = ntdb->file->map_size;
+
+               /* This makes a sparse file */
+               ok1(ftruncate(ntdb->file->fd, 0xFFFFFFF0) == 0); /* 16 bytes short of 4G */
+               ok1(add_free_record(ntdb, old_size, 0xFFFFFFF0 - old_size,
+                                   NTDB_LOCK_WAIT, false) == NTDB_SUCCESS);
+
+               /* Now add a little record past the 4G barrier. */
+               ok1(ntdb_expand_file(ntdb, 100) == NTDB_SUCCESS);
+               ok1(add_free_record(ntdb, 0xFFFFFFF0, 100, NTDB_LOCK_WAIT, false)
+                   == NTDB_SUCCESS);
+
+               ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
+
+               /* Test allocation path. */
+               k = ntdb_mkdata("key", 4); /* Length 4 includes the terminating NUL. */
+               d = ntdb_mkdata("data", 5); /* Length 5 includes the NUL, so strcmp() below is safe. */
+               ok1(ntdb_store(ntdb, k, d, NTDB_INSERT) == 0);
+               ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
+
+               /* Make sure it put it at end as we expected. */
+               off = find_and_lock(ntdb, k, F_RDLCK, &h, &rec, NULL);
+               ok1(off >= 0xFFFFFFF0);
+               ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, F_RDLCK); /* Release the range recorded in h. */
+
+               ok1(ntdb_fetch(ntdb, k, &d) == 0);
+               ok1(d.dsize == 5);
+               ok1(strcmp((char *)d.dptr, "data") == 0);
+               free(d.dptr); /* The fetched buffer is released by the caller. */
+
+               ok1(ntdb_delete(ntdb, k) == 0);
+               ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
+
+               ntdb_close(ntdb);
+       }
+
+       /* We might get messages about mmap failing, so don't test
+        * tap_log_messages */
+       return exit_status();
+}
diff --git a/lib/ntdb/test/run-90-get-set-attributes.c b/lib/ntdb/test/run-90-get-set-attributes.c
new file mode 100644 (file)
index 0000000..fc265b0
--- /dev/null
@@ -0,0 +1,159 @@
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include "logging.h"
+
+static int mylock(int fd, int rw, off_t off, off_t len, bool waitflag,
+                 void *unused)
+{      /* Stub lock callback for the FLOCK attribute: ignores every argument. */
+       return 0;
+}
+
+static int myunlock(int fd, int rw, off_t off, off_t len, void *unused)
+{      /* Stub unlock callback for the FLOCK attribute: ignores every argument. */
+       return 0;
+}
+
+static uint64_t hash_fn(const void *key, size_t len, uint64_t seed,
+                       void *priv)
+{      /* Stub hash callback for the HASH attribute: yields 0 for every key. */
+       return 0;
+}
+
+int main(int argc, char *argv[])
+{      /* Exercises ntdb_get_attribute / ntdb_set_attribute / ntdb_unset_attribute per flag set. */
+       unsigned int i;
+       struct ntdb_context *ntdb;
+       int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
+       union ntdb_attribute seed_attr;
+       union ntdb_attribute hash_attr;
+       union ntdb_attribute lock_attr;
+
+       seed_attr.base.attr = NTDB_ATTRIBUTE_SEED;
+       seed_attr.base.next = &hash_attr; /* Chain: seed -> hash -> flock -> tap_log_attr. */
+       seed_attr.seed.seed = 100;
+
+       hash_attr.base.attr = NTDB_ATTRIBUTE_HASH;
+       hash_attr.base.next = &lock_attr;
+       hash_attr.hash.fn = hash_fn;
+       hash_attr.hash.data = &hash_attr; /* Distinctive pointer, compared against later. */
+
+       lock_attr.base.attr = NTDB_ATTRIBUTE_FLOCK;
+       lock_attr.base.next = &tap_log_attr;
+       lock_attr.flock.lock = mylock;
+       lock_attr.flock.unlock = myunlock;
+       lock_attr.flock.data = &lock_attr; /* Distinctive pointer, compared against later. */
+
+       plan_tests(sizeof(flags) / sizeof(flags[0]) * 50); /* 50 checks per flag set */
+
+       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+               union ntdb_attribute attr;
+
+               /* First open with no attributes. */
+               ntdb = ntdb_open("run-90-get-set-attributes.ntdb", flags[i],
+                              O_RDWR|O_CREAT|O_TRUNC, 0600, NULL);
+               ok1(ntdb);
+
+               /* Get log on no attributes will fail */
+               attr.base.attr = NTDB_ATTRIBUTE_LOG;
+               ok1(ntdb_get_attribute(ntdb, &attr) == NTDB_ERR_NOEXIST);
+               /* These always work. */
+               attr.base.attr = NTDB_ATTRIBUTE_HASH;
+               ok1(ntdb_get_attribute(ntdb, &attr) == 0);
+               ok1(attr.base.attr == NTDB_ATTRIBUTE_HASH);
+               ok1(attr.hash.fn == ntdb_jenkins_hash); /* Expected when no hash attribute was supplied. */
+               attr.base.attr = NTDB_ATTRIBUTE_FLOCK;
+               ok1(ntdb_get_attribute(ntdb, &attr) == 0);
+               ok1(attr.base.attr == NTDB_ATTRIBUTE_FLOCK);
+               ok1(attr.flock.lock == ntdb_fcntl_lock); /* Expected when no flock attribute was supplied. */
+               ok1(attr.flock.unlock == ntdb_fcntl_unlock);
+               attr.base.attr = NTDB_ATTRIBUTE_SEED;
+               ok1(ntdb_get_attribute(ntdb, &attr) == 0);
+               ok1(attr.base.attr == NTDB_ATTRIBUTE_SEED);
+               /* This is possible, just astronomically unlikely. */
+               ok1(attr.seed.seed != 0);
+
+               /* Unset attributes. */
+               ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_LOG);
+               ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_FLOCK);
+
+               /* Set them. */
+               ok1(ntdb_set_attribute(ntdb, &tap_log_attr) == 0);
+               ok1(ntdb_set_attribute(ntdb, &lock_attr) == 0);
+               /* These should fail. */
+               ok1(ntdb_set_attribute(ntdb, &seed_attr) == NTDB_ERR_EINVAL);
+               ok1(tap_log_messages == 1); /* Each rejected set is expected to log one message. */
+               ok1(ntdb_set_attribute(ntdb, &hash_attr) == NTDB_ERR_EINVAL);
+               ok1(tap_log_messages == 2);
+               tap_log_messages = 0; /* Reset so the later zero-message checks start clean. */
+
+               /* Getting them should work as expected. */
+               attr.base.attr = NTDB_ATTRIBUTE_LOG;
+               ok1(ntdb_get_attribute(ntdb, &attr) == 0);
+               ok1(attr.base.attr == NTDB_ATTRIBUTE_LOG);
+               ok1(attr.log.fn == tap_log_attr.log.fn);
+               ok1(attr.log.data == tap_log_attr.log.data);
+
+               attr.base.attr = NTDB_ATTRIBUTE_FLOCK;
+               ok1(ntdb_get_attribute(ntdb, &attr) == 0);
+               ok1(attr.base.attr == NTDB_ATTRIBUTE_FLOCK);
+               ok1(attr.flock.lock == mylock);
+               ok1(attr.flock.unlock == myunlock);
+               ok1(attr.flock.data == &lock_attr);
+
+               /* Unset them again. */
+               ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_FLOCK);
+               ok1(tap_log_messages == 0);
+               ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_LOG);
+               ok1(tap_log_messages == 0);
+
+               ntdb_close(ntdb);
+               ok1(tap_log_messages == 0);
+
+               /* Now open with all attributes. */
+               ntdb = ntdb_open("run-90-get-set-attributes.ntdb", flags[i],
+                              O_RDWR|O_CREAT|O_TRUNC, 0600,
+                              &seed_attr); /* Head of the seed -> hash -> flock -> log chain. */
+
+               ok1(ntdb);
+
+               /* Get will succeed */
+               attr.base.attr = NTDB_ATTRIBUTE_LOG;
+               ok1(ntdb_get_attribute(ntdb, &attr) == 0);
+               ok1(attr.base.attr == NTDB_ATTRIBUTE_LOG);
+               ok1(attr.log.fn == tap_log_attr.log.fn);
+               ok1(attr.log.data == tap_log_attr.log.data);
+
+               attr.base.attr = NTDB_ATTRIBUTE_HASH;
+               ok1(ntdb_get_attribute(ntdb, &attr) == 0);
+               ok1(attr.base.attr == NTDB_ATTRIBUTE_HASH);
+               ok1(attr.hash.fn == hash_fn);
+               ok1(attr.hash.data == &hash_attr);
+
+               attr.base.attr = NTDB_ATTRIBUTE_FLOCK;
+               ok1(ntdb_get_attribute(ntdb, &attr) == 0);
+               ok1(attr.base.attr == NTDB_ATTRIBUTE_FLOCK);
+               ok1(attr.flock.lock == mylock);
+               ok1(attr.flock.unlock == myunlock);
+               ok1(attr.flock.data == &lock_attr);
+
+               attr.base.attr = NTDB_ATTRIBUTE_SEED;
+               ok1(ntdb_get_attribute(ntdb, &attr) == 0);
+               ok1(attr.base.attr == NTDB_ATTRIBUTE_SEED);
+               ok1(attr.seed.seed == seed_attr.seed.seed);
+
+               /* Unset attributes. */
+               ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_HASH);
+               ok1(tap_log_messages == 1); /* Unsetting HASH is expected to log one message. */
+               ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_SEED);
+               ok1(tap_log_messages == 2); /* Unsetting SEED is expected to log one more. */
+               ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_FLOCK);
+               ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_LOG);
+               ok1(tap_log_messages == 2); /* Unsetting FLOCK and LOG adds no messages. */
+               tap_log_messages = 0;
+
+               ntdb_close(ntdb);
+
+       }
+       return exit_status();
+}
similarity index 63%
rename from lib/tdb2/test/run-capabilities.c
rename to lib/ntdb/test/run-capabilities.c
index 1501abbe5cf8dd5a7a286dff7e4d4630f08bebc1..c2c6aa15db84185c11a087e73bba2a4d2feec59e 100644 (file)
@@ -1,5 +1,5 @@
 #include <ccan/failtest/failtest_override.h>
-#include "tdb2-source.h"
+#include "ntdb-source.h"
 #include "tap-interface.h"
 #include "logging.h"
 #include "layout.h"
@@ -18,28 +18,28 @@ static size_t len_of(bool breaks_check, bool breaks_write, bool breaks_open)
        return len;
 }
 
-/* Creates a TDB with various capabilities. */
-static void create_tdb(const char *name,
+/* Creates a NTDB with various capabilities. */
+static void create_ntdb(const char *name,
                       unsigned int cap,
                       bool breaks_check,
                       bool breaks_write,
                       bool breaks_open, ...)
 {
-       TDB_DATA key, data;
+       NTDB_DATA key, data;
        va_list ap;
-       struct tdb_layout *layout;
-       struct tdb_context *tdb;
+       struct ntdb_layout *layout;
+       struct ntdb_context *ntdb;
        int fd;
 
-       key = tdb_mkdata("Hello", 5);
-       data = tdb_mkdata("world", 5);
+       key = ntdb_mkdata("Hello", 5);
+       data = ntdb_mkdata("world", 5);
 
-       /* Create a TDB with some data, and some capabilities */
-       layout = new_tdb_layout();
-       tdb_layout_add_freetable(layout);
-       tdb_layout_add_used(layout, key, data, 6);
-       tdb_layout_add_free(layout, 80, 0);
-       tdb_layout_add_capability(layout, cap,
+       /* Create a NTDB with some data, and some capabilities */
+       layout = new_ntdb_layout();
+       ntdb_layout_add_freetable(layout);
+       ntdb_layout_add_used(layout, key, data, 6);
+       ntdb_layout_add_free(layout, 80, 0);
+       ntdb_layout_add_capability(layout, cap,
                                  breaks_write, breaks_check, breaks_open,
                                  len_of(breaks_check, breaks_write, breaks_open));
 
@@ -50,9 +50,9 @@ static void create_tdb(const char *name,
                breaks_open = va_arg(ap, int);
 
                key.dsize--;
-               tdb_layout_add_used(layout, key, data, 11 - key.dsize);
-               tdb_layout_add_free(layout, 80, 0);
-               tdb_layout_add_capability(layout, cap,
+               ntdb_layout_add_used(layout, key, data, 11 - key.dsize);
+               ntdb_layout_add_free(layout, 80, 0);
+               ntdb_layout_add_capability(layout, cap,
                                          breaks_write, breaks_check,
                                          breaks_open,
                                          len_of(breaks_check, breaks_write,
@@ -61,23 +61,23 @@ static void create_tdb(const char *name,
        va_end(ap);
 
        /* We open-code this, because we need to use the failtest write. */
-       tdb = tdb_layout_get(layout, failtest_free, &tap_log_attr);
+       ntdb = ntdb_layout_get(layout, failtest_free, &tap_log_attr);
 
        fd = open(name, O_RDWR|O_TRUNC|O_CREAT, 0600);
        if (fd < 0)
                err(1, "opening %s for writing", name);
-       if (write(fd, tdb->file->map_ptr, tdb->file->map_size)
-           != tdb->file->map_size)
+       if (write(fd, ntdb->file->map_ptr, ntdb->file->map_size)
+           != ntdb->file->map_size)
                err(1, "writing %s", name);
        close(fd);
-       tdb_close(tdb);
-       tdb_layout_free(layout);
+       ntdb_close(ntdb);
+       ntdb_layout_free(layout);
 }
 
 /* Note all the "goto out" early exits: they're to shorten failtest time. */
 int main(int argc, char *argv[])
 {
-       struct tdb_context *tdb;
+       struct ntdb_context *ntdb;
        char *summary;
 
        failtest_init(argc, argv);
@@ -87,72 +87,72 @@ int main(int argc, char *argv[])
 
        failtest_suppress = true;
        /* Capability says you can ignore it? */
-       create_tdb("run-capabilities.tdb", 1, false, false, false, 0);
+       create_ntdb("run-capabilities.ntdb", 1, false, false, false, 0);
 
        failtest_suppress = false;
-       tdb = tdb_open("run-capabilities.tdb", TDB_DEFAULT, O_RDWR, 0,
+       ntdb = ntdb_open("run-capabilities.ntdb", NTDB_DEFAULT, O_RDWR, 0,
                       &tap_log_attr);
        failtest_suppress = true;
-       if (!ok1(tdb))
+       if (!ok1(ntdb))
                goto out;
        ok1(tap_log_messages == 0);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
+       ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
        ok1(tap_log_messages == 0);
-       tdb_close(tdb);
+       ntdb_close(ntdb);
 
        /* Two capabilitues say you can ignore them? */
-       create_tdb("run-capabilities.tdb",
+       create_ntdb("run-capabilities.ntdb",
                   1, false, false, false,
                   2, false, false, false, 0);
 
        failtest_suppress = false;
-       tdb = tdb_open("run-capabilities.tdb", TDB_DEFAULT, O_RDWR, 0,
+       ntdb = ntdb_open("run-capabilities.ntdb", NTDB_DEFAULT, O_RDWR, 0,
                       &tap_log_attr);
        failtest_suppress = true;
-       if (!ok1(tdb))
+       if (!ok1(ntdb))
                goto out;
        ok1(tap_log_messages == 0);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
+       ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
        ok1(tap_log_messages == 0);
-       ok1(tdb_summary(tdb, 0, &summary) == TDB_SUCCESS);
+       ok1(ntdb_summary(ntdb, 0, &summary) == NTDB_SUCCESS);
        ok1(strstr(summary, "Capability 1\n"));
        free(summary);
-       tdb_close(tdb);
+       ntdb_close(ntdb);
 
        /* Capability says you can't check. */
-       create_tdb("run-capabilities.tdb",
+       create_ntdb("run-capabilities.ntdb",
                   1, false, false, false,
                   2, true, false, false, 0);
 
        failtest_suppress = false;
-       tdb = tdb_open("run-capabilities.tdb", TDB_DEFAULT, O_RDWR, 0,
+       ntdb = ntdb_open("run-capabilities.ntdb", NTDB_DEFAULT, O_RDWR, 0,
                       &tap_log_attr);
        failtest_suppress = true;
-       if (!ok1(tdb))
+       if (!ok1(ntdb))
                goto out;
        ok1(tap_log_messages == 0);
-       ok1(tdb_get_flags(tdb) & TDB_CANT_CHECK);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
+       ok1(ntdb_get_flags(ntdb) & NTDB_CANT_CHECK);
+       ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
        /* We expect a warning! */
        ok1(tap_log_messages == 1);
        ok1(strstr(log_last, "capabilit"));
-       ok1(tdb_summary(tdb, 0, &summary) == TDB_SUCCESS);
+       ok1(ntdb_summary(ntdb, 0, &summary) == NTDB_SUCCESS);
        ok1(strstr(summary, "Capability 1\n"));
        ok1(strstr(summary, "Capability 2 (uncheckable)\n"));
        free(summary);
-       tdb_close(tdb);
+       ntdb_close(ntdb);
 
        /* Capability says you can't write. */
-       create_tdb("run-capabilities.tdb",
+       create_ntdb("run-capabilities.ntdb",
                   1, false, false, false,
                   2, false, true, false, 0);
 
        failtest_suppress = false;
-       tdb = tdb_open("run-capabilities.tdb", TDB_DEFAULT, O_RDWR, 0,
+       ntdb = ntdb_open("run-capabilities.ntdb", NTDB_DEFAULT, O_RDWR, 0,
                       &tap_log_attr);
        failtest_suppress = true;
        /* We expect a message. */
-       ok1(!tdb);
+       ok1(!ntdb);
        if (!ok1(tap_log_messages == 2))
                goto out;
        if (!ok1(strstr(log_last, "unknown")))
@@ -161,48 +161,48 @@ int main(int argc, char *argv[])
 
        /* We can open it read-only though! */
        failtest_suppress = false;
-       tdb = tdb_open("run-capabilities.tdb", TDB_DEFAULT, O_RDONLY, 0,
+       ntdb = ntdb_open("run-capabilities.ntdb", NTDB_DEFAULT, O_RDONLY, 0,
                       &tap_log_attr);
        failtest_suppress = true;
-       if (!ok1(tdb))
+       if (!ok1(ntdb))
                goto out;
        ok1(tap_log_messages == 2);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
+       ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
        ok1(tap_log_messages == 2);
-       ok1(tdb_summary(tdb, 0, &summary) == TDB_SUCCESS);
+       ok1(ntdb_summary(ntdb, 0, &summary) == NTDB_SUCCESS);
        ok1(strstr(summary, "Capability 1\n"));
        ok1(strstr(summary, "Capability 2 (read-only)\n"));
        free(summary);
-       tdb_close(tdb);
+       ntdb_close(ntdb);
 
        /* Capability says you can't open. */
-       create_tdb("run-capabilities.tdb",
+       create_ntdb("run-capabilities.ntdb",
                   1, false, false, false,
                   2, false, false, true, 0);
 
        failtest_suppress = false;
-       tdb = tdb_open("run-capabilities.tdb", TDB_DEFAULT, O_RDWR, 0,
+       ntdb = ntdb_open("run-capabilities.ntdb", NTDB_DEFAULT, O_RDWR, 0,
                       &tap_log_attr);
        failtest_suppress = true;
        /* We expect a message. */
-       ok1(!tdb);
+       ok1(!ntdb);
        if (!ok1(tap_log_messages == 3))
                goto out;
        if (!ok1(strstr(log_last, "unknown")))
                goto out;
 
        /* Combine capabilities correctly. */
-       create_tdb("run-capabilities.tdb",
+       create_ntdb("run-capabilities.ntdb",
                   1, false, false, false,
                   2, true, false, false,
                   3, false, true, false, 0);
 
        failtest_suppress = false;
-       tdb = tdb_open("run-capabilities.tdb", TDB_DEFAULT, O_RDWR, 0,
+       ntdb = ntdb_open("run-capabilities.ntdb", NTDB_DEFAULT, O_RDWR, 0,
                       &tap_log_attr);
        failtest_suppress = true;
        /* We expect a message. */
-       ok1(!tdb);
+       ok1(!ntdb);
        if (!ok1(tap_log_messages == 4))
                goto out;
        if (!ok1(strstr(log_last, "unknown")))
@@ -211,36 +211,36 @@ int main(int argc, char *argv[])
 
        /* We can open it read-only though! */
        failtest_suppress = false;
-       tdb = tdb_open("run-capabilities.tdb", TDB_DEFAULT, O_RDONLY, 0,
+       ntdb = ntdb_open("run-capabilities.ntdb", NTDB_DEFAULT, O_RDONLY, 0,
                       &tap_log_attr);
        failtest_suppress = true;
-       if (!ok1(tdb))
+       if (!ok1(ntdb))
                goto out;
        ok1(tap_log_messages == 4);
-       ok1(tdb_get_flags(tdb) & TDB_CANT_CHECK);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
+       ok1(ntdb_get_flags(ntdb) & NTDB_CANT_CHECK);
+       ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
        /* We expect a warning! */
        ok1(tap_log_messages == 5);
        ok1(strstr(log_last, "unknown"));
-       ok1(tdb_summary(tdb, 0, &summary) == TDB_SUCCESS);
+       ok1(ntdb_summary(ntdb, 0, &summary) == NTDB_SUCCESS);
        ok1(strstr(summary, "Capability 1\n"));
        ok1(strstr(summary, "Capability 2 (uncheckable)\n"));
        ok1(strstr(summary, "Capability 3 (read-only)\n"));
        free(summary);
-       tdb_close(tdb);
+       ntdb_close(ntdb);
 
        /* Two capability flags in one. */
-       create_tdb("run-capabilities.tdb",
+       create_ntdb("run-capabilities.ntdb",
                   1, false, false, false,
                   2, true, true, false,
                   0);
 
        failtest_suppress = false;
-       tdb = tdb_open("run-capabilities.tdb", TDB_DEFAULT, O_RDWR, 0,
+       ntdb = ntdb_open("run-capabilities.ntdb", NTDB_DEFAULT, O_RDWR, 0,
                       &tap_log_attr);
        failtest_suppress = true;
        /* We expect a message. */
-       ok1(!tdb);
+       ok1(!ntdb);
        if (!ok1(tap_log_messages == 6))
                goto out;
        if (!ok1(strstr(log_last, "unknown")))
@@ -249,22 +249,22 @@ int main(int argc, char *argv[])
 
        /* We can open it read-only though! */
        failtest_suppress = false;
-       tdb = tdb_open("run-capabilities.tdb", TDB_DEFAULT, O_RDONLY, 0,
+       ntdb = ntdb_open("run-capabilities.ntdb", NTDB_DEFAULT, O_RDONLY, 0,
                       &tap_log_attr);
        failtest_suppress = true;
-       if (!ok1(tdb))
+       if (!ok1(ntdb))
                goto out;
        ok1(tap_log_messages == 6);
-       ok1(tdb_get_flags(tdb) & TDB_CANT_CHECK);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
+       ok1(ntdb_get_flags(ntdb) & NTDB_CANT_CHECK);
+       ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
        /* We expect a warning! */
        ok1(tap_log_messages == 7);
        ok1(strstr(log_last, "unknown"));
-       ok1(tdb_summary(tdb, 0, &summary) == TDB_SUCCESS);
+       ok1(ntdb_summary(ntdb, 0, &summary) == NTDB_SUCCESS);
        ok1(strstr(summary, "Capability 1\n"));
        ok1(strstr(summary, "Capability 2 (uncheckable,read-only)\n"));
        free(summary);
-       tdb_close(tdb);
+       ntdb_close(ntdb);
 
 out:
        failtest_exit(exit_status());
diff --git a/lib/ntdb/test/run-expand-in-transaction.c b/lib/ntdb/test/run-expand-in-transaction.c
new file mode 100644 (file)
index 0000000..dadbec7
--- /dev/null
@@ -0,0 +1,36 @@
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include "logging.h"
+
+int main(int argc, char *argv[])
+{
+       unsigned int i;
+       struct ntdb_context *ntdb;
+       int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
+       NTDB_DATA key = ntdb_mkdata("key", 3);
+       NTDB_DATA data = ntdb_mkdata("data", 4);
+
+       plan_tests(sizeof(flags) / sizeof(flags[0]) * 7 + 1);
+
+       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+               size_t size;
+               ntdb = ntdb_open("run-expand-in-transaction.ntdb", flags[i],
+                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+               ok1(ntdb);
+               if (!ntdb)
+                       continue;
+
+               size = ntdb->file->map_size;
+               ok1(ntdb_transaction_start(ntdb) == 0);
+               ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+               ok1(ntdb->file->map_size > size);
+               ok1(ntdb_transaction_commit(ntdb) == 0);
+               ok1(ntdb->file->map_size > size);
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+               ntdb_close(ntdb);
+       }
+
+       ok1(tap_log_messages == 0);
+       return exit_status();
+}
similarity index 50%
rename from lib/tdb2/test/run-features.c
rename to lib/ntdb/test/run-features.c
index f552fcfb58e0d65637ef4c99acdce330fbf1ca36..0d6b3bce76cee83c219a95c3764e48a845ec12e4 100644 (file)
@@ -1,60 +1,60 @@
-#include "tdb2-source.h"
+#include "ntdb-source.h"
 #include "tap-interface.h"
 #include "logging.h"
 
 int main(int argc, char *argv[])
 {
        unsigned int i, j;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
-       struct tdb_data key = { (unsigned char *)&j, sizeof(j) };
-       struct tdb_data data = { (unsigned char *)&j, sizeof(j) };
+       struct ntdb_context *ntdb;
+       int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
+       NTDB_DATA key = { (unsigned char *)&j, sizeof(j) };
+       NTDB_DATA data = { (unsigned char *)&j, sizeof(j) };
 
        plan_tests(sizeof(flags) / sizeof(flags[0]) * 8 + 1);
        for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
                uint64_t features;
-               tdb = tdb_open("run-features.tdb", flags[i],
+               ntdb = ntdb_open("run-features.ntdb", flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
+               ok1(ntdb);
+               if (!ntdb)
                        continue;
 
                /* Put some stuff in there. */
                for (j = 0; j < 100; j++) {
-                       if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
-                               fail("Storing in tdb");
+                       if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0)
+                               fail("Storing in ntdb");
                }
 
                /* Mess with features fields in hdr. */
-               features = (~TDB_FEATURE_MASK ^ 1);
-               ok1(tdb_write_convert(tdb, offsetof(struct tdb_header,
+               features = (~NTDB_FEATURE_MASK ^ 1);
+               ok1(ntdb_write_convert(ntdb, offsetof(struct ntdb_header,
                                                    features_used),
                                      &features, sizeof(features)) == 0);
-               ok1(tdb_write_convert(tdb, offsetof(struct tdb_header,
+               ok1(ntdb_write_convert(ntdb, offsetof(struct ntdb_header,
                                                    features_offered),
                                      &features, sizeof(features)) == 0);
-               tdb_close(tdb);
+               ntdb_close(ntdb);
 
-               tdb = tdb_open("run-features.tdb", flags[i], O_RDWR, 0,
+               ntdb = ntdb_open("run-features.ntdb", flags[i], O_RDWR, 0,
                               &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
+               ok1(ntdb);
+               if (!ntdb)
                        continue;
 
                /* Should not have changed features offered. */
-               ok1(tdb_read_convert(tdb, offsetof(struct tdb_header,
+               ok1(ntdb_read_convert(ntdb, offsetof(struct ntdb_header,
                                                   features_offered),
                                     &features, sizeof(features)) == 0);
-               ok1(features == (~TDB_FEATURE_MASK ^ 1));
+               ok1(features == (~NTDB_FEATURE_MASK ^ 1));
 
                /* Should have cleared unknown bits in features_used. */
-               ok1(tdb_read_convert(tdb, offsetof(struct tdb_header,
+               ok1(ntdb_read_convert(ntdb, offsetof(struct ntdb_header,
                                                   features_used),
                                     &features, sizeof(features)) == 0);
-               ok1(features == (1 & TDB_FEATURE_MASK));
+               ok1(features == (1 & NTDB_FEATURE_MASK));
 
-               tdb_close(tdb);
+               ntdb_close(ntdb);
        }
 
        ok1(tap_log_messages == 0);
similarity index 70%
rename from lib/tdb2/test/run-lockall.c
rename to lib/ntdb/test/run-lockall.c
index 3ae0d14f654c2d0f039df575350d1f1cb7b764b1..964164e20b355ca305e2fc619618ff8808a56ba8 100644 (file)
@@ -3,25 +3,24 @@
 #include "lock-tracking.h"
 
 #define fcntl fcntl_with_lockcheck
-#include "tdb2-source.h"
+#include "ntdb-source.h"
 
 #include "tap-interface.h"
 #include <stdlib.h>
 #include <stdbool.h>
 #include <stdarg.h>
-#include <ccan/err/err.h>
 #include "external-agent.h"
 #include "logging.h"
 
-#define TEST_DBNAME "run-lockall.tdb"
+#define TEST_DBNAME "run-lockall.ntdb"
 
 #undef fcntl
 
 int main(int argc, char *argv[])
 {
        struct agent *agent;
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
+       int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
        int i;
 
        plan_tests(13 * sizeof(flags)/sizeof(flags[0]) + 1);
@@ -31,38 +30,38 @@ int main(int argc, char *argv[])
 
        for (i = 0; i < sizeof(flags)/sizeof(flags[0]); i++) {
                enum agent_return ret;
-               struct tdb_context *tdb;
+               struct ntdb_context *ntdb;
 
-               tdb = tdb_open(TEST_DBNAME, flags[i],
+               ntdb = ntdb_open(TEST_DBNAME, flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
+               ok1(ntdb);
 
                ret = external_agent_operation(agent, OPEN, TEST_DBNAME);
                ok1(ret == SUCCESS);
 
-               ok1(tdb_lockall(tdb) == TDB_SUCCESS);
+               ok1(ntdb_lockall(ntdb) == NTDB_SUCCESS);
                ok1(external_agent_operation(agent, STORE, "key")
                    == WOULD_HAVE_BLOCKED);
                ok1(external_agent_operation(agent, FETCH, "key")
                    == WOULD_HAVE_BLOCKED);
                /* Test nesting. */
-               ok1(tdb_lockall(tdb) == TDB_SUCCESS);
-               tdb_unlockall(tdb);
-               tdb_unlockall(tdb);
+               ok1(ntdb_lockall(ntdb) == NTDB_SUCCESS);
+               ntdb_unlockall(ntdb);
+               ntdb_unlockall(ntdb);
 
                ok1(external_agent_operation(agent, STORE, "key") == SUCCESS);
 
-               ok1(tdb_lockall_read(tdb) == TDB_SUCCESS);
+               ok1(ntdb_lockall_read(ntdb) == NTDB_SUCCESS);
                ok1(external_agent_operation(agent, STORE, "key")
                    == WOULD_HAVE_BLOCKED);
                ok1(external_agent_operation(agent, FETCH, "key") == SUCCESS);
-               ok1(tdb_lockall_read(tdb) == TDB_SUCCESS);
-               tdb_unlockall_read(tdb);
-               tdb_unlockall_read(tdb);
+               ok1(ntdb_lockall_read(ntdb) == NTDB_SUCCESS);
+               ntdb_unlockall_read(ntdb);
+               ntdb_unlockall_read(ntdb);
 
                ok1(external_agent_operation(agent, STORE, "key") == SUCCESS);
                ok1(external_agent_operation(agent, CLOSE, NULL) == SUCCESS);
-               tdb_close(tdb);
+               ntdb_close(ntdb);
        }
 
        free_external_agent(agent);
similarity index 58%
rename from lib/tdb2/test/run-remap-in-read_traverse.c
rename to lib/ntdb/test/run-remap-in-read_traverse.c
index 16a1baab468a3aedb0b78b60a3702943f8f1538d..2d817c2d734044069da86f5bba2d963d79be98f7 100644 (file)
@@ -1,11 +1,11 @@
-#include "tdb2-source.h"
-/* We had a bug where we marked the tdb read-only for a tdb_traverse_read.
- * If we then expanded the tdb, we would remap read-only, and later SEGV. */
+#include "ntdb-source.h"
+/* We had a bug where we marked the ntdb read-only for a ntdb_traverse_read.
+ * If we then expanded the ntdb, we would remap read-only, and later SEGV. */
 #include "tap-interface.h"
 #include "external-agent.h"
 #include "logging.h"
 
-static bool file_larger(int fd, tdb_len_t size)
+static bool file_larger(int fd, ntdb_len_t size)
 {
        struct stat st;
 
@@ -13,7 +13,7 @@ static bool file_larger(int fd, tdb_len_t size)
        return st.st_size != size;
 }
 
-static unsigned add_records_to_grow(struct agent *agent, int fd, tdb_len_t size)
+static unsigned add_records_to_grow(struct agent *agent, int fd, ntdb_len_t size)
 {
        unsigned int i;
 
@@ -31,27 +31,27 @@ int main(int argc, char *argv[])
 {
        unsigned int i;
        struct agent *agent;
-       struct tdb_context *tdb;
-       struct tdb_data d = tdb_mkdata("hello", 5);
-       const char filename[] = "run-remap-in-read_traverse.tdb";
+       struct ntdb_context *ntdb;
+       NTDB_DATA d = ntdb_mkdata("hello", 5);
+       const char filename[] = "run-remap-in-read_traverse.ntdb";
 
        plan_tests(4);
 
        agent = prepare_external_agent();
 
-       tdb = tdb_open(filename, TDB_DEFAULT,
+       ntdb = ntdb_open(filename, NTDB_DEFAULT,
                       O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
 
        ok1(external_agent_operation(agent, OPEN, filename) == SUCCESS);
-       i = add_records_to_grow(agent, tdb->file->fd, tdb->file->map_size);
+       i = add_records_to_grow(agent, ntdb->file->fd, ntdb->file->map_size);
 
        /* Do a traverse. */
-       ok1(tdb_traverse(tdb, NULL, NULL) == i);
+       ok1(ntdb_traverse(ntdb, NULL, NULL) == i);
 
        /* Now store something! */
-       ok1(tdb_store(tdb, d, d, TDB_INSERT) == 0);
+       ok1(ntdb_store(ntdb, d, d, NTDB_INSERT) == 0);
        ok1(tap_log_messages == 0);
-       tdb_close(tdb);
+       ntdb_close(ntdb);
        free_external_agent(agent);
        return exit_status();
 }
diff --git a/lib/ntdb/test/run-seed.c b/lib/ntdb/test/run-seed.c
new file mode 100644 (file)
index 0000000..2514f72
--- /dev/null
@@ -0,0 +1,61 @@
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include "logging.h"
+
+static int log_count = 0;
+
+/* Normally we get a log when setting random seed. */
+static void my_log_fn(struct ntdb_context *ntdb,
+                     enum ntdb_log_level level,
+                     enum NTDB_ERROR ecode,
+                     const char *message, void *priv)
+{
+       log_count++;
+}
+
+static union ntdb_attribute log_attr = {
+       .log = { .base = { .attr = NTDB_ATTRIBUTE_LOG },
+                .fn = my_log_fn }
+};
+
+int main(int argc, char *argv[])
+{
+       unsigned int i;
+       struct ntdb_context *ntdb;
+       union ntdb_attribute attr;
+       int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+                       NTDB_NOMMAP|NTDB_CONVERT };
+
+       attr.seed.base.attr = NTDB_ATTRIBUTE_SEED;
+       attr.seed.base.next = &log_attr;
+       attr.seed.seed = 42;
+
+       plan_tests(sizeof(flags) / sizeof(flags[0]) * 4 + 4 * 3);
+       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+               struct ntdb_header hdr;
+               int fd;
+               ntdb = ntdb_open("run-seed.ntdb", flags[i],
+                              O_RDWR|O_CREAT|O_TRUNC, 0600, &attr);
+               ok1(ntdb);
+               if (!ntdb)
+                       continue;
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+               ok1(ntdb->hash_seed == 42);
+               ok1(log_count == 0);
+               ntdb_close(ntdb);
+
+               if (flags[i] & NTDB_INTERNAL)
+                       continue;
+
+               fd = open("run-seed.ntdb", O_RDONLY);
+               ok1(fd >= 0);
+               ok1(read(fd, &hdr, sizeof(hdr)) == sizeof(hdr));
+               if (flags[i] & NTDB_CONVERT)
+                       ok1(bswap_64(hdr.hash_seed) == 42);
+               else
+                       ok1(hdr.hash_seed == 42);
+               close(fd);
+       }
+       return exit_status();
+}
diff --git a/lib/ntdb/test/run-tdb_errorstr.c b/lib/ntdb/test/run-tdb_errorstr.c
new file mode 100644 (file)
index 0000000..5b02314
--- /dev/null
@@ -0,0 +1,52 @@
+#include "ntdb-source.h"
+#include "tap-interface.h"
+
+int main(int argc, char *argv[])
+{
+       enum NTDB_ERROR e;
+       plan_tests(NTDB_ERR_RDONLY*-1 + 2);
+
+       for (e = NTDB_SUCCESS; e >= NTDB_ERR_RDONLY; e--) {
+               switch (e) {
+               case NTDB_SUCCESS:
+                       ok1(!strcmp(ntdb_errorstr(e),
+                                   "Success"));
+                       break;
+               case NTDB_ERR_IO:
+                       ok1(!strcmp(ntdb_errorstr(e),
+                                   "IO Error"));
+                       break;
+               case NTDB_ERR_LOCK:
+                       ok1(!strcmp(ntdb_errorstr(e),
+                                   "Locking error"));
+                       break;
+               case NTDB_ERR_OOM:
+                       ok1(!strcmp(ntdb_errorstr(e),
+                                   "Out of memory"));
+                       break;
+               case NTDB_ERR_EXISTS:
+                       ok1(!strcmp(ntdb_errorstr(e),
+                                   "Record exists"));
+                       break;
+               case NTDB_ERR_EINVAL:
+                       ok1(!strcmp(ntdb_errorstr(e),
+                                   "Invalid parameter"));
+                       break;
+               case NTDB_ERR_NOEXIST:
+                       ok1(!strcmp(ntdb_errorstr(e),
+                                   "Record does not exist"));
+                       break;
+               case NTDB_ERR_RDONLY:
+                       ok1(!strcmp(ntdb_errorstr(e),
+                                   "write not permitted"));
+                       break;
+               case NTDB_ERR_CORRUPT:
+                       ok1(!strcmp(ntdb_errorstr(e),
+                                   "Corrupt database"));
+                       break;
+               }
+       }
+       ok1(!strcmp(ntdb_errorstr(e), "Invalid error code"));
+
+       return exit_status();
+}
similarity index 54%
rename from lib/tdb2/test/run-tdb_foreach.c
rename to lib/ntdb/test/run-tdb_foreach.c
index b1eb2de21744b48b7a97e0b2d93ed0819df65fa3..f1a2d009195f5ce4925c97961caa3f9f0cf73fc5 100644 (file)
@@ -1,23 +1,23 @@
-#include "tdb2-source.h"
+#include "ntdb-source.h"
 #include "tap-interface.h"
 #include "logging.h"
 
-static int drop_count(struct tdb_context *tdb, unsigned int *count)
+static int drop_count(struct ntdb_context *ntdb, unsigned int *count)
 {
        if (--(*count) == 0)
                return 1;
        return 0;
 }
 
-static int set_found(struct tdb_context *tdb, bool found[3])
+static int set_found(struct ntdb_context *ntdb, bool found[3])
 {
        unsigned int idx;
 
-       if (strcmp(tdb_name(tdb), "run-tdb_foreach0.tdb") == 0)
+       if (strcmp(ntdb_name(ntdb), "run-ntdb_foreach0.ntdb") == 0)
                idx = 0;
-       else if (strcmp(tdb_name(tdb), "run-tdb_foreach1.tdb") == 0)
+       else if (strcmp(ntdb_name(ntdb), "run-ntdb_foreach1.ntdb") == 0)
                idx = 1;
-       else if (strcmp(tdb_name(tdb), "run-tdb_foreach2.tdb") == 0)
+       else if (strcmp(ntdb_name(ntdb), "run-ntdb_foreach2.ntdb") == 0)
                idx = 2;
        else
                abort();
@@ -32,52 +32,52 @@ int main(int argc, char *argv[])
 {
        unsigned int i, count;
        bool found[3];
-       struct tdb_context *tdb0, *tdb1, *tdb2;
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
+       struct ntdb_context *ntdb0, *ntdb1, *ntdb;
+       int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
 
        plan_tests(sizeof(flags) / sizeof(flags[0]) * 8);
        for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb0 = tdb_open("run-tdb_foreach0.tdb", flags[i],
+               ntdb0 = ntdb_open("run-ntdb_foreach0.ntdb", flags[i],
                                O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               tdb1 = tdb_open("run-tdb_foreach1.tdb", flags[i],
+               ntdb1 = ntdb_open("run-ntdb_foreach1.ntdb", flags[i],
                                O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               tdb2 = tdb_open("run-tdb_foreach2.tdb", flags[i],
+               ntdb = ntdb_open("run-ntdb_foreach2.ntdb", flags[i],
                                O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
 
                memset(found, 0, sizeof(found));
-               tdb_foreach(set_found, found);
+               ntdb_foreach(set_found, found);
                ok1(found[0] && found[1] && found[2]);
 
                /* Test premature iteration termination */
                count = 1;
-               tdb_foreach(drop_count, &count);
+               ntdb_foreach(drop_count, &count);
                ok1(count == 0);
 
-               tdb_close(tdb1);
+               ntdb_close(ntdb1);
                memset(found, 0, sizeof(found));
-               tdb_foreach(set_found, found);
+               ntdb_foreach(set_found, found);
                ok1(found[0] && !found[1] && found[2]);
 
-               tdb_close(tdb2);
+               ntdb_close(ntdb);
                memset(found, 0, sizeof(found));
-               tdb_foreach(set_found, found);
+               ntdb_foreach(set_found, found);
                ok1(found[0] && !found[1] && !found[2]);
 
-               tdb1 = tdb_open("run-tdb_foreach1.tdb", flags[i],
+               ntdb1 = ntdb_open("run-ntdb_foreach1.ntdb", flags[i],
                                O_RDWR, 0600, &tap_log_attr);
                memset(found, 0, sizeof(found));
-               tdb_foreach(set_found, found);
+               ntdb_foreach(set_found, found);
                ok1(found[0] && found[1] && !found[2]);
 
-               tdb_close(tdb0);
+               ntdb_close(ntdb0);
                memset(found, 0, sizeof(found));
-               tdb_foreach(set_found, found);
+               ntdb_foreach(set_found, found);
                ok1(!found[0] && found[1] && !found[2]);
 
-               tdb_close(tdb1);
+               ntdb_close(ntdb1);
                memset(found, 0, sizeof(found));
-               tdb_foreach(set_found, found);
+               ntdb_foreach(set_found, found);
                ok1(!found[0] && !found[1] && !found[2]);
                ok1(tap_log_messages == 0);
        }
similarity index 66%
rename from lib/tdb2/test/run-traverse.c
rename to lib/ntdb/test/run-traverse.c
index 20d610fe6601097c9bc265a294d2379f7e9c2eff..9dfc94d3b3ec289bfdb6db9cb2bb6bb558cb7a55 100644 (file)
@@ -1,4 +1,4 @@
-#include "tdb2-source.h"
+#include "ntdb-source.h"
 #include "tap-interface.h"
 #include "logging.h"
 
@@ -11,14 +11,14 @@ static uint64_t fixedhash(const void *key, size_t len, uint64_t seed, void *p)
                             *(uint64_t *)p);
 }
 
-static bool store_records(struct tdb_context *tdb)
+static bool store_records(struct ntdb_context *ntdb)
 {
        int i;
-       struct tdb_data key = { (unsigned char *)&i, sizeof(i) };
-       struct tdb_data data = { (unsigned char *)&i, sizeof(i) };
+       NTDB_DATA key = { (unsigned char *)&i, sizeof(i) };
+       NTDB_DATA data = { (unsigned char *)&i, sizeof(i) };
 
        for (i = 0; i < NUM_RECORDS; i++)
-               if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
+               if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0)
                        return false;
        return true;
 }
@@ -28,10 +28,10 @@ struct trav_data {
        int low, high;
        bool mismatch;
        bool delete;
-       enum TDB_ERROR delete_error;
+       enum NTDB_ERROR delete_error;
 };
 
-static int trav(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf,
+static int trav(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf,
                struct trav_data *td)
 {
        int val;
@@ -49,8 +49,8 @@ static int trav(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf,
                td->high = val;
 
        if (td->delete) {
-               td->delete_error = tdb_delete(tdb, key);
-               if (td->delete_error != TDB_SUCCESS) {
+               td->delete_error = ntdb_delete(ntdb, key);
+               if (td->delete_error != NTDB_SUCCESS) {
                        return -1;
                }
        }
@@ -64,10 +64,10 @@ struct trav_grow_data {
        unsigned int calls;
        unsigned int num_large;
        bool mismatch;
-       enum TDB_ERROR error;
+       enum NTDB_ERROR error;
 };
 
-static int trav_grow(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf,
+static int trav_grow(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf,
                     struct trav_grow_data *tgd)
 {
        int val;
@@ -87,8 +87,8 @@ static int trav_grow(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf,
        /* Make a big difference to the database. */
        dbuf.dptr = buffer;
        dbuf.dsize = sizeof(buffer);
-       tgd->error = tdb_append(tdb, key, dbuf);
-       if (tgd->error != TDB_SUCCESS) {
+       tgd->error = ntdb_append(ntdb, key, dbuf);
+       if (tgd->error != NTDB_SUCCESS) {
                return -1;
        }
        return 0;
@@ -100,12 +100,12 @@ int main(int argc, char *argv[])
        int num;
        struct trav_data td;
        struct trav_grow_data tgd;
-       struct tdb_context *tdb;
+       struct ntdb_context *ntdb;
        uint64_t seed = 16014841315512641303ULL;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT };
-       union tdb_attribute hattr = { .hash = { .base = { TDB_ATTRIBUTE_HASH },
+       int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+                       NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+                       NTDB_NOMMAP|NTDB_CONVERT };
+       union ntdb_attribute hattr = { .hash = { .base = { NTDB_ATTRIBUTE_HASH },
                                                .fn = fixedhash,
                                                .data = &seed } };
 
@@ -113,16 +113,16 @@ int main(int argc, char *argv[])
 
        plan_tests(sizeof(flags) / sizeof(flags[0]) * 32 + 1);
        for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-traverse.tdb", flags[i],
+               ntdb = ntdb_open("run-traverse.ntdb", flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
-               ok1(tdb);
-               if (!tdb)
+               ok1(ntdb);
+               if (!ntdb)
                        continue;
 
-               ok1(tdb_traverse(tdb, NULL, NULL) == 0);
+               ok1(ntdb_traverse(ntdb, NULL, NULL) == 0);
 
-               ok1(store_records(tdb));
-               num = tdb_traverse(tdb, NULL, NULL);
+               ok1(store_records(ntdb));
+               num = ntdb_traverse(ntdb, NULL, NULL);
                ok1(num == NUM_RECORDS);
 
                /* Full traverse. */
@@ -133,7 +133,7 @@ int main(int argc, char *argv[])
                td.mismatch = false;
                td.delete = false;
 
-               num = tdb_traverse(tdb, trav, &td);
+               num = ntdb_traverse(ntdb, trav, &td);
                ok1(num == NUM_RECORDS);
                ok1(!td.mismatch);
                ok1(td.calls == NUM_RECORDS);
@@ -148,13 +148,13 @@ int main(int argc, char *argv[])
                td.mismatch = false;
                td.delete = false;
 
-               num = tdb_traverse(tdb, trav, &td);
+               num = ntdb_traverse(ntdb, trav, &td);
                ok1(num == NUM_RECORDS / 2);
                ok1(!td.mismatch);
                ok1(td.calls == NUM_RECORDS / 2);
                ok1(td.low <= NUM_RECORDS / 2);
                ok1(td.high > NUM_RECORDS / 2);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
                ok1(tap_log_messages == 0);
 
                /* Deleting traverse (delete everything). */
@@ -164,38 +164,38 @@ int main(int argc, char *argv[])
                td.high = INT_MIN;
                td.mismatch = false;
                td.delete = true;
-               td.delete_error = TDB_SUCCESS;
-               num = tdb_traverse(tdb, trav, &td);
+               td.delete_error = NTDB_SUCCESS;
+               num = ntdb_traverse(ntdb, trav, &td);
                ok1(num == NUM_RECORDS);
-               ok1(td.delete_error == TDB_SUCCESS);
+               ok1(td.delete_error == NTDB_SUCCESS);
                ok1(!td.mismatch);
                ok1(td.calls == NUM_RECORDS);
                ok1(td.low == 0);
                ok1(td.high == NUM_RECORDS - 1);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
 
                /* Now it's empty! */
-               ok1(tdb_traverse(tdb, NULL, NULL) == 0);
+               ok1(ntdb_traverse(ntdb, NULL, NULL) == 0);
 
                /* Re-add. */
-               ok1(store_records(tdb));
-               ok1(tdb_traverse(tdb, NULL, NULL) == NUM_RECORDS);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
+               ok1(store_records(ntdb));
+               ok1(ntdb_traverse(ntdb, NULL, NULL) == NUM_RECORDS);
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
 
                /* Grow.  This will cause us to be reshuffled. */
                tgd.calls = 0;
                tgd.num_large = 0;
                tgd.mismatch = false;
-               tgd.error = TDB_SUCCESS;
-               ok1(tdb_traverse(tdb, trav_grow, &tgd) > 1);
+               tgd.error = NTDB_SUCCESS;
+               ok1(ntdb_traverse(ntdb, trav_grow, &tgd) > 1);
                ok1(tgd.error == 0);
                ok1(!tgd.mismatch);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
+               ok1(ntdb_check(ntdb, NULL, NULL) == 0);
                ok1(tgd.num_large < tgd.calls);
                diag("growing db: %u calls, %u repeats",
                     tgd.calls, tgd.num_large);
 
-               tdb_close(tdb);
+               ntdb_close(ntdb);
        }
 
        ok1(tap_log_messages == 0);
diff --git a/lib/ntdb/tools/Makefile b/lib/ntdb/tools/Makefile
new file mode 100644 (file)
index 0000000..087c256
--- /dev/null
@@ -0,0 +1,16 @@
+OBJS:=../../ntdb.o ../../hash.o ../../tally.o
+CFLAGS:=-I../../.. -I.. -Wall -g -O3 #-g -pg
+LDFLAGS:=-L../../..
+
+default: ntdbtorture ntdbtool ntdbdump ntdbrestore mkntdb speed growtdb-bench
+
+ntdbdump: ntdbdump.c $(OBJS)
+ntdbrestore: ntdbrestore.c $(OBJS)
+ntdbtorture: ntdbtorture.c $(OBJS)
+ntdbtool: ntdbtool.c $(OBJS)
+mkntdb: mkntdb.c $(OBJS)
+speed: speed.c $(OBJS)
+growtdb-bench: growtdb-bench.c $(OBJS)
+
+clean:
+       rm -f ntdbtorture ntdbdump ntdbrestore ntdbtool mkntdb speed growtdb-bench
diff --git a/lib/ntdb/tools/growtdb-bench.c b/lib/ntdb/tools/growtdb-bench.c
new file mode 100644 (file)
index 0000000..640f87a
--- /dev/null
@@ -0,0 +1,114 @@
+#include "ntdb.h"
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <ccan/err/err.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+/*
+ * Log callback handed to ntdb_open() through an NTDB_ATTRIBUTE_LOG
+ * attribute.  Writes one line to stderr of the form
+ *   ntdb:<database name>:<error string>:<message>
+ * The level and data arguments are accepted to match the callback
+ * signature but are not used.
+ */
+static void logfn(struct ntdb_context *ntdb,
+                 enum ntdb_log_level level,
+                 enum NTDB_ERROR ecode,
+                 const char *message,
+                 void *data)
+{
+       const char *dbname = ntdb_name(ntdb);
+       const char *errstr = ntdb_errorstr(ecode);
+
+       fprintf(stderr, "ntdb:%s:%s:%s\n", dbname, errstr, message);
+}
+
+/*
+ * growtdb-bench <users> <groups>
+ *
+ * Benchmark for records that grow by repeated appends:
+ *  1. In one transaction, insert <users> 64-byte user records and append
+ *     51 bytes to a single "User index" record per user.
+ *  2. For each of <groups> groups, in its own transaction, insert a group
+ *     record and then append 32 bytes to every user record and to the
+ *     group record, once per user.
+ * After each transaction the database is checked with ntdb_check() and
+ * /proc/<pid>/statm is printed so memory use can be tracked.
+ *
+ * Exits with status 1 on a usage error or any database/allocation failure.
+ */
+int main(int argc, char *argv[])
+{
+       const char *path = "/tmp/growtdb.ntdb";
+       unsigned int i, j, users, groups;
+       NTDB_DATA idxkey, idxdata;
+       NTDB_DATA k, d, gk;
+       char cmd[100];
+       struct ntdb_context *ntdb;
+       enum NTDB_ERROR ecode;
+       union ntdb_attribute log;
+
+       if (argc != 3) {
+               printf("Usage: growtdb-bench <users> <groups>\n");
+               exit(1);
+       }
+       users = atoi(argv[1]);
+       groups = atoi(argv[2]);
+
+       /* Bounded write: cmd is a fixed-size buffer. */
+       snprintf(cmd, sizeof(cmd), "cat /proc/%i/statm", (int)getpid());
+
+       log.base.attr = NTDB_ATTRIBUTE_LOG;
+       log.base.next = NULL;
+       log.log.fn = logfn;
+
+       ntdb = ntdb_open(path, NTDB_DEFAULT,
+                      O_RDWR|O_CREAT|O_TRUNC, 0600, &log);
+       /* Every call below dereferences ntdb, so a failed open is fatal. */
+       if (!ntdb)
+               err(1, "Opening %s", path);
+
+       idxkey.dptr = (unsigned char *)"User index";
+       idxkey.dsize = strlen("User index");
+       idxdata.dsize = 51;
+       idxdata.dptr = calloc(idxdata.dsize, 1);
+
+       /* Create users. */
+       k.dsize = 48;
+       k.dptr = calloc(k.dsize, 1);
+       d.dsize = 64;
+       d.dptr = calloc(d.dsize, 1);
+
+       /* The group key is sized by its own length, not the user key's. */
+       gk.dsize = 48;
+       gk.dptr = calloc(gk.dsize, 1);
+
+       if (!idxdata.dptr || !k.dptr || !d.dptr || !gk.dptr)
+               errx(1, "out of memory allocating record buffers");
+
+       ecode = ntdb_transaction_start(ntdb);
+       if (ecode != NTDB_SUCCESS)
+               errx(1, "ntdb transaction start failed: %s",
+                    ntdb_errorstr(ecode));
+       for (i = 0; i < users; i++) {
+               /* The user number occupies the first bytes of the key. */
+               memcpy(k.dptr, &i, sizeof(i));
+               ecode = ntdb_store(ntdb, k, d, NTDB_INSERT);
+               if (ecode != NTDB_SUCCESS)
+                       errx(1, "ntdb insert failed: %s", ntdb_errorstr(ecode));
+
+               /* This simulates a growing index record. */
+               ecode = ntdb_append(ntdb, idxkey, idxdata);
+               if (ecode != NTDB_SUCCESS)
+                       errx(1, "ntdb append failed: %s", ntdb_errorstr(ecode));
+       }
+       if ((ecode = ntdb_transaction_commit(ntdb)) != 0)
+               errx(1, "ntdb commit1 failed: %s", ntdb_errorstr(ecode));
+
+       if ((ecode = ntdb_check(ntdb, NULL, NULL)) != 0)
+               errx(1, "ntdb_check failed after initial insert!");
+
+       system(cmd);
+
+       /* Now put them all in groups: add 32 bytes to each record for
+        * a group. */
+       /* Last key byte set to 1 so group keys never equal user keys. */
+       gk.dptr[gk.dsize-1] = 1;
+
+       d.dsize = 32;
+       for (i = 0; i < groups; i++) {
+               ecode = ntdb_transaction_start(ntdb);
+               if (ecode != NTDB_SUCCESS)
+                       errx(1, "ntdb transaction start failed: %s",
+                            ntdb_errorstr(ecode));
+               /* Create the "group". */
+               memcpy(gk.dptr, &i, sizeof(i));
+               ecode = ntdb_store(ntdb, gk, d, NTDB_INSERT);
+               if (ecode != NTDB_SUCCESS)
+                       errx(1, "ntdb insert failed: %s", ntdb_errorstr(ecode));
+
+               /* Now populate it. */
+               for (j = 0; j < users; j++) {
+                       /* Append to the user. */
+                       memcpy(k.dptr, &j, sizeof(j));
+                       if ((ecode = ntdb_append(ntdb, k, d)) != 0)
+                               errx(1, "ntdb append failed: %s",
+                                    ntdb_errorstr(ecode));
+
+                       /* Append to the group. */
+                       if ((ecode = ntdb_append(ntdb, gk, d)) != 0)
+                               errx(1, "ntdb append failed: %s",
+                                    ntdb_errorstr(ecode));
+               }
+               if ((ecode = ntdb_transaction_commit(ntdb)) != 0)
+                       errx(1, "ntdb commit2 failed: %s", ntdb_errorstr(ecode));
+               /* i is unsigned, so print it with %u. */
+               if ((ecode = ntdb_check(ntdb, NULL, NULL)) != 0)
+                       errx(1, "ntdb_check failed after iteration %u!", i);
+               system(cmd);
+       }
+
+       /* Release the database and the record buffers before exiting. */
+       ntdb_close(ntdb);
+       free(gk.dptr);
+       free(d.dptr);
+       free(k.dptr);
+       free(idxdata.dptr);
+
+       return 0;
+}
similarity index 69%
rename from lib/tdb2/tools/mktdb2.c
rename to lib/ntdb/tools/mkntdb.c
index 35d7a07d0bc514830017b7cd11c1075dd73856aa..e728987a533fd75ab50b8c2f0788ff2f290533c9 100644 (file)
@@ -1,4 +1,4 @@
-#include "tdb2.h"
+#include "ntdb.h"
 #include <stdlib.h>
 #include <stdio.h>
 #include <fcntl.h>
@@ -7,21 +7,21 @@
 int main(int argc, char *argv[])
 {
        unsigned int i, num_recs;
-       struct tdb_context *tdb;
+       struct ntdb_context *ntdb;
 
        if (argc != 3 || (num_recs = atoi(argv[2])) == 0)
                errx(1, "Usage: mktdb <tdbfile> <numrecords>");
 
-       tdb = tdb_open(argv[1], TDB_DEFAULT, O_CREAT|O_TRUNC|O_RDWR, 0600,NULL);
-       if (!tdb)
+       ntdb = ntdb_open(argv[1], NTDB_DEFAULT, O_CREAT|O_TRUNC|O_RDWR, 0600,NULL);
+       if (!ntdb)
                err(1, "Opening %s", argv[1]);
 
        for (i = 0; i < num_recs; i++) {
-               TDB_DATA d;
+               NTDB_DATA d;
 
                d.dptr = (void *)&i;
                d.dsize = sizeof(i);
-               if (tdb_store(tdb, d, d, TDB_INSERT) != 0)
+               if (ntdb_store(ntdb, d, d, NTDB_INSERT) != 0)
                        err(1, "Failed to store record %i", i);
        }
        printf("Done\n");
similarity index 62%
rename from lib/tdb2/tools/tdb2backup.c
rename to lib/ntdb/tools/ntdbbackup.c
index 37b301c548217b8057897bc10f104aac76aa1544..a76f18491b9e69d71d88ad1226ef7d0c42af770f 100644 (file)
@@ -1,6 +1,6 @@
 /*
    Unix SMB/CIFS implementation.
-   low level tdb backup and restore utility
+   low level ntdb backup and restore utility
    Copyright (C) Andrew Tridgell              2002
 
    This program is free software; you can redistribute it and/or modify
 
 /*
 
-  This program is meant for backup/restore of tdb databases. Typical usage would be:
-     tdbbackup *.tdb
+  This program is meant for backup/restore of ntdb databases. Typical usage would be:
+     ntdbbackup *.ntdb
   when Samba shuts down cleanly, which will make a backup of all the local databases
   to *.bak files. Then on Samba startup you would use:
-     tdbbackup -v *.tdb
+     ntdbbackup -v *.ntdb
   and this will check the databases for corruption and if corruption is detected then
   the backup will be restored.
 
@@ -41,7 +41,7 @@
  */
 
 #include "config.h"
-#include "tdb2.h"
+#include "ntdb.h"
 #include "system/filesys.h"
 
 #ifdef HAVE_GETOPT_H
 
 static int failed;
 
-static void tdb_log(struct tdb_context *tdb,
-                   enum tdb_log_level level,
-                   enum TDB_ERROR ecode,
+static void ntdb_log(struct ntdb_context *ntdb,
+                   enum ntdb_log_level level,
+                   enum NTDB_ERROR ecode,
                    const char *message,
                    void *data)
 {
-       fprintf(stderr, "%s:%s\n", tdb_errorstr(ecode), message);
+       fprintf(stderr, "%s:%s\n", ntdb_errorstr(ecode), message);
 }
 
 static char *add_suffix(const char *name, const char *suffix)
@@ -72,15 +72,15 @@ static char *add_suffix(const char *name, const char *suffix)
        return ret;
 }
 
-static int copy_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, void *state)
+static int copy_fn(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state)
 {
-       struct tdb_context *tdb_new = (struct tdb_context *)state;
-       enum TDB_ERROR err;
+       struct ntdb_context *ntdb_new = (struct ntdb_context *)state;
+       enum NTDB_ERROR err;
 
-       err = tdb_store(tdb_new, key, dbuf, TDB_INSERT);
+       err = ntdb_store(ntdb_new, key, dbuf, NTDB_INSERT);
        if (err) {
                fprintf(stderr,"Failed to insert into %s: %s\n",
-                       tdb_name(tdb_new), tdb_errorstr(err));
+                       ntdb_name(ntdb_new), ntdb_errorstr(err));
                failed = 1;
                return 1;
        }
@@ -88,75 +88,75 @@ static int copy_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, void *s
 }
 
 
-static int test_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, void *state)
+static int test_fn(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state)
 {
        return 0;
 }
 
 /*
-  carefully backup a tdb, validating the contents and
+  carefully backup a ntdb, validating the contents and
   only doing the backup if its OK
   this function is also used for restore
 */
-static int backup_tdb(const char *old_name, const char *new_name)
+static int backup_ntdb(const char *old_name, const char *new_name)
 {
-       struct tdb_context *tdb;
-       struct tdb_context *tdb_new;
+       struct ntdb_context *ntdb;
+       struct ntdb_context *ntdb_new;
        char *tmp_name;
        struct stat st;
        int count1, count2;
-       enum TDB_ERROR err;
-       union tdb_attribute log_attr;
+       enum NTDB_ERROR err;
+       union ntdb_attribute log_attr;
 
        tmp_name = add_suffix(new_name, ".tmp");
 
-       /* stat the old tdb to find its permissions */
+       /* stat the old ntdb to find its permissions */
        if (stat(old_name, &st) != 0) {
                perror(old_name);
                free(tmp_name);
                return 1;
        }
 
-       log_attr.base.attr = TDB_ATTRIBUTE_LOG;
+       log_attr.base.attr = NTDB_ATTRIBUTE_LOG;
        log_attr.base.next = NULL;
-       log_attr.log.fn = tdb_log;
+       log_attr.log.fn = ntdb_log;
 
-       /* open the old tdb */
-       tdb = tdb_open(old_name, TDB_DEFAULT, O_RDWR, 0, &log_attr);
-       if (!tdb) {
+       /* open the old ntdb */
+       ntdb = ntdb_open(old_name, NTDB_DEFAULT, O_RDWR, 0, &log_attr);
+       if (!ntdb) {
                printf("Failed to open %s\n", old_name);
                free(tmp_name);
                return 1;
        }
 
        unlink(tmp_name);
-       tdb_new = tdb_open(tmp_name, TDB_DEFAULT,
+       ntdb_new = ntdb_open(tmp_name, NTDB_DEFAULT,
                           O_RDWR|O_CREAT|O_EXCL, st.st_mode & 0777,
                           &log_attr);
-       if (!tdb_new) {
+       if (!ntdb_new) {
                perror(tmp_name);
                free(tmp_name);
                return 1;
        }
 
-       err = tdb_transaction_start(tdb);
+       err = ntdb_transaction_start(ntdb);
        if (err) {
-               fprintf(stderr, "Failed to start transaction on old tdb: %s\n",
-                       tdb_errorstr(err));
-               tdb_close(tdb);
-               tdb_close(tdb_new);
+               fprintf(stderr, "Failed to start transaction on old ntdb: %s\n",
+                       ntdb_errorstr(err));
+               ntdb_close(ntdb);
+               ntdb_close(ntdb_new);
                unlink(tmp_name);
                free(tmp_name);
                return 1;
        }
 
-       /* lock the backup tdb so that nobody else can change it */
-       err = tdb_lockall(tdb_new);
+       /* lock the backup ntdb so that nobody else can change it */
+       err = ntdb_lockall(ntdb_new);
        if (err) {
-               fprintf(stderr, "Failed to lock backup tdb: %s\n",
-                       tdb_errorstr(err));
-               tdb_close(tdb);
-               tdb_close(tdb_new);
+               fprintf(stderr, "Failed to lock backup ntdb: %s\n",
+                       ntdb_errorstr(err));
+               ntdb_close(ntdb);
+               ntdb_close(ntdb_new);
                unlink(tmp_name);
                free(tmp_name);
                return 1;
@@ -165,39 +165,39 @@ static int backup_tdb(const char *old_name, const char *new_name)
        failed = 0;
 
        /* traverse and copy */
-       count1 = tdb_traverse(tdb, copy_fn, (void *)tdb_new);
+       count1 = ntdb_traverse(ntdb, copy_fn, (void *)ntdb_new);
        if (count1 < 0 || failed) {
                fprintf(stderr,"failed to copy %s\n", old_name);
-               tdb_close(tdb);
-               tdb_close(tdb_new);
+               ntdb_close(ntdb);
+               ntdb_close(ntdb_new);
                unlink(tmp_name);
                free(tmp_name);
                return 1;
        }
 
-       /* close the old tdb */
-       tdb_close(tdb);
+       /* close the old ntdb */
+       ntdb_close(ntdb);
 
-       /* copy done, unlock the backup tdb */
-       tdb_unlockall(tdb_new);
+       /* copy done, unlock the backup ntdb */
+       ntdb_unlockall(ntdb_new);
 
 #ifdef HAVE_FDATASYNC
-       if (fdatasync(tdb_fd(tdb_new)) != 0) {
+       if (fdatasync(ntdb_fd(ntdb_new)) != 0) {
 #else
-       if (fsync(tdb_fd(tdb_new)) != 0) {
+       if (fsync(ntdb_fd(ntdb_new)) != 0) {
 #endif
                /* not fatal */
                fprintf(stderr, "failed to fsync backup file\n");
        }
 
-       /* close the new tdb and re-open read-only */
-       tdb_close(tdb_new);
+       /* close the new ntdb and re-open read-only */
+       ntdb_close(ntdb_new);
 
        /* we don't need the hash attr any more */
        log_attr.base.next = NULL;
 
-       tdb_new = tdb_open(tmp_name, TDB_DEFAULT, O_RDONLY, 0, &log_attr);
-       if (!tdb_new) {
+       ntdb_new = ntdb_open(tmp_name, NTDB_DEFAULT, O_RDONLY, 0, &log_attr);
+       if (!ntdb_new) {
                fprintf(stderr,"failed to reopen %s\n", tmp_name);
                unlink(tmp_name);
                perror(tmp_name);
@@ -205,18 +205,18 @@ static int backup_tdb(const char *old_name, const char *new_name)
                return 1;
        }
 
-       /* traverse the new tdb to confirm */
-       count2 = tdb_traverse(tdb_new, test_fn, NULL);
+       /* traverse the new ntdb to confirm */
+       count2 = ntdb_traverse(ntdb_new, test_fn, NULL);
        if (count2 != count1) {
                fprintf(stderr,"failed to copy %s\n", old_name);
-               tdb_close(tdb_new);
+               ntdb_close(ntdb_new);
                unlink(tmp_name);
                free(tmp_name);
                return 1;
        }
 
-       /* close the new tdb and rename it to .bak */
-       tdb_close(tdb_new);
+       /* close the new ntdb and rename it to .bak */
+       ntdb_close(ntdb_new);
        if (rename(tmp_name, new_name) != 0) {
                perror(new_name);
                free(tmp_name);
@@ -229,31 +229,31 @@ static int backup_tdb(const char *old_name, const char *new_name)
 }
 
 /*
-  verify a tdb and if it is corrupt then restore from *.bak
+  verify a ntdb and if it is corrupt then restore from *.bak
 */
-static int verify_tdb(const char *fname, const char *bak_name)
+static int verify_ntdb(const char *fname, const char *bak_name)
 {
-       struct tdb_context *tdb;
+       struct ntdb_context *ntdb;
        int count = -1;
-       union tdb_attribute log_attr;
+       union ntdb_attribute log_attr;
 
-       log_attr.base.attr = TDB_ATTRIBUTE_LOG;
+       log_attr.base.attr = NTDB_ATTRIBUTE_LOG;
        log_attr.base.next = NULL;
-       log_attr.log.fn = tdb_log;
+       log_attr.log.fn = ntdb_log;
 
-       /* open the tdb */
-       tdb = tdb_open(fname, TDB_DEFAULT, O_RDONLY, 0, &log_attr);
+       /* open the ntdb */
+       ntdb = ntdb_open(fname, NTDB_DEFAULT, O_RDONLY, 0, &log_attr);
 
-       /* traverse the tdb, then close it */
-       if (tdb) {
-               count = tdb_traverse(tdb, test_fn, NULL);
-               tdb_close(tdb);
+       /* traverse the ntdb, then close it */
+       if (ntdb) {
+               count = ntdb_traverse(ntdb, test_fn, NULL);
+               ntdb_close(ntdb);
        }
 
        /* count is < 0 means an error */
        if (count < 0) {
                printf("restoring %s\n", fname);
-               return backup_tdb(bak_name, fname);
+               return backup_ntdb(bak_name, fname);
        }
 
        printf("%s : %d records\n", fname, count);
@@ -278,7 +278,7 @@ static int file_newer(const char *fname1, const char *fname2)
 
 static void usage(void)
 {
-       printf("Usage: tdb2backup [options] <fname...>\n\n");
+       printf("Usage: ntdbbackup [options] <fname...>\n\n");
        printf("   -h            this help message\n");
        printf("   -v            verify mode (restore if corrupt)\n");
        printf("   -s suffix     set the backup suffix\n");
@@ -323,12 +323,12 @@ static void usage(void)
                bak_name = add_suffix(fname, suffix);
 
                if (verify) {
-                       if (verify_tdb(fname, bak_name) != 0) {
+                       if (verify_ntdb(fname, bak_name) != 0) {
                                ret = 1;
                        }
                } else {
                        if (file_newer(fname, bak_name) &&
-                           backup_tdb(fname, bak_name) != 0) {
+                           backup_ntdb(fname, bak_name) != 0) {
                                ret = 1;
                        }
                }
similarity index 77%
rename from lib/tdb2/tools/tdb2dump.c
rename to lib/ntdb/tools/ntdbdump.c
index 40230a26431a8cf4bfd92f2b5b966b7f5bc25dde..1b1c59eae3684f34c8e8724ded9a4fe0c032040c 100644 (file)
@@ -1,5 +1,5 @@
 /*
-   simple tdb2 dump util
+   simple ntdb dump util
    Copyright (C) Andrew Tridgell              2001
    Copyright (C) Rusty Russell                2011
 
@@ -17,7 +17,7 @@
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 #include "config.h"
-#include "tdb2.h"
+#include "ntdb.h"
 #ifdef HAVE_LIBREPLACE
 #include <replace.h>
 #include <system/filesys.h>
@@ -32,7 +32,7 @@
 #include <unistd.h>
 #endif
 
-static void print_data(TDB_DATA d)
+static void print_data(NTDB_DATA d)
 {
        unsigned char *p = (unsigned char *)d.dptr;
        int len = d.dsize;
@@ -46,7 +46,7 @@ static void print_data(TDB_DATA d)
        }
 }
 
-static int traverse_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, void *state)
+static int traverse_fn(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state)
 {
        printf("{\n");
        printf("key(%d) = \"", (int)key.dsize);
@@ -59,22 +59,22 @@ static int traverse_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, voi
        return 0;
 }
 
-static int dump_tdb(const char *fname, const char *keyname)
+static int dump_ntdb(const char *fname, const char *keyname)
 {
-       struct tdb_context *tdb;
-       TDB_DATA key, value;
+       struct ntdb_context *ntdb;
+       NTDB_DATA key, value;
 
-       tdb = tdb_open(fname, 0, O_RDONLY, 0, NULL);
-       if (!tdb) {
+       ntdb = ntdb_open(fname, 0, O_RDONLY, 0, NULL);
+       if (!ntdb) {
                printf("Failed to open %s\n", fname);
                return 1;
        }
 
        if (!keyname) {
-               tdb_traverse(tdb, traverse_fn, NULL);
+               ntdb_traverse(ntdb, traverse_fn, NULL);
        } else {
-               key = tdb_mkdata(keyname, strlen(keyname));
-               if (tdb_fetch(tdb, key, &value) != 0) {
+               key = ntdb_mkdata(keyname, strlen(keyname));
+               if (ntdb_fetch(ntdb, key, &value) != 0) {
                        return 1;
                } else {
                        print_data(value);
@@ -87,7 +87,7 @@ static int dump_tdb(const char *fname, const char *keyname)
 
 static void usage( void)
 {
-       printf( "Usage: tdb2dump [options] <filename>\n\n");
+       printf( "Usage: ntdbdump [options] <filename>\n\n");
        printf( "   -h          this help message\n");
        printf( "   -k keyname  dumps value of keyname\n");
 }
@@ -98,7 +98,7 @@ static void usage( void)
        int c;
 
        if (argc < 2) {
-               printf("Usage: tdb2dump <fname>\n");
+               printf("Usage: ntdbdump <fname>\n");
                exit(1);
        }
 
@@ -118,5 +118,5 @@ static void usage( void)
 
        fname = argv[optind];
 
-       return dump_tdb(fname, keyname);
+       return dump_ntdb(fname, keyname);
 }
similarity index 84%
rename from lib/tdb2/tools/tdb2restore.c
rename to lib/ntdb/tools/ntdbrestore.c
index 93c6c8bfe5ac1128fa519149c6de35c15e765dfa..dad591d562511eff9a1b27af06d946ebfb4cbad9 100644 (file)
@@ -1,5 +1,5 @@
 /*
-   tdb2restore -- construct a tdb from tdbdump output.
+   ntdbrestore -- construct a ntdb from tdbdump output.
    Copyright (C) Volker Lendecke               2010
    Copyright (C) Simon McVittie                        2005
 
@@ -18,7 +18,7 @@
 */
 
 #include "config.h"
-#include "tdb2.h"
+#include "ntdb.h"
 #include <assert.h>
 #ifdef HAVE_LIBREPLACE
 #include <replace.h>
@@ -88,7 +88,7 @@ static int read_hex(void) {
        }
 }
 
-static int read_data(FILE *f, struct tdb_data *d, size_t size) {
+static int read_data(FILE *f, NTDB_DATA *d, size_t size) {
        int c, low, high;
        int i;
 
@@ -141,12 +141,12 @@ static int swallow(FILE *f, const char *s, int *eof)
        return 0;
 }
 
-static bool read_rec(FILE *f, struct tdb_context *tdb, int *eof)
+static bool read_rec(FILE *f, struct ntdb_context *ntdb, int *eof)
 {
        int length;
-       struct tdb_data key, data;
+       NTDB_DATA key, data;
        bool ret = false;
-       enum TDB_ERROR e;
+       enum NTDB_ERROR e;
 
        key.dptr = NULL;
        data.dptr = NULL;
@@ -175,9 +175,9 @@ static bool read_rec(FILE *f, struct tdb_context *tdb, int *eof)
            || (swallow(f, "}\n", NULL) == -1)) {
                goto fail;
        }
-       e = tdb_store(tdb, key, data, TDB_INSERT);
-       if (e != TDB_SUCCESS) {
-               fprintf(stderr, "TDB error: %s\n", tdb_errorstr(e));
+       e = ntdb_store(ntdb, key, data, NTDB_INSERT);
+       if (e != NTDB_SUCCESS) {
+               fprintf(stderr, "NTDB error: %s\n", ntdb_errorstr(e));
                goto fail;
        }
 
@@ -188,28 +188,28 @@ fail:
        return ret;
 }
 
-static int restore_tdb(const char *fname)
+static int restore_ntdb(const char *fname)
 {
-       struct tdb_context *tdb;
+       struct ntdb_context *ntdb;
 
-       tdb = tdb_open(fname, 0, O_RDWR|O_CREAT|O_EXCL, 0666, NULL);
-       if (!tdb) {
-               perror("tdb_open");
+       ntdb = ntdb_open(fname, 0, O_RDWR|O_CREAT|O_EXCL, 0666, NULL);
+       if (!ntdb) {
+               perror("ntdb_open");
                fprintf(stderr, "Failed to open %s\n", fname);
                return 1;
        }
 
        while (1) {
                int eof = 0;
-               if (!read_rec(stdin, tdb, &eof)) {
+               if (!read_rec(stdin, ntdb, &eof)) {
                        if (eof) {
                                break;
                        }
                        return 1;
                }
        }
-       if (tdb_close(tdb)) {
-               fprintf(stderr, "Error closing tdb\n");
+       if (ntdb_close(ntdb)) {
+               fprintf(stderr, "Error closing ntdb\n");
                return 1;
        }
        fprintf(stderr, "EOF\n");
@@ -227,5 +227,5 @@ int main(int argc, char *argv[])
 
        fname = argv[1];
 
-       return restore_tdb(fname);
+       return restore_ntdb(fname);
 }
similarity index 69%
rename from lib/tdb2/tools/tdb2tool.c
rename to lib/ntdb/tools/ntdbtool.c
index ae20971143ff07cc0f30d6b23e56533ae14d0b67..7c1ef7df7aac7180480d10ffaf0709427f39ebd9 100644 (file)
@@ -21,7 +21,7 @@
 */
 
 #include "config.h"
-#include "tdb2.h"
+#include "ntdb.h"
 #ifdef HAVE_LIBREPLACE
 #include <replace.h>
 #include <system/filesys.h>
@@ -46,13 +46,13 @@ char *arg1, *arg2;
 size_t arg1len, arg2len;
 int bIterate = 0;
 char *line;
-TDB_DATA iterate_kbuf;
+NTDB_DATA iterate_kbuf;
 char cmdline[1024];
 static int disable_mmap;
 
 enum commands {
-       CMD_CREATE_TDB,
-       CMD_OPEN_TDB,
+       CMD_CREATE_NTDB,
+       CMD_OPEN_NTDB,
        CMD_TRANSACTION_START,
        CMD_TRANSACTION_COMMIT,
        CMD_TRANSACTION_CANCEL,
@@ -86,8 +86,8 @@ typedef struct {
 } COMMAND_TABLE;
 
 COMMAND_TABLE cmd_table[] = {
-       {"create",      CMD_CREATE_TDB},
-       {"open",        CMD_OPEN_TDB},
+       {"create",      CMD_CREATE_NTDB},
+       {"open",        CMD_OPEN_NTDB},
 #if 0
        {"transaction_start",   CMD_TRANSACTION_START},
        {"transaction_commit",  CMD_TRANSACTION_COMMIT},
@@ -134,23 +134,23 @@ static double _end_timer(void)
               (tp2.tv_usec - tp1.tv_usec)*1.0e-6);
 }
 
-static void tdb_log(struct tdb_context *tdb,
-                   enum tdb_log_level level,
-                   enum TDB_ERROR ecode,
+static void ntdb_log(struct ntdb_context *ntdb,
+                   enum ntdb_log_level level,
+                   enum NTDB_ERROR ecode,
                    const char *message,
                    void *data)
 {
-       fprintf(stderr, "tdb:%s:%s:%s\n",
-               tdb_name(tdb), tdb_errorstr(ecode), message);
+       fprintf(stderr, "ntdb:%s:%s:%s\n",
+               ntdb_name(ntdb), ntdb_errorstr(ecode), message);
 }
 
-/* a tdb tool for manipulating a tdb database */
+/* a ntdb tool for manipulating a ntdb database */
 
-static struct tdb_context *tdb;
+static struct ntdb_context *ntdb;
 
-static int print_rec(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, void *state);
-static int print_key(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, void *state);
-static int print_hexkey(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, void *state);
+static int print_rec(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state);
+static int print_key(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state);
+static int print_hexkey(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state);
 
 static void print_asc(const char *buf,int len)
 {
@@ -214,7 +214,7 @@ static void help(void)
 "  hexkeys              : dump the database keys as hex values\n"
 "  info                 : print summary info about the database\n"
 "  insert    key  data  : insert a record\n"
-"  move      key  file  : move a record to a destination tdb\n"
+"  move      key  file  : move a record to a destination ntdb\n"
 "  store     key  data  : store a record (replace)\n"
 "  show      key        : show a record by key\n"
 "  delete    key        : delete a record by key\n"
@@ -232,51 +232,51 @@ static void help(void)
 "\n");
 }
 
-static void terror(enum TDB_ERROR err, const char *why)
+static void terror(enum NTDB_ERROR err, const char *why)
 {
-       if (err != TDB_SUCCESS)
-               printf("%s:%s\n", tdb_errorstr(err), why);
+       if (err != NTDB_SUCCESS)
+               printf("%s:%s\n", ntdb_errorstr(err), why);
        else
                printf("%s\n", why);
 }
 
-static void create_tdb(const char *tdbname)
+static void create_ntdb(const char *tdbname)
 {
-       union tdb_attribute log_attr;
-       log_attr.base.attr = TDB_ATTRIBUTE_LOG;
+       union ntdb_attribute log_attr;
+       log_attr.base.attr = NTDB_ATTRIBUTE_LOG;
        log_attr.base.next = NULL;
-       log_attr.log.fn = tdb_log;
+       log_attr.log.fn = ntdb_log;
 
-       if (tdb) tdb_close(tdb);
-       tdb = tdb_open(tdbname, (disable_mmap?TDB_NOMMAP:0),
+       if (ntdb) ntdb_close(ntdb);
+       ntdb = ntdb_open(tdbname, (disable_mmap?NTDB_NOMMAP:0),
                       O_RDWR | O_CREAT | O_TRUNC, 0600, &log_attr);
-       if (!tdb) {
+       if (!ntdb) {
                printf("Could not create %s: %s\n", tdbname, strerror(errno));
        }
 }
 
-static void open_tdb(const char *tdbname)
+static void open_ntdb(const char *tdbname)
 {
-       union tdb_attribute log_attr;
-       log_attr.base.attr = TDB_ATTRIBUTE_LOG;
+       union ntdb_attribute log_attr;
+       log_attr.base.attr = NTDB_ATTRIBUTE_LOG;
        log_attr.base.next = NULL;
-       log_attr.log.fn = tdb_log;
+       log_attr.log.fn = ntdb_log;
 
-       if (tdb) tdb_close(tdb);
-       tdb = tdb_open(tdbname, disable_mmap?TDB_NOMMAP:0, O_RDWR, 0600,
+       if (ntdb) ntdb_close(ntdb);
+       ntdb = ntdb_open(tdbname, disable_mmap?NTDB_NOMMAP:0, O_RDWR, 0600,
                       &log_attr);
-       if (!tdb) {
+       if (!ntdb) {
                printf("Could not open %s: %s\n", tdbname, strerror(errno));
        }
 }
 
-static void insert_tdb(char *keyname, size_t keylen, char* data, size_t datalen)
+static void insert_ntdb(char *keyname, size_t keylen, char* data, size_t datalen)
 {
-       TDB_DATA key, dbuf;
-       enum TDB_ERROR ecode;
+       NTDB_DATA key, dbuf;
+       enum NTDB_ERROR ecode;
 
        if ((keyname == NULL) || (keylen == 0)) {
-               terror(TDB_SUCCESS, "need key");
+               terror(NTDB_SUCCESS, "need key");
                return;
        }
 
@@ -285,24 +285,24 @@ static void insert_tdb(char *keyname, size_t keylen, char* data, size_t datalen)
        dbuf.dptr = (unsigned char *)data;
        dbuf.dsize = datalen;
 
-       ecode = tdb_store(tdb, key, dbuf, TDB_INSERT);
+       ecode = ntdb_store(ntdb, key, dbuf, NTDB_INSERT);
        if (ecode) {
                terror(ecode, "insert failed");
        }
 }
 
-static void store_tdb(char *keyname, size_t keylen, char* data, size_t datalen)
+static void store_ntdb(char *keyname, size_t keylen, char* data, size_t datalen)
 {
-       TDB_DATA key, dbuf;
-       enum TDB_ERROR ecode;
+       NTDB_DATA key, dbuf;
+       enum NTDB_ERROR ecode;
 
        if ((keyname == NULL) || (keylen == 0)) {
-               terror(TDB_SUCCESS, "need key");
+               terror(NTDB_SUCCESS, "need key");
                return;
        }
 
        if ((data == NULL) || (datalen == 0)) {
-               terror(TDB_SUCCESS, "need data");
+               terror(NTDB_SUCCESS, "need data");
                return;
        }
 
@@ -312,52 +312,52 @@ static void store_tdb(char *keyname, size_t keylen, char* data, size_t datalen)
        dbuf.dsize = datalen;
 
        printf("Storing key:\n");
-       print_rec(tdb, key, dbuf, NULL);
+       print_rec(ntdb, key, dbuf, NULL);
 
-       ecode = tdb_store(tdb, key, dbuf, TDB_REPLACE);
+       ecode = ntdb_store(ntdb, key, dbuf, NTDB_REPLACE);
        if (ecode) {
                terror(ecode, "store failed");
        }
 }
 
-static void show_tdb(char *keyname, size_t keylen)
+static void show_ntdb(char *keyname, size_t keylen)
 {
-       TDB_DATA key, dbuf;
-       enum TDB_ERROR ecode;
+       NTDB_DATA key, dbuf;
+       enum NTDB_ERROR ecode;
 
        if ((keyname == NULL) || (keylen == 0)) {
-               terror(TDB_SUCCESS, "need key");
+               terror(NTDB_SUCCESS, "need key");
                return;
        }
 
        key.dptr = (unsigned char *)keyname;
        key.dsize = keylen;
 
-       ecode = tdb_fetch(tdb, key, &dbuf);
+       ecode = ntdb_fetch(ntdb, key, &dbuf);
        if (ecode) {
                terror(ecode, "fetch failed");
                return;
        }
 
-       print_rec(tdb, key, dbuf, NULL);
+       print_rec(ntdb, key, dbuf, NULL);
 
        free( dbuf.dptr );
 }
 
-static void delete_tdb(char *keyname, size_t keylen)
+static void delete_ntdb(char *keyname, size_t keylen)
 {
-       TDB_DATA key;
-       enum TDB_ERROR ecode;
+       NTDB_DATA key;
+       enum NTDB_ERROR ecode;
 
        if ((keyname == NULL) || (keylen == 0)) {
-               terror(TDB_SUCCESS, "need key");
+               terror(NTDB_SUCCESS, "need key");
                return;
        }
 
        key.dptr = (unsigned char *)keyname;
        key.dsize = keylen;
 
-       ecode = tdb_delete(tdb, key);
+       ecode = ntdb_delete(ntdb, key);
        if (ecode) {
                terror(ecode, "delete failed");
        }
@@ -365,47 +365,47 @@ static void delete_tdb(char *keyname, size_t keylen)
 
 static void move_rec(char *keyname, size_t keylen, char* tdbname)
 {
-       TDB_DATA key, dbuf;
-       struct tdb_context *dst_tdb;
-       enum TDB_ERROR ecode;
+       NTDB_DATA key, dbuf;
+       struct ntdb_context *dst_ntdb;
+       enum NTDB_ERROR ecode;
 
        if ((keyname == NULL) || (keylen == 0)) {
-               terror(TDB_SUCCESS, "need key");
+               terror(NTDB_SUCCESS, "need key");
                return;
        }
 
        if ( !tdbname ) {
-               terror(TDB_SUCCESS, "need destination tdb name");
+               terror(NTDB_SUCCESS, "need destination ntdb name");
                return;
        }
 
        key.dptr = (unsigned char *)keyname;
        key.dsize = keylen;
 
-       ecode = tdb_fetch(tdb, key, &dbuf);
+       ecode = ntdb_fetch(ntdb, key, &dbuf);
        if (ecode) {
                terror(ecode, "fetch failed");
                return;
        }
 
-       print_rec(tdb, key, dbuf, NULL);
+       print_rec(ntdb, key, dbuf, NULL);
 
-       dst_tdb = tdb_open(tdbname, 0, O_RDWR, 0600, NULL);
-       if ( !dst_tdb ) {
-               terror(TDB_SUCCESS, "unable to open destination tdb");
+       dst_ntdb = ntdb_open(tdbname, 0, O_RDWR, 0600, NULL);
+       if ( !dst_ntdb ) {
+               terror(NTDB_SUCCESS, "unable to open destination ntdb");
                return;
        }
 
-       ecode = tdb_store( dst_tdb, key, dbuf, TDB_REPLACE);
+       ecode = ntdb_store( dst_ntdb, key, dbuf, NTDB_REPLACE);
        if (ecode)
                terror(ecode, "failed to move record");
        else
                printf("record moved\n");
 
-       tdb_close( dst_tdb );
+       ntdb_close( dst_ntdb );
 }
 
-static int print_rec(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, void *state)
+static int print_rec(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state)
 {
        printf("\nkey %d bytes\n", (int)key.dsize);
        print_asc((const char *)key.dptr, key.dsize);
@@ -414,7 +414,7 @@ static int print_rec(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, v
        return 0;
 }
 
-static int print_key(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, void *state)
+static int print_key(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state)
 {
        printf("key %d bytes: ", (int)key.dsize);
        print_asc((const char *)key.dptr, key.dsize);
@@ -422,7 +422,7 @@ static int print_key(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, v
        return 0;
 }
 
-static int print_hexkey(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, void *state)
+static int print_hexkey(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state)
 {
        printf("key %d bytes\n", (int)key.dsize);
        print_data((const char *)key.dptr, key.dsize);
@@ -432,18 +432,18 @@ static int print_hexkey(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf
 
 static int total_bytes;
 
-static int traverse_fn(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, void *state)
+static int traverse_fn(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state)
 {
        total_bytes += dbuf.dsize;
        return 0;
 }
 
-static void info_tdb(void)
+static void info_ntdb(void)
 {
-       enum TDB_ERROR ecode;
+       enum NTDB_ERROR ecode;
        char *summary;
 
-       ecode = tdb_summary(tdb, TDB_SUMMARY_HISTOGRAMS, &summary);
+       ecode = ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &summary);
 
        if (ecode) {
                terror(ecode, "Getting summary");
@@ -453,7 +453,7 @@ static void info_tdb(void)
        }
 }
 
-static void speed_tdb(const char *tlimit)
+static void speed_ntdb(const char *tlimit)
 {
        unsigned timelimit = tlimit?atoi(tlimit):0;
        double t;
@@ -465,11 +465,11 @@ static void speed_tdb(const char *tlimit)
        _start_timer();
        do {
                long int r = random();
-               TDB_DATA key, dbuf;
-               key = tdb_mkdata("store test", strlen("store test"));
+               NTDB_DATA key, dbuf;
+               key = ntdb_mkdata("store test", strlen("store test"));
                dbuf.dptr = (unsigned char *)&r;
                dbuf.dsize = sizeof(r);
-               tdb_store(tdb, key, dbuf, TDB_REPLACE);
+               ntdb_store(ntdb, key, dbuf, NTDB_REPLACE);
                t = _end_timer();
                ops++;
        } while (t < timelimit);
@@ -480,11 +480,11 @@ static void speed_tdb(const char *tlimit)
        _start_timer();
        do {
                long int r = random();
-               TDB_DATA key, dbuf;
-               key = tdb_mkdata("store test", strlen("store test"));
+               NTDB_DATA key, dbuf;
+               key = ntdb_mkdata("store test", strlen("store test"));
                dbuf.dptr = (unsigned char *)&r;
                dbuf.dsize = sizeof(r);
-               tdb_fetch(tdb, key, &dbuf);
+               ntdb_fetch(ntdb, key, &dbuf);
                t = _end_timer();
                ops++;
        } while (t < timelimit);
@@ -495,13 +495,13 @@ static void speed_tdb(const char *tlimit)
        _start_timer();
        do {
                long int r = random();
-               TDB_DATA key, dbuf;
-               key = tdb_mkdata("transaction test", strlen("transaction test"));
+               NTDB_DATA key, dbuf;
+               key = ntdb_mkdata("transaction test", strlen("transaction test"));
                dbuf.dptr = (unsigned char *)&r;
                dbuf.dsize = sizeof(r);
-               tdb_transaction_start(tdb);
-               tdb_store(tdb, key, dbuf, TDB_REPLACE);
-               tdb_transaction_commit(tdb);
+               ntdb_transaction_start(ntdb);
+               ntdb_store(ntdb, key, dbuf, NTDB_REPLACE);
+               ntdb_transaction_commit(ntdb);
                t = _end_timer();
                ops++;
        } while (t < timelimit);
@@ -511,7 +511,7 @@ static void speed_tdb(const char *tlimit)
        printf("Testing traverse speed for %u seconds\n", timelimit);
        _start_timer();
        do {
-               tdb_traverse(tdb, traverse_fn, NULL);
+               ntdb_traverse(ntdb, traverse_fn, NULL);
                t = _end_timer();
                ops++;
        } while (t < timelimit);
@@ -528,7 +528,7 @@ static void toggle_mmap(void)
        }
 }
 
-static char *tdb_getline(const char *prompt)
+static char *ntdb_getline(const char *prompt)
 {
        static char thisline[1024];
        char *p;
@@ -540,45 +540,45 @@ static char *tdb_getline(const char *prompt)
        return p?thisline:NULL;
 }
 
-static int do_delete_fn(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf,
+static int do_delete_fn(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf,
                      void *state)
 {
-    return tdb_delete(the_tdb, key);
+    return ntdb_delete(the_ntdb, key);
 }
 
-static void first_record(struct tdb_context *the_tdb, TDB_DATA *pkey)
+static void first_record(struct ntdb_context *the_ntdb, NTDB_DATA *pkey)
 {
-       TDB_DATA dbuf;
-       enum TDB_ERROR ecode;
-       ecode = tdb_firstkey(the_tdb, pkey);
+       NTDB_DATA dbuf;
+       enum NTDB_ERROR ecode;
+       ecode = ntdb_firstkey(the_ntdb, pkey);
        if (!ecode)
-               ecode = tdb_fetch(the_tdb, *pkey, &dbuf);
+               ecode = ntdb_fetch(the_ntdb, *pkey, &dbuf);
        if (ecode) terror(ecode, "fetch failed");
        else {
-               print_rec(the_tdb, *pkey, dbuf, NULL);
+               print_rec(the_ntdb, *pkey, dbuf, NULL);
        }
 }
 
-static void next_record(struct tdb_context *the_tdb, TDB_DATA *pkey)
+static void next_record(struct ntdb_context *the_ntdb, NTDB_DATA *pkey)
 {
-       TDB_DATA dbuf;
-       enum TDB_ERROR ecode;
-       ecode = tdb_nextkey(the_tdb, pkey);
+       NTDB_DATA dbuf;
+       enum NTDB_ERROR ecode;
+       ecode = ntdb_nextkey(the_ntdb, pkey);
 
        if (!ecode)
-               ecode = tdb_fetch(the_tdb, *pkey, &dbuf);
+               ecode = ntdb_fetch(the_ntdb, *pkey, &dbuf);
        if (ecode)
                terror(ecode, "fetch failed");
        else
-               print_rec(the_tdb, *pkey, dbuf, NULL);
+               print_rec(the_ntdb, *pkey, dbuf, NULL);
 }
 
-static void check_db(struct tdb_context *the_tdb)
+static void check_db(struct ntdb_context *the_ntdb)
 {
-       if (!the_tdb) {
+       if (!the_ntdb) {
                printf("Error: No database opened!\n");
        } else {
-               if (tdb_check(the_tdb, NULL, NULL) != 0)
+               if (ntdb_check(the_ntdb, NULL, NULL) != 0)
                        printf("Integrity check for the opened database failed.\n");
                else
                        printf("Database integrity is OK.\n");
@@ -605,54 +605,54 @@ static int do_command(void)
        }
 
        switch (mycmd) {
-       case CMD_CREATE_TDB:
+       case CMD_CREATE_NTDB:
                bIterate = 0;
-               create_tdb(arg1);
+               create_ntdb(arg1);
                return 0;
-       case CMD_OPEN_TDB:
+       case CMD_OPEN_NTDB:
                bIterate = 0;
-               open_tdb(arg1);
+               open_ntdb(arg1);
                return 0;
        case CMD_SYSTEM:
                /* Shell command */
                if (system(arg1) == -1) {
-                       terror(TDB_SUCCESS, "system() call failed\n");
+                       terror(NTDB_SUCCESS, "system() call failed\n");
                }
                return 0;
        case CMD_QUIT:
                return 1;
        default:
                /* all the rest require a open database */
-               if (!tdb) {
+               if (!ntdb) {
                        bIterate = 0;
-                       terror(TDB_SUCCESS, "database not open");
+                       terror(NTDB_SUCCESS, "database not open");
                        help();
                        return 0;
                }
                switch (mycmd) {
                case CMD_TRANSACTION_START:
                        bIterate = 0;
-                       tdb_transaction_start(tdb);
+                       ntdb_transaction_start(ntdb);
                        return 0;
                case CMD_TRANSACTION_COMMIT:
                        bIterate = 0;
-                       tdb_transaction_commit(tdb);
+                       ntdb_transaction_commit(ntdb);
                        return 0;
                case CMD_TRANSACTION_CANCEL:
                        bIterate = 0;
-                       tdb_transaction_cancel(tdb);
+                       ntdb_transaction_cancel(ntdb);
                        return 0;
                case CMD_ERASE:
                        bIterate = 0;
-                       tdb_traverse(tdb, do_delete_fn, NULL);
+                       ntdb_traverse(ntdb, do_delete_fn, NULL);
                        return 0;
                case CMD_DUMP:
                        bIterate = 0;
-                       tdb_traverse(tdb, print_rec, NULL);
+                       ntdb_traverse(ntdb, print_rec, NULL);
                        return 0;
                case CMD_INSERT:
                        bIterate = 0;
-                       insert_tdb(arg1, arg1len,arg2,arg2len);
+                       insert_ntdb(arg1, arg1len,arg2,arg2len);
                        return 0;
                case CMD_MOVE:
                        bIterate = 0;
@@ -660,55 +660,55 @@ static int do_command(void)
                        return 0;
                case CMD_STORE:
                        bIterate = 0;
-                       store_tdb(arg1,arg1len,arg2,arg2len);
+                       store_ntdb(arg1,arg1len,arg2,arg2len);
                        return 0;
                case CMD_SHOW:
                        bIterate = 0;
-                       show_tdb(arg1, arg1len);
+                       show_ntdb(arg1, arg1len);
                        return 0;
                case CMD_KEYS:
-                       tdb_traverse(tdb, print_key, NULL);
+                       ntdb_traverse(ntdb, print_key, NULL);
                        return 0;
                case CMD_HEXKEYS:
-                       tdb_traverse(tdb, print_hexkey, NULL);
+                       ntdb_traverse(ntdb, print_hexkey, NULL);
                        return 0;
                case CMD_DELETE:
                        bIterate = 0;
-                       delete_tdb(arg1,arg1len);
+                       delete_ntdb(arg1,arg1len);
                        return 0;
 #if 0
                case CMD_LIST_HASH_FREE:
-                       tdb_dump_all(tdb);
+                       ntdb_dump_all(ntdb);
                        return 0;
                case CMD_LIST_FREE:
-                       tdb_printfreelist(tdb);
+                       ntdb_printfreelist(ntdb);
                        return 0;
 #endif
                case CMD_INFO:
-                       info_tdb();
+                       info_ntdb();
                        return 0;
                case CMD_SPEED:
-                       speed_tdb(arg1);
+                       speed_ntdb(arg1);
                        return 0;
                case CMD_MMAP:
                        toggle_mmap();
                        return 0;
                case CMD_FIRST:
                        bIterate = 1;
-                       first_record(tdb, &iterate_kbuf);
+                       first_record(ntdb, &iterate_kbuf);
                        return 0;
                case CMD_NEXT:
                        if (bIterate)
-                               next_record(tdb, &iterate_kbuf);
+                               next_record(ntdb, &iterate_kbuf);
                        return 0;
                case CMD_CHECK:
-                       check_db(tdb);
+                       check_db(ntdb);
                        return 0;
                case CMD_HELP:
                        help();
                        return 0;
-               case CMD_CREATE_TDB:
-               case CMD_OPEN_TDB:
+               case CMD_CREATE_NTDB:
+               case CMD_OPEN_NTDB:
                case CMD_SYSTEM:
                case CMD_QUIT:
                        /*
@@ -773,7 +773,7 @@ int main(int argc, char *argv[])
        case 1:
        case 2:
                /* Interactive mode */
-               while ((cmdname = tdb_getline("tdb> "))) {
+               while ((cmdname = ntdb_getline("ntdb> "))) {
                        arg2 = arg1 = NULL;
                        if ((arg1 = strchr((const char *)cmdname,' ')) != NULL) {
                                arg1++;
@@ -804,7 +804,7 @@ int main(int argc, char *argv[])
                break;
        }
 
-       if (tdb) tdb_close(tdb);
+       if (ntdb) ntdb_close(ntdb);
 
        return 0;
 }
similarity index 76%
rename from lib/tdb2/tools/tdb2torture.c
rename to lib/ntdb/tools/ntdbtorture.c
index 73e2e29874f246c81230bf4cc372ec0ce2bcf8b9..c7b249db069f9726d12a049d8c8daf363b8a1c89 100644 (file)
@@ -1,9 +1,9 @@
-/* this tests tdb by doing lots of ops from several simultaneous
+/* this tests ntdb by doing lots of ops from several simultaneous
    writers - that stresses the locking code.
 */
 
 #include "config.h"
-#include "tdb2.h"
+#include "ntdb.h"
 #include <ccan/err/err.h>
 #ifdef HAVE_LIBREPLACE
 #include <replace.h>
@@ -35,7 +35,7 @@
 #define KEYLEN 3
 #define DATALEN 100
 
-static struct tdb_context *db;
+static struct ntdb_context *db;
 static int in_transaction;
 static int in_traverse;
 static int error_count;
@@ -44,17 +44,17 @@ static int always_transaction = 0;
 #endif
 static int loopnum;
 static int count_pipe;
-static union tdb_attribute log_attr;
-static union tdb_attribute seed_attr;
+static union ntdb_attribute log_attr;
+static union ntdb_attribute seed_attr;
 
-static void tdb_log(struct tdb_context *tdb,
-                   enum tdb_log_level level,
-                   enum TDB_ERROR ecode,
+static void ntdb_log(struct ntdb_context *ntdb,
+                   enum ntdb_log_level level,
+                   enum NTDB_ERROR ecode,
                    const char *message,
                    void *data)
 {
-       printf("tdb:%s:%s:%s\n",
-              tdb_name(tdb), tdb_errorstr(ecode), message);
+       printf("ntdb:%s:%s:%s\n",
+              ntdb_name(ntdb), ntdb_errorstr(ecode), message);
        fflush(stdout);
 #if 0
        {
@@ -80,10 +80,10 @@ static void segv_handler(int sig, siginfo_t *info, void *p)
        _exit(11);
 }
 
-static void fatal(struct tdb_context *tdb, const char *why)
+static void fatal(struct ntdb_context *ntdb, const char *why)
 {
        fprintf(stderr, "%u:%s:%s\n", getpid(), why,
-               tdb ? tdb_errorstr(tdb_error(tdb)) : "(no tdb)");
+               ntdb ? ntdb_errorstr(ntdb_error(ntdb)) : "(no ntdb)");
        error_count++;
 }
 
@@ -101,12 +101,12 @@ static char *randbuf(int len)
 }
 
 static void addrec_db(void);
-static int modify_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf,
+static int modify_traverse(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf,
                           void *state)
 {
 #if CULL_PROB
        if (random() % CULL_PROB == 0) {
-               tdb_delete(tdb, key);
+               ntdb_delete(ntdb, key);
        }
 #endif
 
@@ -128,7 +128,7 @@ static void addrec_db(void)
 {
        int klen, dlen;
        char *k, *d;
-       TDB_DATA key, data;
+       NTDB_DATA key, data;
 
        klen = 1 + (rand() % KEYLEN);
        dlen = 1 + (rand() % DATALEN);
@@ -144,34 +144,34 @@ static void addrec_db(void)
 
 #if REOPEN_PROB
        if (in_traverse == 0 && in_transaction == 0 && random() % REOPEN_PROB == 0) {
-               tdb_reopen_all(0);
+               ntdb_reopen_all(0);
                goto next;
        }
 #endif
 
 #if TRANSACTION_PROB
        if (in_traverse == 0 && in_transaction == 0 && (always_transaction || random() % TRANSACTION_PROB == 0)) {
-               if (tdb_transaction_start(db) != 0) {
-                       fatal(db, "tdb_transaction_start failed");
+               if (ntdb_transaction_start(db) != 0) {
+                       fatal(db, "ntdb_transaction_start failed");
                }
                in_transaction++;
                goto next;
        }
        if (in_traverse == 0 && in_transaction && random() % TRANSACTION_PROB == 0) {
                if (random() % TRANSACTION_PREPARE_PROB == 0) {
-                       if (tdb_transaction_prepare_commit(db) != 0) {
-                               fatal(db, "tdb_transaction_prepare_commit failed");
+                       if (ntdb_transaction_prepare_commit(db) != 0) {
+                               fatal(db, "ntdb_transaction_prepare_commit failed");
                        }
                }
-               if (tdb_transaction_commit(db) != 0) {
-                       fatal(db, "tdb_transaction_commit failed");
+               if (ntdb_transaction_commit(db) != 0) {
+                       fatal(db, "ntdb_transaction_commit failed");
                }
                in_transaction--;
                goto next;
        }
 
        if (in_traverse == 0 && in_transaction && random() % TRANSACTION_PROB == 0) {
-               tdb_transaction_cancel(db);
+               ntdb_transaction_cancel(db);
                in_transaction--;
                goto next;
        }
@@ -179,15 +179,15 @@ static void addrec_db(void)
 
 #if DELETE_PROB
        if (random() % DELETE_PROB == 0) {
-               tdb_delete(db, key);
+               ntdb_delete(db, key);
                goto next;
        }
 #endif
 
 #if STORE_PROB
        if (random() % STORE_PROB == 0) {
-               if (tdb_store(db, key, data, TDB_REPLACE) != 0) {
-                       fatal(db, "tdb_store failed");
+               if (ntdb_store(db, key, data, NTDB_REPLACE) != 0) {
+                       fatal(db, "ntdb_store failed");
                }
                goto next;
        }
@@ -195,8 +195,8 @@ static void addrec_db(void)
 
 #if APPEND_PROB
        if (random() % APPEND_PROB == 0) {
-               if (tdb_append(db, key, data) != 0) {
-                       fatal(db, "tdb_append failed");
+               if (ntdb_append(db, key, data) != 0) {
+                       fatal(db, "ntdb_append failed");
                }
                goto next;
        }
@@ -204,16 +204,16 @@ static void addrec_db(void)
 
 #if LOCKSTORE_PROB
        if (random() % LOCKSTORE_PROB == 0) {
-               tdb_chainlock(db, key);
-               if (tdb_fetch(db, key, &data) != TDB_SUCCESS) {
+               ntdb_chainlock(db, key);
+               if (ntdb_fetch(db, key, &data) != NTDB_SUCCESS) {
                        data.dsize = 0;
                        data.dptr = NULL;
                }
-               if (tdb_store(db, key, data, TDB_REPLACE) != 0) {
-                       fatal(db, "tdb_store failed");
+               if (ntdb_store(db, key, data, NTDB_REPLACE) != 0) {
+                       fatal(db, "ntdb_store failed");
                }
                if (data.dptr) free(data.dptr);
-               tdb_chainunlock(db, key);
+               ntdb_chainunlock(db, key);
                goto next;
        }
 #endif
@@ -222,13 +222,13 @@ static void addrec_db(void)
        /* FIXME: recursive traverses break transactions? */
        if (in_traverse == 0 && random() % TRAVERSE_PROB == 0) {
                in_traverse++;
-               tdb_traverse(db, modify_traverse, NULL);
+               ntdb_traverse(db, modify_traverse, NULL);
                in_traverse--;
                goto next;
        }
 #endif
 
-       if (tdb_fetch(db, key, &data) == TDB_SUCCESS)
+       if (ntdb_fetch(db, key, &data) == NTDB_SUCCESS)
                free(data.dptr);
 
 next:
@@ -236,16 +236,16 @@ next:
        free(d);
 }
 
-static int traverse_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf,
+static int traverse_fn(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf,
                        void *state)
 {
-       tdb_delete(tdb, key);
+       ntdb_delete(ntdb, key);
        return 0;
 }
 
 static void usage(void)
 {
-       printf("Usage: tdb2torture"
+       printf("Usage: ntdbtorture"
 #if TRANSACTION_PROB
               " [-t]"
 #endif
@@ -263,13 +263,13 @@ static void send_count_and_suicide(int sig)
 }
 
 static int run_child(const char *filename, int i, int seed, unsigned num_loops,
-                    unsigned start, int tdb_flags)
+                    unsigned start, int ntdb_flags)
 {
        struct sigaction act = { .sa_sigaction = segv_handler,
                                 .sa_flags = SA_SIGINFO };
        sigaction(11, &act, NULL);
 
-       db = tdb_open(filename, tdb_flags, O_RDWR | O_CREAT, 0600,
+       db = ntdb_open(filename, ntdb_flags, O_RDWR | O_CREAT, 0600,
                      &log_attr);
        if (!db) {
                fatal(NULL, "db open failed");
@@ -295,29 +295,29 @@ static int run_child(const char *filename, int i, int seed, unsigned num_loops,
        }
 
        if (error_count == 0) {
-               tdb_traverse(db, NULL, NULL);
+               ntdb_traverse(db, NULL, NULL);
 #if TRANSACTION_PROB
                if (always_transaction) {
                        while (in_transaction) {
-                               tdb_transaction_cancel(db);
+                               ntdb_transaction_cancel(db);
                                in_transaction--;
                        }
-                       if (tdb_transaction_start(db) != 0)
-                               fatal(db, "tdb_transaction_start failed");
+                       if (ntdb_transaction_start(db) != 0)
+                               fatal(db, "ntdb_transaction_start failed");
                }
 #endif
-               tdb_traverse(db, traverse_fn, NULL);
-               tdb_traverse(db, traverse_fn, NULL);
+               ntdb_traverse(db, traverse_fn, NULL);
+               ntdb_traverse(db, traverse_fn, NULL);
 
 #if TRANSACTION_PROB
                if (always_transaction) {
-                       if (tdb_transaction_commit(db) != 0)
-                               fatal(db, "tdb_transaction_commit failed");
+                       if (ntdb_transaction_commit(db) != 0)
+                               fatal(db, "ntdb_transaction_commit failed");
                }
 #endif
        }
 
-       tdb_close(db);
+       ntdb_close(db);
 
        return (error_count < 100 ? error_count : 100);
 }
@@ -350,13 +350,13 @@ int main(int argc, char * const *argv)
        pid_t *pids;
        int kill_random = 0;
        int *done;
-       int tdb_flags = TDB_DEFAULT;
-       char *test_tdb;
+       int ntdb_flags = NTDB_DEFAULT;
+       char *test_ntdb;
 
-       log_attr.base.attr = TDB_ATTRIBUTE_LOG;
+       log_attr.base.attr = NTDB_ATTRIBUTE_LOG;
        log_attr.base.next = &seed_attr;
-       log_attr.log.fn = tdb_log;
-       seed_attr.base.attr = TDB_ATTRIBUTE_SEED;
+       log_attr.log.fn = ntdb_log;
+       seed_attr.base.attr = NTDB_ATTRIBUTE_SEED;
        seed_attr.base.next = NULL;
 
        while ((c = getopt(argc, argv, "n:l:s:thkS")) != -1) {
@@ -371,7 +371,7 @@ int main(int argc, char * const *argv)
                        seed = strtol(optarg, NULL, 0);
                        break;
                case 'S':
-                       tdb_flags = TDB_NOSYNC;
+                       ntdb_flags = NTDB_NOSYNC;
                        break;
                case 't':
 #if TRANSACTION_PROB
@@ -389,9 +389,9 @@ int main(int argc, char * const *argv)
                }
        }
 
-       test_tdb = test_path("torture.tdb2");
+       test_ntdb = test_path("torture.ntdb");
 
-       unlink(test_tdb);
+       unlink(test_ntdb);
 
        if (seed == -1) {
                seed = (getpid() + time(NULL)) & 0x7FFFFFFF;
@@ -400,8 +400,8 @@ int main(int argc, char * const *argv)
 
        if (num_procs == 1 && !kill_random) {
                /* Don't fork for this case, makes debugging easier. */
-               error_count = run_child(test_tdb, 0, seed, num_loops, 0,
-                                       tdb_flags);
+               error_count = run_child(test_ntdb, 0, seed, num_loops, 0,
+                                       ntdb_flags);
                goto done;
        }
 
@@ -427,8 +427,8 @@ int main(int argc, char * const *argv)
 #endif
                                        );
                        }
-                       exit(run_child(test_tdb, i, seed, num_loops, 0,
-                                      tdb_flags));
+                       exit(run_child(test_ntdb, i, seed, num_loops, 0,
+                                      ntdb_flags));
                }
        }
 
@@ -484,9 +484,9 @@ int main(int argc, char * const *argv)
                                }
                                pids[j] = fork();
                                if (pids[j] == 0)
-                                       exit(run_child(test_tdb, j, seed,
+                                       exit(run_child(test_ntdb, j, seed,
                                                       num_loops, done[j],
-                                                      tdb_flags));
+                                                      ntdb_flags));
                                printf("Restarting child %i for %u-%u\n",
                                       j, done[j], num_loops);
                                continue;
@@ -510,20 +510,20 @@ int main(int argc, char * const *argv)
 
 done:
        if (error_count == 0) {
-               db = tdb_open(test_tdb, TDB_DEFAULT, O_RDWR | O_CREAT,
+               db = ntdb_open(test_ntdb, NTDB_DEFAULT, O_RDWR | O_CREAT,
                              0600, &log_attr);
                if (!db) {
                        fatal(db, "db open failed");
                        exit(1);
                }
-               if (tdb_check(db, NULL, NULL) != 0) {
+               if (ntdb_check(db, NULL, NULL) != 0) {
                        fatal(db, "db check failed");
                        exit(1);
                }
-               tdb_close(db);
+               ntdb_close(db);
                printf("OK\n");
        }
 
-       free(test_tdb);
+       free(test_ntdb);
        return error_count;
 }
similarity index 60%
rename from lib/tdb2/tools/speed.c
rename to lib/ntdb/tools/speed.c
index 259d53f6c81ee1eac49897daace63bc3d68586eb..868494b89891e63eb534a2872ebc90294631b9f9 100644 (file)
@@ -1,4 +1,4 @@
-/* Simple speed test for TDB */
+/* Simple speed test for NTDB */
 #include <ccan/err/err.h>
 #include <time.h>
 #include <sys/types.h>
@@ -10,7 +10,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <stdbool.h>
-#include "tdb2.h"
+#include "ntdb.h"
 
 /* Nanoseconds per operation */
 static size_t normalize(const struct timeval *start,
@@ -30,31 +30,31 @@ static size_t file_size(void)
 {
        struct stat st;
 
-       if (stat("/tmp/speed.tdb", &st) != 0)
+       if (stat("/tmp/speed.ntdb", &st) != 0)
                return -1;
        return st.st_size;
 }
 
-static int count_record(struct tdb_context *tdb,
-                       TDB_DATA key, TDB_DATA data, void *p)
+static int count_record(struct ntdb_context *ntdb,
+                       NTDB_DATA key, NTDB_DATA data, void *p)
 {
        int *total = p;
        *total += *(int *)data.dptr;
        return 0;
 }
 
-static void dump_and_clear_stats(struct tdb_context **tdb,
+static void dump_and_clear_stats(struct ntdb_context **ntdb,
                                 int flags,
-                                union tdb_attribute *attr)
+                                union ntdb_attribute *attr)
 {
-       union tdb_attribute stats;
-       enum TDB_ERROR ecode;
+       union ntdb_attribute stats;
+       enum NTDB_ERROR ecode;
 
-       stats.base.attr = TDB_ATTRIBUTE_STATS;
+       stats.base.attr = NTDB_ATTRIBUTE_STATS;
        stats.stats.size = sizeof(stats.stats);
-       ecode = tdb_get_attribute(*tdb, &stats);
-       if (ecode != TDB_SUCCESS)
-               errx(1, "Getting stats: %s", tdb_errorstr(ecode));
+       ecode = ntdb_get_attribute(*ntdb, &stats);
+       if (ecode != NTDB_SUCCESS)
+               errx(1, "Getting stats: %s", ntdb_errorstr(ecode));
 
        printf("allocs = %llu\n",
               (unsigned long long)stats.stats.allocs);
@@ -122,43 +122,43 @@ static void dump_and_clear_stats(struct tdb_context **tdb,
               (unsigned long long)stats.stats.lock_nonblock_fail);
 
        /* Now clear. */
-       tdb_close(*tdb);
-       *tdb = tdb_open("/tmp/speed.tdb", flags, O_RDWR, 0, attr);
+       ntdb_close(*ntdb);
+       *ntdb = ntdb_open("/tmp/speed.ntdb", flags, O_RDWR, 0, attr);
 }
 
-static void tdb_log(struct tdb_context *tdb,
-                   enum tdb_log_level level,
-                   enum TDB_ERROR ecode,
+static void ntdb_log(struct ntdb_context *ntdb,
+                   enum ntdb_log_level level,
+                   enum NTDB_ERROR ecode,
                    const char *message,
                    void *data)
 {
-       fprintf(stderr, "tdb:%s:%s:%s\n",
-               tdb_name(tdb), tdb_errorstr(ecode), message);
+       fprintf(stderr, "ntdb:%s:%s:%s\n",
+               ntdb_name(ntdb), ntdb_errorstr(ecode), message);
 }
 
 int main(int argc, char *argv[])
 {
        unsigned int i, j, num = 1000, stage = 0, stopat = -1;
-       int flags = TDB_DEFAULT;
+       int flags = NTDB_DEFAULT;
        bool transaction = false, summary = false;
-       TDB_DATA key, data;
-       struct tdb_context *tdb;
+       NTDB_DATA key, data;
+       struct ntdb_context *ntdb;
        struct timeval start, stop;
-       union tdb_attribute seed, log;
+       union ntdb_attribute seed, log;
        bool do_stats = false;
-       enum TDB_ERROR ecode;
+       enum NTDB_ERROR ecode;
 
        /* Try to keep benchmarks even. */
-       seed.base.attr = TDB_ATTRIBUTE_SEED;
+       seed.base.attr = NTDB_ATTRIBUTE_SEED;
        seed.base.next = NULL;
        seed.seed.seed = 0;
 
-       log.base.attr = TDB_ATTRIBUTE_LOG;
+       log.base.attr = NTDB_ATTRIBUTE_LOG;
        log.base.next = &seed;
-       log.log.fn = tdb_log;
+       log.log.fn = ntdb_log;
 
        if (argv[1] && strcmp(argv[1], "--internal") == 0) {
-               flags = TDB_INTERNAL;
+               flags = NTDB_INTERNAL;
                argc--;
                argv++;
        }
@@ -168,7 +168,7 @@ int main(int argc, char *argv[])
                argv++;
        }
        if (argv[1] && strcmp(argv[1], "--no-sync") == 0) {
-               flags |= TDB_NOSYNC;
+               flags |= NTDB_NOSYNC;
                argc--;
                argv++;
        }
@@ -183,10 +183,10 @@ int main(int argc, char *argv[])
                argv++;
        }
 
-       tdb = tdb_open("/tmp/speed.tdb", flags, O_RDWR|O_CREAT|O_TRUNC,
+       ntdb = ntdb_open("/tmp/speed.ntdb", flags, O_RDWR|O_CREAT|O_TRUNC,
                       0600, &log);
-       if (!tdb)
-               err(1, "Opening /tmp/speed.tdb");
+       if (!ntdb)
+               err(1, "Opening /tmp/speed.ntdb");
 
        key.dptr = (void *)&i;
        key.dsize = sizeof(i);
@@ -206,199 +206,199 @@ int main(int argc, char *argv[])
 
        /* Add 1000 records. */
        printf("Adding %u records: ", num); fflush(stdout);
-       if (transaction && (ecode = tdb_transaction_start(tdb)))
-               errx(1, "starting transaction: %s", tdb_errorstr(ecode));
+       if (transaction && (ecode = ntdb_transaction_start(ntdb)))
+               errx(1, "starting transaction: %s", ntdb_errorstr(ecode));
        gettimeofday(&start, NULL);
        for (i = 0; i < num; i++)
-               if ((ecode = tdb_store(tdb, key, data, TDB_INSERT)) != 0)
-                       errx(1, "Inserting key %u in tdb: %s",
-                            i, tdb_errorstr(ecode));
+               if ((ecode = ntdb_store(ntdb, key, data, NTDB_INSERT)) != 0)
+                       errx(1, "Inserting key %u in ntdb: %s",
+                            i, ntdb_errorstr(ecode));
        gettimeofday(&stop, NULL);
-       if (transaction && (ecode = tdb_transaction_commit(tdb)))
-               errx(1, "committing transaction: %s", tdb_errorstr(ecode));
+       if (transaction && (ecode = ntdb_transaction_commit(ntdb)))
+               errx(1, "committing transaction: %s", ntdb_errorstr(ecode));
        printf(" %zu ns (%zu bytes)\n",
               normalize(&start, &stop, num), file_size());
 
-       if (tdb_check(tdb, NULL, NULL))
-               errx(1, "tdb_check failed!");
+       if (ntdb_check(ntdb, NULL, NULL))
+               errx(1, "ntdb_check failed!");
        if (summary) {
                char *sumstr = NULL;
-               tdb_summary(tdb, TDB_SUMMARY_HISTOGRAMS, &sumstr);
+               ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr);
                printf("%s\n", sumstr);
                free(sumstr);
        }
        if (do_stats)
-               dump_and_clear_stats(&tdb, flags, &log);
+               dump_and_clear_stats(&ntdb, flags, &log);
 
        if (++stage == stopat)
                exit(0);
 
        /* Finding 1000 records. */
        printf("Finding %u records: ", num); fflush(stdout);
-       if (transaction && (ecode = tdb_transaction_start(tdb)))
-               errx(1, "starting transaction: %s", tdb_errorstr(ecode));
+       if (transaction && (ecode = ntdb_transaction_start(ntdb)))
+               errx(1, "starting transaction: %s", ntdb_errorstr(ecode));
        gettimeofday(&start, NULL);
        for (i = 0; i < num; i++) {
-               struct tdb_data dbuf;
-               if ((ecode = tdb_fetch(tdb, key, &dbuf)) != TDB_SUCCESS
+               NTDB_DATA dbuf;
+               if ((ecode = ntdb_fetch(ntdb, key, &dbuf)) != NTDB_SUCCESS
                    || *(int *)dbuf.dptr != i) {
-                       errx(1, "Fetching key %u in tdb gave %u",
+                       errx(1, "Fetching key %u in ntdb gave %u",
                             i, ecode ? ecode : *(int *)dbuf.dptr);
                }
        }
        gettimeofday(&stop, NULL);
-       if (transaction && (ecode = tdb_transaction_commit(tdb)))
-               errx(1, "committing transaction: %s", tdb_errorstr(ecode));
+       if (transaction && (ecode = ntdb_transaction_commit(ntdb)))
+               errx(1, "committing transaction: %s", ntdb_errorstr(ecode));
        printf(" %zu ns (%zu bytes)\n",
               normalize(&start, &stop, num), file_size());
-       if (tdb_check(tdb, NULL, NULL))
-               errx(1, "tdb_check failed!");
+       if (ntdb_check(ntdb, NULL, NULL))
+               errx(1, "ntdb_check failed!");
        if (summary) {
                char *sumstr = NULL;
-               tdb_summary(tdb, TDB_SUMMARY_HISTOGRAMS, &sumstr);
+               ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr);
                printf("%s\n", sumstr);
                free(sumstr);
        }
        if (do_stats)
-               dump_and_clear_stats(&tdb, flags, &log);
+               dump_and_clear_stats(&ntdb, flags, &log);
        if (++stage == stopat)
                exit(0);
 
        /* Missing 1000 records. */
        printf("Missing %u records: ", num); fflush(stdout);
-       if (transaction && (ecode = tdb_transaction_start(tdb)))
-               errx(1, "starting transaction: %s", tdb_errorstr(ecode));
+       if (transaction && (ecode = ntdb_transaction_start(ntdb)))
+               errx(1, "starting transaction: %s", ntdb_errorstr(ecode));
        gettimeofday(&start, NULL);
        for (i = num; i < num*2; i++) {
-               struct tdb_data dbuf;
-               ecode = tdb_fetch(tdb, key, &dbuf);
-               if (ecode != TDB_ERR_NOEXIST)
-                       errx(1, "Fetching key %u in tdb gave %s",
-                            i, tdb_errorstr(ecode));
+               NTDB_DATA dbuf;
+               ecode = ntdb_fetch(ntdb, key, &dbuf);
+               if (ecode != NTDB_ERR_NOEXIST)
+                       errx(1, "Fetching key %u in ntdb gave %s",
+                            i, ntdb_errorstr(ecode));
        }
        gettimeofday(&stop, NULL);
-       if (transaction && (ecode = tdb_transaction_commit(tdb)))
-               errx(1, "committing transaction: %s", tdb_errorstr(ecode));
+       if (transaction && (ecode = ntdb_transaction_commit(ntdb)))
+               errx(1, "committing transaction: %s", ntdb_errorstr(ecode));
        printf(" %zu ns (%zu bytes)\n",
               normalize(&start, &stop, num), file_size());
-       if (tdb_check(tdb, NULL, NULL))
-               errx(1, "tdb_check failed!");
+       if (ntdb_check(ntdb, NULL, NULL))
+               errx(1, "ntdb_check failed!");
        if (summary) {
                char *sumstr = NULL;
-               tdb_summary(tdb, TDB_SUMMARY_HISTOGRAMS, &sumstr);
+               ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr);
                printf("%s\n", sumstr);
                free(sumstr);
        }
        if (do_stats)
-               dump_and_clear_stats(&tdb, flags, &log);
+               dump_and_clear_stats(&ntdb, flags, &log);
        if (++stage == stopat)
                exit(0);
 
        /* Traverse 1000 records. */
        printf("Traversing %u records: ", num); fflush(stdout);
-       if (transaction && (ecode = tdb_transaction_start(tdb)))
-               errx(1, "starting transaction: %s", tdb_errorstr(ecode));
+       if (transaction && (ecode = ntdb_transaction_start(ntdb)))
+               errx(1, "starting transaction: %s", ntdb_errorstr(ecode));
        i = 0;
        gettimeofday(&start, NULL);
-       if (tdb_traverse(tdb, count_record, &i) != num)
+       if (ntdb_traverse(ntdb, count_record, &i) != num)
                errx(1, "Traverse returned wrong number of records");
        if (i != (num - 1) * (num / 2))
                errx(1, "Traverse tallied to %u", i);
        gettimeofday(&stop, NULL);
-       if (transaction && (ecode = tdb_transaction_commit(tdb)))
-               errx(1, "committing transaction: %s", tdb_errorstr(ecode));
+       if (transaction && (ecode = ntdb_transaction_commit(ntdb)))
+               errx(1, "committing transaction: %s", ntdb_errorstr(ecode));
        printf(" %zu ns (%zu bytes)\n",
               normalize(&start, &stop, num), file_size());
-       if (tdb_check(tdb, NULL, NULL))
-               errx(1, "tdb_check failed!");
+       if (ntdb_check(ntdb, NULL, NULL))
+               errx(1, "ntdb_check failed!");
        if (summary) {
                char *sumstr = NULL;
-               tdb_summary(tdb, TDB_SUMMARY_HISTOGRAMS, &sumstr);
+               ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr);
                printf("%s\n", sumstr);
                free(sumstr);
        }
        if (do_stats)
-               dump_and_clear_stats(&tdb, flags, &log);
+               dump_and_clear_stats(&ntdb, flags, &log);
        if (++stage == stopat)
                exit(0);
 
        /* Delete 1000 records (not in order). */
        printf("Deleting %u records: ", num); fflush(stdout);
-       if (transaction && (ecode = tdb_transaction_start(tdb)))
-               errx(1, "starting transaction: %s", tdb_errorstr(ecode));
+       if (transaction && (ecode = ntdb_transaction_start(ntdb)))
+               errx(1, "starting transaction: %s", ntdb_errorstr(ecode));
        gettimeofday(&start, NULL);
        for (j = 0; j < num; j++) {
                i = (j + 100003) % num;
-               if ((ecode = tdb_delete(tdb, key)) != TDB_SUCCESS)
-                       errx(1, "Deleting key %u in tdb: %s",
-                            i, tdb_errorstr(ecode));
+               if ((ecode = ntdb_delete(ntdb, key)) != NTDB_SUCCESS)
+                       errx(1, "Deleting key %u in ntdb: %s",
+                            i, ntdb_errorstr(ecode));
        }
        gettimeofday(&stop, NULL);
-       if (transaction && (ecode = tdb_transaction_commit(tdb)))
-               errx(1, "committing transaction: %s", tdb_errorstr(ecode));
+       if (transaction && (ecode = ntdb_transaction_commit(ntdb)))
+               errx(1, "committing transaction: %s", ntdb_errorstr(ecode));
        printf(" %zu ns (%zu bytes)\n",
               normalize(&start, &stop, num), file_size());
-       if (tdb_check(tdb, NULL, NULL))
-               errx(1, "tdb_check failed!");
+       if (ntdb_check(ntdb, NULL, NULL))
+               errx(1, "ntdb_check failed!");
        if (summary) {
                char *sumstr = NULL;
-               tdb_summary(tdb, TDB_SUMMARY_HISTOGRAMS, &sumstr);
+               ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr);
                printf("%s\n", sumstr);
                free(sumstr);
        }
        if (do_stats)
-               dump_and_clear_stats(&tdb, flags, &log);
+               dump_and_clear_stats(&ntdb, flags, &log);
        if (++stage == stopat)
                exit(0);
 
        /* Re-add 1000 records (not in order). */
        printf("Re-adding %u records: ", num); fflush(stdout);
-       if (transaction && (ecode = tdb_transaction_start(tdb)))
-               errx(1, "starting transaction: %s", tdb_errorstr(ecode));
+       if (transaction && (ecode = ntdb_transaction_start(ntdb)))
+               errx(1, "starting transaction: %s", ntdb_errorstr(ecode));
        gettimeofday(&start, NULL);
        for (j = 0; j < num; j++) {
                i = (j + 100003) % num;
-               if ((ecode = tdb_store(tdb, key, data, TDB_INSERT)) != 0)
-                       errx(1, "Inserting key %u in tdb: %s",
-                            i, tdb_errorstr(ecode));
+               if ((ecode = ntdb_store(ntdb, key, data, NTDB_INSERT)) != 0)
+                       errx(1, "Inserting key %u in ntdb: %s",
+                            i, ntdb_errorstr(ecode));
        }
        gettimeofday(&stop, NULL);
-       if (transaction && (ecode = tdb_transaction_commit(tdb)))
-               errx(1, "committing transaction: %s", tdb_errorstr(ecode));
+       if (transaction && (ecode = ntdb_transaction_commit(ntdb)))
+               errx(1, "committing transaction: %s", ntdb_errorstr(ecode));
        printf(" %zu ns (%zu bytes)\n",
               normalize(&start, &stop, num), file_size());
-       if (tdb_check(tdb, NULL, NULL))
-               errx(1, "tdb_check failed!");
+       if (ntdb_check(ntdb, NULL, NULL))
+               errx(1, "ntdb_check failed!");
        if (summary) {
                char *sumstr = NULL;
-               tdb_summary(tdb, TDB_SUMMARY_HISTOGRAMS, &sumstr);
+               ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr);
                printf("%s\n", sumstr);
                free(sumstr);
        }
        if (do_stats)
-               dump_and_clear_stats(&tdb, flags, &log);
+               dump_and_clear_stats(&ntdb, flags, &log);
        if (++stage == stopat)
                exit(0);
 
        /* Append 1000 records. */
-       if (transaction && (ecode = tdb_transaction_start(tdb)))
-               errx(1, "starting transaction: %s", tdb_errorstr(ecode));
+       if (transaction && (ecode = ntdb_transaction_start(ntdb)))
+               errx(1, "starting transaction: %s", ntdb_errorstr(ecode));
        printf("Appending %u records: ", num); fflush(stdout);
        gettimeofday(&start, NULL);
        for (i = 0; i < num; i++)
-               if ((ecode = tdb_append(tdb, key, data)) != TDB_SUCCESS)
-                       errx(1, "Appending key %u in tdb: %s",
-                            i, tdb_errorstr(ecode));
+               if ((ecode = ntdb_append(ntdb, key, data)) != NTDB_SUCCESS)
+                       errx(1, "Appending key %u in ntdb: %s",
+                            i, ntdb_errorstr(ecode));
        gettimeofday(&stop, NULL);
-       if (transaction && (ecode = tdb_transaction_commit(tdb)))
-               errx(1, "committing transaction: %s", tdb_errorstr(ecode));
+       if (transaction && (ecode = ntdb_transaction_commit(ntdb)))
+               errx(1, "committing transaction: %s", ntdb_errorstr(ecode));
        printf(" %zu ns (%zu bytes)\n",
               normalize(&start, &stop, num), file_size());
-       if (tdb_check(tdb, NULL, NULL))
-               errx(1, "tdb_check failed!");
+       if (ntdb_check(ntdb, NULL, NULL))
+               errx(1, "ntdb_check failed!");
        if (summary) {
                char *sumstr = NULL;
-               tdb_summary(tdb, TDB_SUMMARY_HISTOGRAMS, &sumstr);
+               ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr);
                printf("%s\n", sumstr);
                free(sumstr);
        }
@@ -406,36 +406,36 @@ int main(int argc, char *argv[])
                exit(0);
 
        /* Churn 1000 records: not in order! */
-       if (transaction && (ecode = tdb_transaction_start(tdb)))
-               errx(1, "starting transaction: %s", tdb_errorstr(ecode));
+       if (transaction && (ecode = ntdb_transaction_start(ntdb)))
+               errx(1, "starting transaction: %s", ntdb_errorstr(ecode));
        printf("Churning %u records: ", num); fflush(stdout);
        gettimeofday(&start, NULL);
        for (j = 0; j < num; j++) {
                i = (j + 1000019) % num;
-               if ((ecode = tdb_delete(tdb, key)) != TDB_SUCCESS)
-                       errx(1, "Deleting key %u in tdb: %s",
-                            i, tdb_errorstr(ecode));
+               if ((ecode = ntdb_delete(ntdb, key)) != NTDB_SUCCESS)
+                       errx(1, "Deleting key %u in ntdb: %s",
+                            i, ntdb_errorstr(ecode));
                i += num;
-               if ((ecode = tdb_store(tdb, key, data, TDB_INSERT)) != 0)
-                       errx(1, "Inserting key %u in tdb: %s",
-                            i, tdb_errorstr(ecode));
+               if ((ecode = ntdb_store(ntdb, key, data, NTDB_INSERT)) != 0)
+                       errx(1, "Inserting key %u in ntdb: %s",
+                            i, ntdb_errorstr(ecode));
        }
        gettimeofday(&stop, NULL);
-       if (transaction && (ecode = tdb_transaction_commit(tdb)))
-               errx(1, "committing transaction: %s", tdb_errorstr(ecode));
+       if (transaction && (ecode = ntdb_transaction_commit(ntdb)))
+               errx(1, "committing transaction: %s", ntdb_errorstr(ecode));
        printf(" %zu ns (%zu bytes)\n",
               normalize(&start, &stop, num), file_size());
 
-       if (tdb_check(tdb, NULL, NULL))
-               errx(1, "tdb_check failed!");
+       if (ntdb_check(ntdb, NULL, NULL))
+               errx(1, "ntdb_check failed!");
        if (summary) {
                char *sumstr = NULL;
-               tdb_summary(tdb, TDB_SUMMARY_HISTOGRAMS, &sumstr);
+               ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr);
                printf("%s\n", sumstr);
                free(sumstr);
        }
        if (do_stats)
-               dump_and_clear_stats(&tdb, flags, &log);
+               dump_and_clear_stats(&ntdb, flags, &log);
        if (++stage == stopat)
                exit(0);
 
diff --git a/lib/ntdb/transaction.c b/lib/ntdb/transaction.c
new file mode 100644 (file)
index 0000000..76408c3
--- /dev/null
@@ -0,0 +1,1322 @@
+ /*
+   Unix SMB/CIFS implementation.
+
+   trivial database library
+
+   Copyright (C) Andrew Tridgell              2005
+   Copyright (C) Rusty Russell                2010
+
+     ** NOTE! The following LGPL license applies to the ntdb
+     ** library. This does NOT imply that all of Samba is released
+     ** under the LGPL
+
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 3 of the License, or (at your option) any later version.
+
+   This library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "private.h"
+#define SAFE_FREE(x) do { if ((x) != NULL) {free((void *)x); (x)=NULL;} } while(0)
+
+/*
+  transaction design:
+
+  - only allow a single transaction at a time per database. This makes
+    using the transaction API simpler, as otherwise the caller would
+    have to cope with temporary failures in transactions that conflict
+    with other current transactions
+
+  - keep the transaction recovery information in the same file as the
+    database, using a special 'transaction recovery' record pointed at
+    by the header. This removes the need for extra journal files as
+    used by some other databases
+
+  - dynamically allocate the transaction recovery record, re-using it
+    for subsequent transactions. If a larger record is needed then
+    ntdb_free() the old record to place it on the normal ntdb freelist
+    before allocating the new record
+
+  - during transactions, keep a linked list of all writes that have
+    been performed by intercepting all ntdb_write() calls. The hooked
+    transaction versions of ntdb_read() and ntdb_write() check this
+    linked list and try to use the elements of the list in preference
+    to the real database.
+
+  - don't allow any locks to be held when a transaction starts,
+    otherwise we can end up with deadlock (plus lack of lock nesting
+    in POSIX locks would mean the lock is lost)
+
+  - if the caller gains a lock during the transaction but doesn't
+    release it then fail the commit
+
+  - allow for nested calls to ntdb_transaction_start(), re-using the
+    existing transaction record. If the inner transaction is canceled
+    then a subsequent commit will fail
+
+  - keep a mirrored copy of the ntdb hash chain heads to allow for the
+    fast hash heads scan on traverse, updating the mirrored copy in
+    the transaction version of ntdb_write
+
+  - allow callers to mix transaction and non-transaction use of ntdb,
+    although once a transaction is started then an exclusive lock is
+    gained until the transaction is committed or canceled
+
+  - the commit strategy involves first saving away all modified data
+    into a linearised buffer in the transaction recovery area, then
+    marking the transaction recovery area with a magic value to
+    indicate a valid recovery record. In total 4 fsync/msync calls are
+    needed per commit to prevent race conditions. It might be possible
+    to reduce this to 3 or even 2 with some more work.
+
+  - check for a valid recovery record on open of the ntdb, while the
+    open lock is held. Automatically recover from the transaction
+    recovery area if needed, then continue with the open as
+    usual. This allows for smooth crash recovery with no administrator
+    intervention.
+
+  - if NTDB_NOSYNC is passed to flags in ntdb_open then transactions are
+    still available, but no transaction recovery area is used and no
+    fsync/msync calls are made.
+*/
+
+/*
+  hold the context of any current transaction
+
+  The block list is indexed by (file offset / PAGESIZE). An entry stays
+  NULL until that block is first written inside the transaction; the
+  transaction read path treats a NULL or out-of-range entry as "not
+  modified" and goes to the real db through io_methods instead.
+*/
+struct ntdb_transaction {
+       /* the original io methods - used to do IOs to the real db */
+       const struct ntdb_methods *io_methods;
+
+       /* the list of transaction blocks. When a block is first
+          written to, it gets created in this list */
+       uint8_t **blocks;
+       size_t num_blocks; /* number of entries in the blocks array */
+       size_t last_block_size; /* number of valid bytes in the last block */
+
+       /* non-zero when an internal transaction error has
+          occurred. All write operations will then fail until the
+          transaction is ended */
+       int transaction_error;
+
+       /* when inside a transaction we need to keep track of any
+          nested ntdb_transaction_start() calls, as these are allowed,
+          but don't create a new transaction */
+       unsigned int nesting;
+
+       /* set when a prepare has already occurred; transaction_write()
+          refuses any further write once this is set */
+       bool prepared;
+       ntdb_off_t magic_offset; /* NOTE(review): not referenced by the code
+                                   visible here - presumably the offset of the
+                                   recovery record's magic; confirm */
+
+       /* old file size before transaction; transaction_write() uses it
+          to decide how much of a fresh block to pre-load from the file */
+       ntdb_len_t old_map_size;
+};
+
+/* This doesn't really need to be pagesize, but we use it for similar reasons. */
+#define PAGESIZE 65536
+
+/*
+  read while in a transaction. We need to check first if the data is in our list
+  of transaction elements, then if not do a real read
+
+  ntdb: context whose ntdb->transaction is active
+  off:  file offset to read from
+  buf:  destination buffer, receives len bytes
+  len:  number of bytes to read
+
+  Returns NTDB_SUCCESS when the data was copied. When the single-block
+  step fails, transaction_error is set and the value returned by
+  ntdb_logerr() is handed back to the caller.
+*/
+static enum NTDB_ERROR transaction_read(struct ntdb_context *ntdb, ntdb_off_t off,
+                                      void *buf, ntdb_len_t len)
+{
+       size_t blk;
+       enum NTDB_ERROR ecode;
+
+       /* break it down into block sized ops: each recursive call reads
+          only up to the next PAGESIZE boundary, so it takes the
+          single-block path below and records its own failure */
+       while (len + (off % PAGESIZE) > PAGESIZE) {
+               ntdb_len_t len2 = PAGESIZE - (off % PAGESIZE);
+               ecode = transaction_read(ntdb, off, buf, len2);
+               if (ecode != NTDB_SUCCESS) {
+                       return ecode;
+               }
+               len -= len2;
+               off += len2;
+               buf = (void *)(len2 + (char *)buf);
+       }
+
+       if (len == 0) {
+               return NTDB_SUCCESS;
+       }
+
+       blk = off / PAGESIZE;
+
+       /* see if we have it in the block list */
+       if (ntdb->transaction->num_blocks <= blk ||
+           ntdb->transaction->blocks[blk] == NULL) {
+               /* nope, do a real read */
+               ecode = ntdb->transaction->io_methods->tread(ntdb, off, buf, len);
+               if (ecode != NTDB_SUCCESS) {
+                       goto fail;
+               }
+               return 0; /* NOTE(review): literal 0 where the other paths return
+                            NTDB_SUCCESS - presumably the same value; confirm */
+       }
+
+       /* it is in the block list. Now check for the last block.
+          NOTE(review): the test compares len alone, not
+          (off % PAGESIZE) + len, against last_block_size - confirm
+          that this is intended */
+       if (blk == ntdb->transaction->num_blocks-1) {
+               if (len > ntdb->transaction->last_block_size) {
+                       ecode = NTDB_ERR_IO;
+                       goto fail;
+               }
+       }
+
+       /* now copy it out of this block */
+       memcpy(buf, ntdb->transaction->blocks[blk] + (off % PAGESIZE), len);
+       return NTDB_SUCCESS;
+
+fail:
+       ntdb->transaction->transaction_error = 1; /* flag the transaction as failed */
+       return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+                         "transaction_read: failed at off=%zu len=%zu",
+                         (size_t)off, (size_t)len);
+}
+
+
+/*
+  write while in a transaction
+
+  ntdb: context whose ntdb->transaction is active
+  off:  file offset to write at
+  buf:  source data, or NULL to write len zero bytes
+  len:  number of bytes to write
+
+  The data is stored in the in-memory block list only. The blocks array
+  is grown on demand, and a block touched for the first time is
+  pre-loaded from the real db for the part that lies below old_map_size.
+
+  Returns NTDB_SUCCESS, or an error code after setting transaction_error
+  (refused because already prepared, allocation failure, or failure to
+  read the old block contents).
+*/
+static enum NTDB_ERROR transaction_write(struct ntdb_context *ntdb, ntdb_off_t off,
+                                       const void *buf, ntdb_len_t len)
+{
+       size_t blk;
+       enum NTDB_ERROR ecode;
+
+       /* Only a commit is allowed on a prepared transaction */
+       if (ntdb->transaction->prepared) {
+               ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_ERROR,
+                                  "transaction_write: transaction already"
+                                  " prepared, write not allowed");
+               goto fail;
+       }
+
+       /* break it up into block sized chunks: each recursive call ends
+          on a PAGESIZE boundary. buf is only advanced when non-NULL, so
+          a zero-fill request stays a zero-fill request */
+       while (len + (off % PAGESIZE) > PAGESIZE) {
+               ntdb_len_t len2 = PAGESIZE - (off % PAGESIZE);
+               ecode = transaction_write(ntdb, off, buf, len2);
+               if (ecode != NTDB_SUCCESS) {
+                       return ecode;
+               }
+               len -= len2;
+               off += len2;
+               if (buf != NULL) {
+                       buf = (const void *)(len2 + (const char *)buf);
+               }
+       }
+
+       if (len == 0) {
+               return NTDB_SUCCESS;
+       }
+
+       blk = off / PAGESIZE;
+       off = off % PAGESIZE; /* from here on off is the offset inside block blk */
+
+       if (ntdb->transaction->num_blocks <= blk) {
+               uint8_t **new_blocks;
+               /* expand the blocks array so that index blk is valid */
+               if (ntdb->transaction->blocks == NULL) {
+                       new_blocks = (uint8_t **)malloc(
+                               (blk+1)*sizeof(uint8_t *));
+               } else {
+                       new_blocks = (uint8_t **)realloc(
+                               ntdb->transaction->blocks,
+                               (blk+1)*sizeof(uint8_t *));
+               }
+               if (new_blocks == NULL) {
+                       ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+                                          "transaction_write:"
+                                          " failed to allocate");
+                       goto fail;
+               }
+               memset(&new_blocks[ntdb->transaction->num_blocks], 0,
+                      (1+(blk - ntdb->transaction->num_blocks))*sizeof(uint8_t *)); /* new slots start out NULL */
+               ntdb->transaction->blocks = new_blocks;
+               ntdb->transaction->num_blocks = blk+1;
+               ntdb->transaction->last_block_size = 0; /* the new last block has no valid bytes yet */
+       }
+
+       /* allocate and fill a block? calloc zero-fills it, so any part
+          that is not loaded from the file below reads back as zero */
+       if (ntdb->transaction->blocks[blk] == NULL) {
+               ntdb->transaction->blocks[blk] = (uint8_t *)calloc(PAGESIZE, 1);
+               if (ntdb->transaction->blocks[blk] == NULL) {
+                       ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+                                          "transaction_write:"
+                                          " failed to allocate");
+                       goto fail;
+               }
+               if (ntdb->transaction->old_map_size > blk * PAGESIZE) {
+                       ntdb_len_t len2 = PAGESIZE;
+                       if (len2 + (blk * PAGESIZE) > ntdb->transaction->old_map_size) {
+                               len2 = ntdb->transaction->old_map_size - (blk * PAGESIZE); /* clamp to what existed before the transaction */
+                       }
+                       ecode = ntdb->transaction->io_methods->tread(ntdb,
+                                       blk * PAGESIZE,
+                                       ntdb->transaction->blocks[blk],
+                                       len2);
+                       if (ecode != NTDB_SUCCESS) {
+                               ecode = ntdb_logerr(ntdb, ecode,
+                                                  NTDB_LOG_ERROR,
+                                                  "transaction_write:"
+                                                  " failed to"
+                                                  " read old block: %s",
+                                                  strerror(errno)); /* NOTE(review): assumes tread left a meaningful errno - confirm */
+                               SAFE_FREE(ntdb->transaction->blocks[blk]);
+                               goto fail;
+                       }
+                       if (blk == ntdb->transaction->num_blocks-1) {
+                               ntdb->transaction->last_block_size = len2;
+                       }
+               }
+       }
+
+       /* overwrite part of an existing block; a NULL buf means
+          "write zeros" */
+       if (buf == NULL) {
+               memset(ntdb->transaction->blocks[blk] + off, 0, len);
+       } else {
+               memcpy(ntdb->transaction->blocks[blk] + off, buf, len);
+       }
+       if (blk == ntdb->transaction->num_blocks-1) {
+               if (len + off > ntdb->transaction->last_block_size) {
+                       ntdb->transaction->last_block_size = len + off;
+               }
+       }
+
+       return NTDB_SUCCESS;
+
+fail:
+       ntdb->transaction->transaction_error = 1; /* flag the transaction as failed */
+       return ecode;
+}
+
+
+/*
+  write while in a transaction - this variant never expands the transaction blocks, it only
+  updates existing blocks. This means it cannot change the recovery size
+
+  Parts of the range that fall in a block which is not in the block list
+  are silently skipped, and a write into the last block is truncated to
+  last_block_size (or skipped if it starts at or beyond it). Nothing is
+  reported to the caller.
+
+  NOTE(review): the chunking loop guards against buf == NULL, but the
+  final memcpy() reads from buf unconditionally - presumably callers
+  never pass NULL here; confirm.
+*/
+static void transaction_write_existing(struct ntdb_context *ntdb, ntdb_off_t off,
+                                      const void *buf, ntdb_len_t len)
+{
+       size_t blk;
+
+       /* break it up into block sized chunks */
+       while (len + (off % PAGESIZE) > PAGESIZE) {
+               ntdb_len_t len2 = PAGESIZE - (off % PAGESIZE);
+               transaction_write_existing(ntdb, off, buf, len2);
+               len -= len2;
+               off += len2;
+               if (buf != NULL) {
+                       buf = (const void *)(len2 + (const char *)buf);
+               }
+       }
+
+       if (len == 0) {
+               return;
+       }
+
+       blk = off / PAGESIZE;
+       off = off % PAGESIZE; /* from here on off is the offset inside block blk */
+
+       if (ntdb->transaction->num_blocks <= blk ||
+           ntdb->transaction->blocks[blk] == NULL) {
+               return; /* block was never copied into the transaction: nothing to update */
+       }
+
+       if (blk == ntdb->transaction->num_blocks-1 &&
+           off + len > ntdb->transaction->last_block_size) {
+               if (off >= ntdb->transaction->last_block_size) {
+                       return;
+               }
+               len = ntdb->transaction->last_block_size - off; /* keep only the part inside the valid bytes */
+       }
+
+       /* overwrite part of an existing block */
+       memcpy(ntdb->transaction->blocks[blk] + off, buf, len);
+}
+
+
+/*
+  out of bounds check during a transaction
+
+  Returns NTDB_SUCCESS when off + len does not wrap around and lies
+  within ntdb->file->map_size. Otherwise an error is logged and
+  NTDB_ERR_IO is returned.
+
+  NOTE(review): when probe is true this returns NTDB_SUCCESS without
+  logging even if the range is out of bounds - confirm that callers
+  rely on that.
+*/
+static enum NTDB_ERROR transaction_oob(struct ntdb_context *ntdb,
+                                     ntdb_off_t off, ntdb_len_t len, bool probe)
+{
+       if ((off + len >= off && off + len <= ntdb->file->map_size) || probe) {
+               return NTDB_SUCCESS;
+       }
+
+       ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+                  "ntdb_oob len %lld beyond transaction size %lld",
+                  (long long)(off + len),
+                  (long long)ntdb->file->map_size);
+       return NTDB_ERR_IO;
+}
+
+/*
+  transaction version of ntdb_expand().
+
+  Records 'addition' zero bytes at the current end of the map through
+  transaction_write() (NULL buffer = zero fill) and, only if that
+  succeeds, grows ntdb->file->map_size by the same amount. Returns the
+  result of transaction_write().
+*/
+static enum NTDB_ERROR transaction_expand_file(struct ntdb_context *ntdb,
+                                             ntdb_off_t addition)
+{
+       enum NTDB_ERROR ecode;
+
+       /* add a write to the transaction elements, so subsequent
+          reads see the zero data */
+       ecode = transaction_write(ntdb, ntdb->file->map_size, NULL, addition);
+       if (ecode == NTDB_SUCCESS) {
+               ntdb->file->map_size += addition;
+       }
+       return ecode;
+}
+/*
+  direct (pointer) access while in a transaction
+
+  write_mode: returns a pointer into the transaction's own copy of the
+  block, but only if [off, off+len) lies in a single block that has
+  already been copied; otherwise NULL.
+
+  read mode: returns a pointer into the transaction's copy when the
+  range lies in a single copied block. If none of the blocks in the
+  range has been copied, the request is passed to the original
+  io_methods->direct(). If the range mixes copied and uncopied blocks,
+  NULL is returned.
+
+  The ntdb->stats transaction_*_direct counters are updated on each call.
+*/
+static void *transaction_direct(struct ntdb_context *ntdb, ntdb_off_t off,
+                               size_t len, bool write_mode)
+{
+       size_t blk = off / PAGESIZE, end_blk;
+
+       /* This is wrong for zero-length blocks, but will fail gracefully */
+       end_blk = (off + len - 1) / PAGESIZE;
+
+       /* Can only do direct if in single block and we've already copied. */
+       if (write_mode) {
+               ntdb->stats.transaction_write_direct++;
+               if (blk != end_blk
+                   || blk >= ntdb->transaction->num_blocks
+                   || ntdb->transaction->blocks[blk] == NULL) {
+                       ntdb->stats.transaction_write_direct_fail++;
+                       return NULL;
+               }
+               return ntdb->transaction->blocks[blk] + off % PAGESIZE;
+       }
+
+       ntdb->stats.transaction_read_direct++;
+       /* Single block which we have copied? */
+       if (blk == end_blk
+           && blk < ntdb->transaction->num_blocks
+           && ntdb->transaction->blocks[blk])
+               return ntdb->transaction->blocks[blk] + off % PAGESIZE;
+
+       /* Otherwise must be all not copied: any copied block inside the
+          range means the data is split between the transaction and the
+          real db, so no single pointer can be handed out */
+       while (blk <= end_blk) {
+               if (blk >= ntdb->transaction->num_blocks)
+                       break;
+               if (ntdb->transaction->blocks[blk]) {
+                       ntdb->stats.transaction_read_direct_fail++;
+                       return NULL;
+               }
+               blk++;
+       }
+       return ntdb->transaction->io_methods->direct(ntdb, off, len, false);
+}
+
+static const struct ntdb_methods transaction_methods = { /* io hooks which
+          ntdb_transaction_start() installs as ntdb->io; the initialisers
+          are positional, so their order has to follow the member order
+          of struct ntdb_methods */
+       transaction_read,
+       transaction_write,
+       transaction_oob,
+       transaction_expand_file,
+       transaction_direct,
+};
+
+/*
+  sync to disk
+*/
+static enum NTDB_ERROR transaction_sync(struct ntdb_context *ntdb,
+                                      ntdb_off_t offset, ntdb_len_t length)
+{
+       if (ntdb->flags & NTDB_NOSYNC) {
+               return NTDB_SUCCESS;
+       }
+
+       if (fsync(ntdb->file->fd) != 0) {
+               return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+                                 "ntdb_transaction: fsync failed: %s",
+                                 strerror(errno));
+       }
+#ifdef MS_SYNC
+       if (ntdb->file->map_ptr) {
+               ntdb_off_t moffset = offset & ~(getpagesize()-1);
+               if (msync(moffset + (char *)ntdb->file->map_ptr,
+                         length + (offset - moffset), MS_SYNC) != 0) {
+                       return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+                                         "ntdb_transaction: msync failed: %s",
+                                         strerror(errno));
+               }
+       }
+#endif
+       return NTDB_SUCCESS;
+}
+
+/*
+  Tear down the current transaction; ntdb_transaction_commit() also
+  ends with this once its data has been written.  For a nested
+  transaction it only records the error and un-nests.  Otherwise it
+  restores map_size, frees the block copies, invalidates any recovery
+  magic, drops the locks and puts the original io methods back.
+*/
+static void _ntdb_transaction_cancel(struct ntdb_context *ntdb)
+{
+       int i;
+       enum NTDB_ERROR ecode;
+
+       if (ntdb->transaction == NULL) {
+               ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
+                          "ntdb_transaction_cancel: no transaction");
+               return;
+       }
+
+       if (ntdb->transaction->nesting != 0) { /* inner level: remember the
+               failure (prepare_commit refuses once transaction_error is
+               set) and just pop one nesting level */
+               ntdb->transaction->transaction_error = 1;
+               ntdb->transaction->nesting--;
+               return;
+       }
+
+       ntdb->file->map_size = ntdb->transaction->old_map_size;
+
+       /* map_size has just been put back to old_map_size (commit sets
+          old_map_size to the new size before calling us, so nothing is
+          lost there); now free all the transaction blocks */
+       for (i=0;i<ntdb->transaction->num_blocks;i++) {
+               if (ntdb->transaction->blocks[i] != NULL) {
+                       free(ntdb->transaction->blocks[i]);
+               }
+       }
+       SAFE_FREE(ntdb->transaction->blocks);
+
+       if (ntdb->transaction->magic_offset) {
+               const struct ntdb_methods *methods = ntdb->transaction->io_methods;
+               uint64_t invalid = NTDB_RECOVERY_INVALID_MAGIC;
+
+               /* remove the recovery marker: magic_offset is only set
+                  by transaction_setup_recovery(), which writes the
+                  valid magic there.  A failure here is only logged. */
+               ecode = methods->twrite(ntdb, ntdb->transaction->magic_offset,
+                                       &invalid, sizeof(invalid));
+               if (ecode == NTDB_SUCCESS)
+                       ecode = transaction_sync(ntdb,
+                                                ntdb->transaction->magic_offset,
+                                                sizeof(invalid));
+               if (ecode != NTDB_SUCCESS) {
+                       ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+                                  "ntdb_transaction_cancel: failed to remove"
+                                  " recovery magic");
+               }
+       }
+
+       if (ntdb->file->allrecord_lock.count)
+               ntdb_allrecord_unlock(ntdb, ntdb->file->allrecord_lock.ltype);
+
+       /* restore the normal io methods */
+       ntdb->io = ntdb->transaction->io_methods;
+
+       ntdb_transaction_unlock(ntdb, F_WRLCK);
+
+       if (ntdb_has_open_lock(ntdb)) /* taken by prepare_commit */
+               ntdb_unlock_open(ntdb, F_WRLCK);
+
+       SAFE_FREE(ntdb->transaction);
+}
+
+/*
+  start a ntdb transaction. No token is returned, as only a single
+  transaction is allowed to be pending per ntdb_context
+
+  Refuses internal and read-only databases, and callers which still
+  hold hash locks.  A start while a transaction is already pending
+  only bumps the nesting level, and only if NTDB_ALLOW_NESTING is set.
+  Otherwise the transaction write lock and a read lock over all
+  records are taken and ntdb->io is switched to the transaction
+  methods.
+
+  Returns NTDB_SUCCESS or an error code; every return path also
+  stores the result in ntdb->last_error.
+*/
+_PUBLIC_ enum NTDB_ERROR ntdb_transaction_start(struct ntdb_context *ntdb)
+{
+       enum NTDB_ERROR ecode;
+
+       ntdb->stats.transactions++;
+       /* some sanity checks */
+       if (ntdb->flags & NTDB_INTERNAL) {
+               return ntdb->last_error = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
+                                                   NTDB_LOG_USE_ERROR,
+                                                   "ntdb_transaction_start:"
+                                                   " cannot start a"
+                                                   " transaction on an"
+                                                   " internal ntdb");
+       }
+
+       if (ntdb->flags & NTDB_RDONLY) {
+               return ntdb->last_error = ntdb_logerr(ntdb, NTDB_ERR_RDONLY,
+                                                   NTDB_LOG_USE_ERROR,
+                                                   "ntdb_transaction_start:"
+                                                   " cannot start a"
+                                                   " transaction on a "
+                                                   " read-only ntdb");
+       }
+
+       /* cope with nested ntdb_transaction_start() calls */
+       if (ntdb->transaction != NULL) {
+               if (!(ntdb->flags & NTDB_ALLOW_NESTING)) {
+                       return ntdb->last_error
+                               = ntdb_logerr(ntdb, NTDB_ERR_IO,
+                                            NTDB_LOG_USE_ERROR,
+                                            "ntdb_transaction_start:"
+                                            " already inside transaction");
+               }
+               ntdb->transaction->nesting++;
+               ntdb->stats.transaction_nest++;
+               /* record the success like every other exit does, so a
+                  stale error is not left behind in last_error */
+               return ntdb->last_error = NTDB_SUCCESS;
+       }
+
+       if (ntdb_has_hash_locks(ntdb)) {
+               /* the caller must not have any locks when starting a
+                  transaction as otherwise we'll be screwed by lack
+                  of nested locks in POSIX */
+               return ntdb->last_error = ntdb_logerr(ntdb, NTDB_ERR_LOCK,
+                                                   NTDB_LOG_USE_ERROR,
+                                                   "ntdb_transaction_start:"
+                                                   " cannot start a"
+                                                   " transaction with locks"
+                                                   " held");
+       }
+
+       ntdb->transaction = (struct ntdb_transaction *)
+               calloc(sizeof(struct ntdb_transaction), 1);
+       if (ntdb->transaction == NULL) {
+               return ntdb->last_error = ntdb_logerr(ntdb, NTDB_ERR_OOM,
+                                                   NTDB_LOG_ERROR,
+                                                   "ntdb_transaction_start:"
+                                                   " cannot allocate");
+       }
+
+       /* get the transaction write lock. This is a blocking lock. As
+          discussed with Volker, there are a number of ways we could
+          make this async, which we will probably do in the future */
+       ecode = ntdb_transaction_lock(ntdb, F_WRLCK);
+       if (ecode != NTDB_SUCCESS) {
+               goto fail_transaction_lock;
+       }
+
+       /* get a read lock over entire file. This is upgraded to a write
+          lock during the commit */
+       ecode = ntdb_allrecord_lock(ntdb, F_RDLCK, NTDB_LOCK_WAIT, true);
+       if (ecode != NTDB_SUCCESS) {
+               goto fail_allrecord_lock;
+       }
+
+       /* make sure we know about any file expansions already done by
+          anyone else */
+       ntdb->io->oob(ntdb, ntdb->file->map_size, 1, true);
+       ntdb->transaction->old_map_size = ntdb->file->map_size;
+
+       /* finally hook the io methods, replacing them with
+          transaction specific methods */
+       ntdb->transaction->io_methods = ntdb->io;
+       ntdb->io = &transaction_methods;
+       return ntdb->last_error = NTDB_SUCCESS;
+
+fail_allrecord_lock:
+       ntdb_transaction_unlock(ntdb, F_WRLCK);
+fail_transaction_lock:
+       SAFE_FREE(ntdb->transaction->blocks);
+       SAFE_FREE(ntdb->transaction);
+       return ntdb->last_error = ecode;
+}
+
+
+/*
+  cancel the current transaction
+
+  Public entry point: counts the call in stats.transaction_cancel and
+  hands over to _ntdb_transaction_cancel(), which does the real work
+  (for a nested transaction that means flagging the error and
+  un-nesting only).
+*/
+_PUBLIC_ void ntdb_transaction_cancel(struct ntdb_context *ntdb)
+{
+       ntdb->stats.transaction_cancel++;
+       _ntdb_transaction_cancel(ntdb);
+}
+
+/*
+  work out how much space the linearised recovery data will consume (worst case)
+
+  Every copied block which starts below old_map_size is counted as one
+  entry header (two ntdb_off_t sized fields) plus the whole block; the
+  last block of the transaction counts with last_block_size instead of
+  PAGESIZE.  Blocks at or beyond old_map_size need no recovery data.
+*/
+static ntdb_len_t ntdb_recovery_size(struct ntdb_context *ntdb)
+{
+       ntdb_len_t recovery_size = 0;
+       /* size_t as in alloc_recovery(): keeps the offset arithmetic
+          below out of plain int range for very large files */
+       size_t i;
+
+       for (i = 0; i < ntdb->transaction->num_blocks; i++) {
+               /* do the multiplication in ntdb_off_t so it cannot
+                  overflow before the comparison */
+               if ((ntdb_off_t)i * PAGESIZE
+                   >= ntdb->transaction->old_map_size) {
+                       break;
+               }
+               if (ntdb->transaction->blocks[i] == NULL) {
+                       continue;
+               }
+               recovery_size += 2*sizeof(ntdb_off_t);
+               if (i == ntdb->transaction->num_blocks-1) {
+                       recovery_size += ntdb->transaction->last_block_size;
+               } else {
+                       recovery_size += PAGESIZE;
+               }
+       }
+
+       return recovery_size;
+}
+
+/*
+  Look up the recovery area named by the file header.
+
+  On success *recovery_offset holds the area's offset and "rec" its
+  converted record header, read through "methods".  If the header
+  names no area, or the record carries neither the valid nor the
+  invalid recovery magic, *recovery_offset is 0 and rec->max_len is
+  forced to 0.
+*/
+static enum NTDB_ERROR ntdb_recovery_area(struct ntdb_context *ntdb,
+                                       const struct ntdb_methods *methods,
+                                       ntdb_off_t *recovery_offset,
+                                       struct ntdb_recovery_record *rec)
+{
+       enum NTDB_ERROR err;
+       ntdb_off_t head;
+
+       head = ntdb_read_off(ntdb, offsetof(struct ntdb_header, recovery));
+       /* the caller's variable receives the value even on error */
+       *recovery_offset = head;
+       if (NTDB_OFF_IS_ERR(head)) {
+               return NTDB_OFF_TO_ERR(head);
+       }
+
+       /* no recovery area has been allocated */
+       if (head == 0) {
+               rec->max_len = 0;
+               return NTDB_SUCCESS;
+       }
+
+       err = methods->tread(ntdb, head, rec, sizeof(*rec));
+       if (err != NTDB_SUCCESS) {
+               return err;
+       }
+       ntdb_convert(ntdb, rec, sizeof(*rec));
+
+       /* ignore invalid recovery regions: can happen in crash */
+       if (!(rec->magic == NTDB_RECOVERY_MAGIC
+             || rec->magic == NTDB_RECOVERY_INVALID_MAGIC)) {
+               *recovery_offset = 0;
+               rec->max_len = 0;
+       }
+       return NTDB_SUCCESS;
+}
+
+/* Length of the common prefix of "new" and "old": the number of leading
+ * bytes, out of "length", which are equal in both buffers. */
+static unsigned int same(const unsigned char *new,
+                        const unsigned char *old,
+                        unsigned int length)
+{
+       unsigned int n = 0;
+
+       while (n < length && new[n] == old[n]) {
+               n++;
+       }
+       return n;
+}
+
+/* Scan "new" against "old" for the end of a differing region.
+ *
+ * Returns the number of leading bytes which belong to the differing
+ * region and stores in *samelen the length of the run of equal bytes
+ * which follows it.  A run of equal bytes only ends the region when it
+ * is at least "min_same" long; shorter runs are treated as part of the
+ * differing region.  If no such run is found, the region extends to
+ * "length" minus any long-enough run of equal bytes at the very end. */
+static unsigned int different(const unsigned char *new,
+                             const unsigned char *old,
+                             unsigned int length,
+                             unsigned int min_same,
+                             unsigned int *samelen)
+{
+       unsigned int pos;
+       unsigned int run = 0;
+
+       for (pos = 0; pos < length; pos++) {
+               if (new[pos] == old[pos]) {
+                       run++;
+                       continue;
+               }
+               /* a mismatch: was the run before it long enough? */
+               if (run >= min_same) {
+                       *samelen = run;
+                       return pos - run;
+               }
+               run = 0;
+       }
+
+       /* ran off the end: a short trailing run does not count */
+       if (run < min_same) {
+               run = 0;
+       }
+       *samelen = run;
+       return length - run;
+}
+
+/* Allocates recovery blob, without ntdb_recovery_record at head set up.
+ *
+ * The blob is malloc()ed with room for the record header plus the
+ * worst case reported by ntdb_recovery_size().  After the (unfilled)
+ * header comes a series of { offset, length, original file bytes }
+ * entries, one for every changed range which lies below old_map_size.
+ *
+ * On success returns the blob (the caller frees it) and sets *len to
+ * the number of bytes used after the header; on failure returns an
+ * error pointer built with NTDB_ERR_PTR().
+ */
+static struct ntdb_recovery_record *alloc_recovery(struct ntdb_context *ntdb,
+                                                 ntdb_len_t *len)
+{
+       struct ntdb_recovery_record *rec;
+       size_t i;
+       enum NTDB_ERROR ecode;
+       unsigned char *p;
+       const struct ntdb_methods *old_methods = ntdb->io;
+
+       rec = malloc(sizeof(*rec) + ntdb_recovery_size(ntdb));
+       if (!rec) {
+               ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+                          "transaction_setup_recovery:"
+                          " cannot allocate");
+               return NTDB_ERR_PTR(NTDB_ERR_OOM);
+       }
+
+       /* We temporarily revert to the old I/O methods, so we can use
+        * ntdb_access_read to get at the file's contents rather than
+        * the transaction's; both exits put ntdb->io back. */
+       ntdb->io = ntdb->transaction->io_methods;
+
+       /* build the recovery data into a single blob to allow us to do a single
+          large write, which should be more efficient */
+       p = (unsigned char *)(rec + 1);
+       for (i=0;i<ntdb->transaction->num_blocks;i++) {
+               ntdb_off_t offset;
+               ntdb_len_t length;
+               unsigned int off;
+               const unsigned char *buffer;
+
+               if (ntdb->transaction->blocks[i] == NULL) {
+                       continue;
+               }
+
+               offset = i * PAGESIZE;
+               length = PAGESIZE;
+               if (i == ntdb->transaction->num_blocks-1) {
+                       length = ntdb->transaction->last_block_size;
+               }
+
+               if (offset >= ntdb->transaction->old_map_size) { /* block lies
+                       wholly in the newly added region: nothing to save */
+                       continue;
+               }
+
+               if (offset + length > ntdb->file->map_size) {
+                       ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+                                          "ntdb_transaction_setup_recovery:"
+                                          " transaction data over new region"
+                                          " boundary");
+                       goto fail;
+               }
+               if (offset + length > ntdb->transaction->old_map_size) {
+                       /* Short read at EOF. */
+                       length = ntdb->transaction->old_map_size - offset;
+               }
+               buffer = ntdb_access_read(ntdb, offset, length, false);
+               if (NTDB_PTR_IS_ERR(buffer)) {
+                       ecode = NTDB_PTR_ERR(buffer);
+                       goto fail;
+               }
+
+               /* Skip over anything the same at the start.  From here
+                  "off" is the position inside the block and "offset"
+                  the matching position in the file. */
+               off = same(ntdb->transaction->blocks[i], buffer, length);
+               offset += off;
+
+               while (off < length) { /* one entry per differing range */
+                       ntdb_len_t len1;
+                       unsigned int samelen;
+
+                       len1 = different(ntdb->transaction->blocks[i] + off,
+                                       buffer + off, length - off,
+                                       sizeof(offset) + sizeof(len1) + 1, /* min_same:
+                                          one byte more than an entry header */
+                                       &samelen);
+
+                       memcpy(p, &offset, sizeof(offset));
+                       memcpy(p + sizeof(offset), &len1, sizeof(len1));
+                       ntdb_convert(ntdb, p, sizeof(offset) + sizeof(len1));
+                       p += sizeof(offset) + sizeof(len1);
+                       memcpy(p, buffer + off, len1); /* the file's bytes, not
+                                                         the transaction's */
+                       p += len1;
+                       off += len1 + samelen;
+                       offset += len1 + samelen;
+               }
+               ntdb_access_release(ntdb, buffer);
+       }
+
+       *len = p - (unsigned char *)(rec + 1);
+       ntdb->io = old_methods;
+       return rec;
+
+fail:
+       free(rec);
+       ntdb->io = old_methods;
+       return NTDB_ERR_PTR(ecode);
+}
+/*
+  Add a new recovery area, big enough for rec_length bytes of recovery
+  data, at the transaction's current end of file, and point the
+  header's recovery field at it.  rec->max_len is set to the usable
+  size of the new area.  Returns the area's offset, or an error
+  encoded with NTDB_ERR_TO_OFF().
+*/
+static ntdb_off_t create_recovery_area(struct ntdb_context *ntdb,
+                                     ntdb_len_t rec_length,
+                                     struct ntdb_recovery_record *rec)
+{
+       ntdb_off_t off, recovery_off;
+       ntdb_len_t addition;
+       enum NTDB_ERROR ecode;
+       const struct ntdb_methods *methods = ntdb->transaction->io_methods;
+
+       /* round up to a multiple of page size. Overallocate, since each
+        * such allocation forces us to expand the file. */
+       rec->max_len = ntdb_expand_adjust(ntdb->file->map_size, rec_length);
+
+       /* Round up to a page: header plus max_len together become a
+          multiple of PAGESIZE. */
+       rec->max_len = ((sizeof(*rec) + rec->max_len + PAGESIZE-1)
+                       & ~(PAGESIZE-1))
+               - sizeof(*rec);
+
+       off = ntdb->file->map_size;
+
+       /* Restore ->map_size before calling underlying expand_file.
+          Also so that we don't try to expand the file again in the
+          transaction commit, which would destroy the recovery
+          area.  "addition" covers whatever the transaction has grown
+          the file by so far plus the new area itself. */
+       addition = (ntdb->file->map_size - ntdb->transaction->old_map_size) +
+               sizeof(*rec) + rec->max_len;
+       ntdb->file->map_size = ntdb->transaction->old_map_size;
+       ntdb->stats.transaction_expand_file++;
+       ecode = methods->expand_file(ntdb, addition);
+       if (ecode != NTDB_SUCCESS) {
+               ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+                          "ntdb_recovery_allocate:"
+                          " failed to create recovery area");
+               return NTDB_ERR_TO_OFF(ecode);
+       }
+
+       /* we have to reset the old map size so that we don't try to
+          expand the file again in the transaction commit, which
+          would destroy the recovery area */
+       ntdb->transaction->old_map_size = ntdb->file->map_size;
+
+       /* write the recovery header offset and sync - we can sync without a race here
+          as the magic ptr in the recovery record has not been set */
+       recovery_off = off;
+       ntdb_convert(ntdb, &recovery_off, sizeof(recovery_off));
+       ecode = methods->twrite(ntdb, offsetof(struct ntdb_header, recovery),
+                               &recovery_off, sizeof(ntdb_off_t));
+       if (ecode != NTDB_SUCCESS) {
+               ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+                          "ntdb_recovery_allocate:"
+                          " failed to write recovery head");
+               return NTDB_ERR_TO_OFF(ecode);
+       }
+       transaction_write_existing(ntdb, offsetof(struct ntdb_header, recovery),
+                                  &recovery_off,
+                                  sizeof(ntdb_off_t)); /* mirror the new
+                                     value into the transaction's own copy */
+       return off;
+}
+
+/*
+  setup the recovery data that will be used on a crash during commit
+
+  Builds the recovery blob with alloc_recovery(), makes sure the file
+  has a recovery area big enough for it (freeing an old, too small one
+  and creating a new one if need be), writes header plus data with the
+  invalid magic, syncs, and only then writes and syncs the valid
+  magic.  ntdb->transaction->magic_offset is left pointing at that
+  magic.
+
+  Returns NTDB_SUCCESS or the error of the step which failed.
+*/
+static enum NTDB_ERROR transaction_setup_recovery(struct ntdb_context *ntdb)
+{
+       ntdb_len_t recovery_size = 0;
+       ntdb_len_t total_size;
+       ntdb_off_t recovery_off = 0;
+       /* remembered now: create_recovery_area() moves old_map_size */
+       ntdb_off_t old_map_size = ntdb->transaction->old_map_size;
+       struct ntdb_recovery_record *recovery;
+       const struct ntdb_methods *methods = ntdb->transaction->io_methods;
+       uint64_t magic;
+       enum NTDB_ERROR ecode;
+
+       recovery = alloc_recovery(ntdb, &recovery_size);
+       if (NTDB_PTR_IS_ERR(recovery))
+               return NTDB_PTR_ERR(recovery);
+
+       ecode = ntdb_recovery_area(ntdb, methods, &recovery_off, recovery);
+       if (ecode) {
+               free(recovery);
+               return ecode;
+       }
+
+       /* recovery_off == 0 means the file has no recovery area at all.
+          One has to be created even when there is no data to save,
+          otherwise the record and its magic below would be written
+          over the file header at offset 0. */
+       if (recovery_off == 0 || recovery->max_len < recovery_size) {
+               /* Not large enough. Free up old recovery area. */
+               if (recovery_off) {
+                       ntdb->stats.frees++;
+                       ecode = add_free_record(ntdb, recovery_off,
+                                               sizeof(*recovery)
+                                               + recovery->max_len,
+                                               NTDB_LOCK_WAIT, true);
+                       free(recovery);
+                       if (ecode != NTDB_SUCCESS) {
+                               return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+                                                 "ntdb_recovery_allocate:"
+                                                 " failed to free previous"
+                                                 " recovery area");
+                       }
+
+                       /* Refresh recovery after add_free_record above. */
+                       recovery = alloc_recovery(ntdb, &recovery_size);
+                       if (NTDB_PTR_IS_ERR(recovery))
+                               return NTDB_PTR_ERR(recovery);
+               }
+
+               recovery_off = create_recovery_area(ntdb, recovery_size,
+                                                   recovery);
+               if (NTDB_OFF_IS_ERR(recovery_off)) {
+                       free(recovery);
+                       return NTDB_OFF_TO_ERR(recovery_off);
+               }
+       }
+
+       /* The blob is the record header followed by recovery_size bytes
+          of data (recovery_size does not include the header), so this
+          is how much has to be written and synced. */
+       total_size = sizeof(*recovery) + recovery_size;
+
+       /* Now we know size, convert rec header. */
+       recovery->magic = NTDB_RECOVERY_INVALID_MAGIC;
+       recovery->len = recovery_size;
+       recovery->eof = old_map_size;
+       ntdb_convert(ntdb, recovery, sizeof(*recovery));
+
+       /* write the recovery data to the recovery area */
+       ecode = methods->twrite(ntdb, recovery_off, recovery, total_size);
+       if (ecode != NTDB_SUCCESS) {
+               free(recovery);
+               return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+                                 "ntdb_transaction_setup_recovery:"
+                                 " failed to write recovery data");
+       }
+       transaction_write_existing(ntdb, recovery_off, recovery, total_size);
+
+       free(recovery);
+
+       /* as we don't have ordered writes, we have to sync the recovery
+          data before we update the magic to indicate that the recovery
+          data is present */
+       ecode = transaction_sync(ntdb, recovery_off, total_size);
+       if (ecode != NTDB_SUCCESS)
+               return ecode;
+
+       magic = NTDB_RECOVERY_MAGIC;
+       ntdb_convert(ntdb, &magic, sizeof(magic));
+
+       ntdb->transaction->magic_offset
+               = recovery_off + offsetof(struct ntdb_recovery_record, magic);
+
+       ecode = methods->twrite(ntdb, ntdb->transaction->magic_offset,
+                               &magic, sizeof(magic));
+       if (ecode != NTDB_SUCCESS) {
+               return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+                                 "ntdb_transaction_setup_recovery:"
+                                 " failed to write recovery magic");
+       }
+       transaction_write_existing(ntdb, ntdb->transaction->magic_offset,
+                                  &magic, sizeof(magic));
+
+       /* ensure the recovery magic marker is on disk */
+       return transaction_sync(ntdb, ntdb->transaction->magic_offset,
+                               sizeof(magic));
+}
+/*
+  Common prepare step behind ntdb_transaction_prepare_commit() and
+  ntdb_transaction_commit().  For an outermost, non-empty transaction
+  it upgrades the allrecord lock, takes the open lock, writes the
+  recovery area (unless NTDB_NOSYNC is set), marks the transaction
+  prepared and grows the real file to the transaction's size.
+  Note that a transaction found already prepared, or with an error
+  pending, is cancelled here before the error is returned; the other
+  failure returns leave the transaction in place.
+*/
+static enum NTDB_ERROR _ntdb_transaction_prepare_commit(struct ntdb_context *ntdb)
+{
+       const struct ntdb_methods *methods;
+       enum NTDB_ERROR ecode;
+
+       if (ntdb->transaction == NULL) {
+               return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
+                                 "ntdb_transaction_prepare_commit:"
+                                 " no transaction");
+       }
+
+       if (ntdb->transaction->prepared) {
+               _ntdb_transaction_cancel(ntdb);
+               return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
+                                 "ntdb_transaction_prepare_commit:"
+                                 " transaction already prepared");
+       }
+
+       if (ntdb->transaction->transaction_error) { /* set when a nested
+               transaction was cancelled */
+               _ntdb_transaction_cancel(ntdb);
+               return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_ERROR,
+                                 "ntdb_transaction_prepare_commit:"
+                                 " transaction error pending");
+       }
+
+
+       if (ntdb->transaction->nesting != 0) { /* only the outermost level
+               prepares anything */
+               return NTDB_SUCCESS;
+       }
+
+       /* check for a null transaction */
+       if (ntdb->transaction->blocks == NULL) {
+               return NTDB_SUCCESS;
+       }
+
+       methods = ntdb->transaction->io_methods;
+
+       /* upgrade the main transaction lock region to a write lock */
+       ecode = ntdb_allrecord_upgrade(ntdb, NTDB_HASH_LOCK_START);
+       if (ecode != NTDB_SUCCESS) {
+               return ecode;
+       }
+
+       /* get the open lock - this prevents new users attaching to the database
+          during the commit */
+       ecode = ntdb_lock_open(ntdb, F_WRLCK, NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK);
+       if (ecode != NTDB_SUCCESS) {
+               return ecode;
+       }
+
+       /* Since we have whole db locked, we don't need the expansion lock. */
+       if (!(ntdb->flags & NTDB_NOSYNC)) {
+               /* Writes the recovery area and sets
+                * ntdb->transaction->magic_offset. */
+               ecode = transaction_setup_recovery(ntdb);
+               if (ecode != NTDB_SUCCESS) {
+                       return ecode;
+               }
+       }
+
+       ntdb->transaction->prepared = true;
+
+       /* expand the file to the new size if needed */
+       if (ntdb->file->map_size != ntdb->transaction->old_map_size) {
+               ntdb_len_t add;
+
+               add = ntdb->file->map_size - ntdb->transaction->old_map_size;
+               /* Restore original map size for ntdb_expand_file */
+               ntdb->file->map_size = ntdb->transaction->old_map_size;
+               ecode = methods->expand_file(ntdb, add);
+               if (ecode != NTDB_SUCCESS) {
+                       return ecode;
+               }
+       }
+
+       /* Keep the open lock until the actual commit */
+       return NTDB_SUCCESS;
+}
+
+/*
+   prepare to commit the current transaction
+
+   Public wrapper: runs _ntdb_transaction_prepare_commit() and stores
+   its result in ntdb->last_error as well as returning it.
+*/
+_PUBLIC_ enum NTDB_ERROR ntdb_transaction_prepare_commit(struct ntdb_context *ntdb)
+{
+       return ntdb->last_error = _ntdb_transaction_prepare_commit(ntdb);
+}
+
+/*
+  commit the current transaction
+*/
+_PUBLIC_ enum NTDB_ERROR ntdb_transaction_commit(struct ntdb_context *ntdb)
+{
+       const struct ntdb_methods *methods;
+       int i;
+       enum NTDB_ERROR ecode;
+
+       if (ntdb->transaction == NULL) {
+               return ntdb->last_error = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
+                                                   NTDB_LOG_USE_ERROR,
+                                                   "ntdb_transaction_commit:"
+                                                   " no transaction");
+       }
+
+       ntdb_trace(ntdb, "ntdb_transaction_commit");
+
+       if (ntdb->transaction->nesting != 0) {
+               ntdb->transaction->nesting--;
+               return ntdb->last_error = NTDB_SUCCESS;
+       }
+
+       /* check for a null transaction */
+       if (ntdb->transaction->blocks == NULL) {
+               _ntdb_transaction_cancel(ntdb);
+               return ntdb->last_error = NTDB_SUCCESS;
+       }
+
+       if (!ntdb->transaction->prepared) {
+               ecode = _ntdb_transaction_prepare_commit(ntdb);
+               if (ecode != NTDB_SUCCESS) {
+                       _ntdb_transaction_cancel(ntdb);
+                       return ntdb->last_error = ecode;
+               }
+       }
+
+       methods = ntdb->transaction->io_methods;
+
+       /* perform all the writes */
+       for (i=0;i<ntdb->transaction->num_blocks;i++) {
+               ntdb_off_t offset;
+               ntdb_len_t length;
+
+               if (ntdb->transaction->blocks[i] == NULL) {
+                       continue;
+               }
+
+               offset = i * PAGESIZE;
+               length = PAGESIZE;
+               if (i == ntdb->transaction->num_blocks-1) {
+                       length = ntdb->transaction->last_block_size;
+               }
+
+               ecode = methods->twrite(ntdb, offset,
+                                       ntdb->transaction->blocks[i], length);
+               if (ecode != NTDB_SUCCESS) {
+                       /* we've overwritten part of the data and
+                          possibly expanded the file, so we need to
+                          run the crash recovery code */
+                       ntdb->io = methods;
+                       ntdb_transaction_recover(ntdb);
+
+                       _ntdb_transaction_cancel(ntdb);
+
+                       return ntdb->last_error = ecode;
+               }
+               SAFE_FREE(ntdb->transaction->blocks[i]);
+       }
+
+       SAFE_FREE(ntdb->transaction->blocks);
+       ntdb->transaction->num_blocks = 0;
+
+       /* ensure the new data is on disk */
+       ecode = transaction_sync(ntdb, 0, ntdb->file->map_size);
+       if (ecode != NTDB_SUCCESS) {
+               return ntdb->last_error = ecode;
+       }
+
+       /*
+         TODO: maybe write to some dummy hdr field, or write to magic
+         offset without mmap, before the last sync, instead of the
+         utime() call
+       */
+
+       /* on some systems (like Linux 2.6.x) changes via mmap/msync
+          don't change the mtime of the file, this means the file may
+          not be backed up (as ntdb rounding to block sizes means that
+          file size changes are quite rare too). The following forces
+          mtime changes when a transaction completes */
+#if HAVE_UTIME
+       utime(ntdb->name, NULL);
+#endif
+
+       /* use a transaction cancel to free memory and remove the
+          transaction locks: it "restores" map_size, too. */
+       ntdb->transaction->old_map_size = ntdb->file->map_size;
+       _ntdb_transaction_cancel(ntdb);
+
+       return ntdb->last_error = NTDB_SUCCESS;
+}
+
+
+/*
+  recover from an aborted transaction. Must be called with exclusive
+  database write access already established (including the open
+  lock to prevent new processes attaching)
+*/
+enum NTDB_ERROR ntdb_transaction_recover(struct ntdb_context *ntdb)
+{
+       ntdb_off_t recovery_head, recovery_eof;
+       unsigned char *data, *p;
+       struct ntdb_recovery_record rec;
+       enum NTDB_ERROR ecode;
+
+       /*
+        * Replays the recovery record (if a valid one exists) back into the
+        * file, then invalidates it.  Returns NTDB_SUCCESS when there was
+        * nothing to recover or the recovery completed; otherwise the error
+        * that was logged.
+        */
+
+       /* find the recovery area */
+       recovery_head = ntdb_read_off(ntdb, offsetof(struct ntdb_header,recovery));
+       if (NTDB_OFF_IS_ERR(recovery_head)) {
+               ecode = NTDB_OFF_TO_ERR(recovery_head);
+               return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+                                 "ntdb_transaction_recover:"
+                                 " failed to read recovery head");
+       }
+
+       if (recovery_head == 0) {
+               /* we have never allocated a recovery record */
+               return NTDB_SUCCESS;
+       }
+
+       /* read the recovery record */
+       ecode = ntdb_read_convert(ntdb, recovery_head, &rec, sizeof(rec));
+       if (ecode != NTDB_SUCCESS) {
+               return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+                                 "ntdb_transaction_recover:"
+                                 " failed to read recovery record");
+       }
+
+       if (rec.magic != NTDB_RECOVERY_MAGIC) {
+               /* there is no valid recovery data */
+               return NTDB_SUCCESS;
+       }
+
+       if (ntdb->flags & NTDB_RDONLY) {
+               return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+                                 "ntdb_transaction_recover:"
+                                 " attempt to recover read only database");
+       }
+
+       recovery_eof = rec.eof;
+
+       data = (unsigned char *)malloc(rec.len);
+       if (data == NULL) {
+               return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+                                 "ntdb_transaction_recover:"
+                                 " failed to allocate recovery data");
+       }
+
+       /* read the full recovery data */
+       ecode = ntdb->io->tread(ntdb, recovery_head + sizeof(rec), data,
+                                   rec.len);
+       if (ecode != NTDB_SUCCESS) {
+               /* don't leak the recovery buffer on this error path */
+               free(data);
+               return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+                                 "ntdb_transaction_recover:"
+                                 " failed to read recovery data");
+       }
+
+       /* recover the file data: a sequence of (offset, length, bytes) */
+       p = data;
+       while (p+sizeof(ntdb_off_t)+sizeof(ntdb_len_t) < data + rec.len) {
+               ntdb_off_t ofs;
+               ntdb_len_t len;
+               ntdb_convert(ntdb, p, sizeof(ofs) + sizeof(len));
+               memcpy(&ofs, p, sizeof(ofs));
+               memcpy(&len, p + sizeof(ofs), sizeof(len));
+               p += sizeof(ofs) + sizeof(len);
+
+               /*
+                * The length comes from disk: refuse an entry that claims
+                * more bytes than remain in the buffer, rather than letting
+                * twrite read past the end of the allocation.  The loop
+                * condition guarantees p is still inside the buffer here.
+                */
+               if (len > (ntdb_len_t)(data + rec.len - p)) {
+                       free(data);
+                       return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+                                         NTDB_LOG_ERROR,
+                                         "ntdb_transaction_recover:"
+                                         " recovery entry of %zu bytes"
+                                         " at offset %zu overruns"
+                                         " recovery data",
+                                         (size_t)len, (size_t)ofs);
+               }
+
+               ecode = ntdb->io->twrite(ntdb, ofs, p, len);
+               if (ecode != NTDB_SUCCESS) {
+                       free(data);
+                       return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+                                         "ntdb_transaction_recover:"
+                                         " failed to recover %zu bytes"
+                                         " at offset %zu",
+                                         (size_t)len, (size_t)ofs);
+               }
+               p += len;
+       }
+
+       free(data);
+
+       /* flush the restored contents before touching the recovery record */
+       ecode = transaction_sync(ntdb, 0, ntdb->file->map_size);
+       if (ecode != NTDB_SUCCESS) {
+               return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+                                 "ntdb_transaction_recover:"
+                                 " failed to sync recovery");
+       }
+
+       /* if the recovery area is after the recovered eof then remove it */
+       if (recovery_eof <= recovery_head) {
+               ecode = ntdb_write_off(ntdb, offsetof(struct ntdb_header,
+                                                   recovery),
+                                     0);
+               if (ecode != NTDB_SUCCESS) {
+                       return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+                                         "ntdb_transaction_recover:"
+                                         " failed to remove recovery head");
+               }
+       }
+
+       /* remove the recovery magic */
+       ecode = ntdb_write_off(ntdb,
+                             recovery_head
+                             + offsetof(struct ntdb_recovery_record, magic),
+                             NTDB_RECOVERY_INVALID_MAGIC);
+       if (ecode != NTDB_SUCCESS) {
+               return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+                                 "ntdb_transaction_recover:"
+                                 " failed to remove recovery magic");
+       }
+
+       ecode = transaction_sync(ntdb, 0, recovery_eof);
+       if (ecode != NTDB_SUCCESS) {
+               return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+                                 "ntdb_transaction_recover:"
+                                 " failed to sync2 recovery");
+       }
+
+       ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING,
+                  "ntdb_transaction_recover: recovered %zu byte database",
+                  (size_t)recovery_eof);
+
+       /* all done */
+       return NTDB_SUCCESS;
+}
+
+/*
+ * Report whether the database holds a valid recovery record: true when the
+ * record's magic is NTDB_RECOVERY_MAGIC, false when no record was ever
+ * allocated or the magic differs, or an encoded error if a read fails.
+ */
+ntdb_bool_err ntdb_needs_recovery(struct ntdb_context *ntdb)
+{
+       struct ntdb_recovery_record rec;
+       enum NTDB_ERROR ecode;
+       ntdb_off_t head;
+
+       /* locate the recovery area via the header */
+       head = ntdb_read_off(ntdb, offsetof(struct ntdb_header,recovery));
+       if (NTDB_OFF_IS_ERR(head))
+               return head;
+
+       /* a zero offset means no recovery record was ever allocated */
+       if (!head)
+               return false;
+
+       /* fetch the record itself and inspect its magic */
+       ecode = ntdb_read_convert(ntdb, head, &rec, sizeof(rec));
+       if (ecode == NTDB_SUCCESS)
+               return (rec.magic == NTDB_RECOVERY_MAGIC);
+
+       return NTDB_ERR_TO_OFF(ecode);
+}
diff --git a/lib/ntdb/traverse.c b/lib/ntdb/traverse.c
new file mode 100644 (file)
index 0000000..52bf75c
--- /dev/null
@@ -0,0 +1,99 @@
+ /*
+   Trivial Database 2: traverse function.
+   Copyright (C) Rusty Russell 2010
+
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 3 of the License, or (at your option) any later version.
+
+   This library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include "private.h"
+#include <ccan/likely/likely.h>
+
+/*
+ * Walk every record, calling fn (if non-NULL) with the key, the data and p.
+ * A non-zero return from fn stops the walk.  Returns the number of records
+ * visited (including the one that stopped the walk), or an error encoded
+ * with NTDB_ERR_TO_OFF; ntdb->last_error is updated either way.
+ */
+_PUBLIC_ int64_t ntdb_traverse_(struct ntdb_context *ntdb,
+                     int (*fn)(struct ntdb_context *,
+                               NTDB_DATA, NTDB_DATA, void *),
+                     void *p)
+{
+       struct traverse_info tinfo;
+       NTDB_DATA key, data;
+       enum NTDB_ERROR ecode;
+       int64_t nrecords = 0;
+
+       key.dptr = NULL;
+       ecode = first_in_hash(ntdb, &tinfo, &key, &data.dsize);
+       while (ecode == NTDB_SUCCESS) {
+               int stop;
+
+               /* the data bytes follow the key bytes in the same buffer */
+               data.dptr = key.dptr + key.dsize;
+               nrecords++;
+
+               stop = (fn != NULL) && fn(ntdb, key, data, p);
+               free(key.dptr);
+               if (stop) {
+                       ntdb->last_error = NTDB_SUCCESS;
+                       return nrecords;
+               }
+
+               ecode = next_in_hash(ntdb, &tinfo, &key, &data.dsize);
+       }
+
+       /* running off the end of the hash is the normal way to finish */
+       if (ecode == NTDB_ERR_NOEXIST) {
+               ntdb->last_error = NTDB_SUCCESS;
+               return nrecords;
+       }
+
+       ntdb->last_error = ecode;
+       return NTDB_ERR_TO_OFF(ecode);
+}
+
+/* Fetch the first key of a traversal into *key and record the result
+ * in ntdb->last_error. */
+_PUBLIC_ enum NTDB_ERROR ntdb_firstkey(struct ntdb_context *ntdb, NTDB_DATA *key)
+{
+       struct traverse_info tinfo;
+       enum NTDB_ERROR ecode;
+
+       /* NULL: the caller only wants the key, not the data length */
+       ecode = first_in_hash(ntdb, &tinfo, key, NULL);
+       ntdb->last_error = ecode;
+       return ecode;
+}
+
+/*
+ * Replace *key with the key that follows it in traversal order.
+ *
+ * The buffer at key->dptr is freed unconditionally, even when the lookup
+ * fails, so the caller must not use or free the old key afterwards.
+ * Returns (and stores in ntdb->last_error) the lookup error, or whatever
+ * next_in_hash() reports.
+ *
+ * We lock twice, not very efficient.  We could keep last key & tinfo cached.
+ */
+_PUBLIC_ enum NTDB_ERROR ntdb_nextkey(struct ntdb_context *ntdb, NTDB_DATA *key)
+{
+       struct traverse_info tinfo;
+       struct hash_info h;
+       struct ntdb_used_record rec;
+
+       /* Look up the current key under a read lock; this also fills in
+        * tinfo so the traversal can continue from that position. */
+       tinfo.prev = find_and_lock(ntdb, *key, F_RDLCK, &h, &rec, &tinfo);
+       /* The old key is no longer needed whether or not the lookup worked. */
+       free(key->dptr);
+       if (NTDB_OFF_IS_ERR(tinfo.prev)) {
+               return ntdb->last_error = NTDB_OFF_TO_ERR(tinfo.prev);
+       }
+       /* Drop the lookup lock before stepping to the next entry. */
+       ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, F_RDLCK);
+
+       /* NULL: only the key is wanted, not the data length. */
+       return ntdb->last_error = next_in_hash(ntdb, &tinfo, key, NULL);
+}
+
+/* Traverse callback for ntdb_wipe_all(): delete one record, store the
+ * result in *ecode and return non-zero (stop traversing) on failure. */
+static int wipe_one(struct ntdb_context *ntdb,
+                   NTDB_DATA key, NTDB_DATA data, enum NTDB_ERROR *ecode)
+{
+       enum NTDB_ERROR res = ntdb_delete(ntdb, key);
+
+       *ecode = res;
+       return (res == NTDB_SUCCESS) ? 0 : 1;
+}
+
+/*
+ * Delete every record while holding the all-record write lock.  Returns,
+ * and stores in ntdb->last_error, the lock error, the traversal error or
+ * the first failing delete's error; NTDB_SUCCESS otherwise.
+ */
+_PUBLIC_ enum NTDB_ERROR ntdb_wipe_all(struct ntdb_context *ntdb)
+{
+       enum NTDB_ERROR ecode;
+       int64_t count;
+
+       ecode = ntdb_allrecord_lock(ntdb, F_WRLCK, NTDB_LOCK_WAIT, false);
+       if (ecode == NTDB_SUCCESS) {
+               /* FIXME: Be smarter. */
+               count = ntdb_traverse(ntdb, wipe_one, &ecode);
+               if (count < 0) {
+                       /* the traversal itself failed, not a delete */
+                       ecode = NTDB_OFF_TO_ERR(count);
+               }
+               ntdb_allrecord_unlock(ntdb, F_WRLCK);
+       }
+
+       ntdb->last_error = ecode;
+       return ecode;
+}
diff --git a/lib/ntdb/wscript b/lib/ntdb/wscript
new file mode 100644 (file)
index 0000000..e6feb14
--- /dev/null
@@ -0,0 +1,265 @@
+#!/usr/bin/env python
+
+# Project name and version.  VERSION is reused below as the library vnum,
+# as the minimum acceptable system ntdb/pyntdb version, and as the
+# PACKAGE_VERSION define for the Python module.
+APPNAME = 'ntdb'
+VERSION = '0.9'
+
+# Build output directory (read back as Utils.g_module.blddir in testonly()).
+blddir = 'bin'
+
+import sys, os
+
+# Find the buildtools directory: starting from '.', keep prepending '../'
+# until a 'buildtools' entry exists or the path has grown to five
+# components (i.e. give up after four levels up).
+srcdir = '.'
+while not os.path.exists(srcdir+'/buildtools') and len(srcdir.split('/')) < 5:
+    srcdir = '../' + srcdir
+# Make the wafsamba helpers importable before importing them below.
+sys.path.insert(0, srcdir + '/buildtools/wafsamba')
+
+import wafsamba, samba_dist, Options, Logs, glob
+
+# Directories handed to samba_dist for the tarball built by dist().
+# NOTE(review): entries look like '<source dir>:<dir in tarball>' -- confirm
+# against samba_dist.DIST_DIRS.
+samba_dist.DIST_DIRS('lib/ntdb:. lib/replace:lib/replace lib/ccan:lib/ccan buildtools:buildtools')
+
+def set_options(opt):
+    opt.BUILTIN_DEFAULT('replace,ccan')
+    opt.PRIVATE_EXTENSION_DEFAULT('ntdb', noextension='ntdb')
+    opt.RECURSE('lib/replace')
+    opt.add_option('--valgrind',
+                   help=("use valgrind on tests programs"),
+                   action="store_true", dest='VALGRIND', default=False)
+    opt.add_option('--valgrind-log',
+                   help=("where to put the valgrind log"),
+                   action="store", dest='VALGRINDLOG', default=None)
+    if opt.IN_LAUNCH_DIR():
+        opt.add_option('--disable-python',
+                       help=("disable the pyntdb module"),
+                       action="store_true", dest='disable_python', default=False)
+
+def configure(conf):
+    # Configure step: configure the replace and ccan libraries, record the
+    # test source lists in conf.env, decide between standalone and bundled
+    # builds, and probe for Python support.
+    conf.RECURSE('lib/replace')
+    conf.RECURSE('lib/ccan')
+
+    # Test programs, grouped by kind.  build() turns each run-/api- source
+    # into an 'ntdb-<basename>' binary and testonly() executes them in this
+    # order; the *_HELPER_SRC lists become helper subsystems in build().
+    conf.env.NTDB_TEST_RUN_SRC=['test/run-001-encode.c',
+                                'test/run-001-fls.c',
+                                'test/run-01-new_database.c',
+                                'test/run-02-expand.c',
+                                'test/run-03-coalesce.c',
+                                'test/run-04-basichash.c',
+                                'test/run-05-readonly-open.c',
+                                'test/run-10-simple-store.c',
+                                'test/run-11-simple-fetch.c',
+                                'test/run-12-check.c',
+                                'test/run-15-append.c',
+                                'test/run-20-growhash.c',
+                                'test/run-25-hashoverload.c',
+                                'test/run-30-exhaust-before-expand.c',
+                                'test/run-35-convert.c',
+                                'test/run-50-multiple-freelists.c',
+                                'test/run-56-open-during-transaction.c',
+                                'test/run-57-die-during-transaction.c',
+                                'test/run-64-bit-tdb.c',
+                                'test/run-90-get-set-attributes.c',
+                                'test/run-capabilities.c',
+                                'test/run-expand-in-transaction.c',
+                                'test/run-features.c',
+                                'test/run-lockall.c',
+                                'test/run-remap-in-read_traverse.c',
+                                'test/run-seed.c',
+                                'test/run-tdb_errorstr.c',
+                                'test/run-tdb_foreach.c',
+                                'test/run-traverse.c']
+    conf.env.NTDB_TEST_API_SRC=['test/api-12-store.c',
+                                'test/api-13-delete.c',
+                                'test/api-14-exists.c',
+                                'test/api-16-wipe_all.c',
+                                'test/api-21-parse_record.c',
+                                'test/api-55-transaction.c',
+                                'test/api-80-tdb_fd.c',
+                                'test/api-81-seqnum.c',
+                                'test/api-82-lockattr.c',
+                                'test/api-83-openhook.c',
+                                'test/api-91-get-stats.c',
+                                'test/api-92-get-set-readonly.c',
+                                'test/api-93-repack.c',
+                                'test/api-add-remove-flags.c',
+                                'test/api-check-callback.c',
+                                'test/api-firstkey-nextkey.c',
+                                'test/api-fork-test.c',
+                                'test/api-locktimeout.c',
+                                'test/api-missing-entries.c',
+                                'test/api-open-multiple-times.c',
+                                'test/api-record-expand.c',
+                                'test/api-simple-delete.c',
+                                'test/api-summary.c']
+    conf.env.NTDB_TEST_API_HELPER_SRC=['test/helpapi-external-agent.c']
+    conf.env.NTDB_TEST_RUN_HELPER_SRC=['test/helprun-external-agent.c',
+                                       'test/helprun-layout.c']
+    conf.env.NTDB_TEST_HELPER_SRC=['test/external-agent.c',
+                                   'test/failtest_helper.c',
+                                   'test/lock-tracking.c',
+                                   'test/logging.c',
+                                   'test/tap-interface.c']
+
+    # Standalone build when waf was launched from this directory.
+    conf.env.standalone_ntdb = conf.IN_LAUNCH_DIR()
+    # --disable-python is only registered in the launch directory (see
+    # set_options), hence the getattr() default.
+    conf.env.disable_python = getattr(Options.options, 'disable_python', False)
+
+    # When built as part of a larger tree, see whether a system ntdb (and
+    # then a system pyntdb) of at least VERSION can be used; build() skips
+    # the corresponding targets when these defines are set.
+    if not conf.env.standalone_ntdb:
+        if conf.CHECK_BUNDLED_SYSTEM('ntdb', minversion=VERSION,
+                                         implied_deps='replace'):
+            conf.define('USING_SYSTEM_NTDB', 1)
+            if conf.CHECK_BUNDLED_SYSTEM_PYTHON('pyntdb', 'ntdb', minversion=VERSION):
+                conf.define('USING_SYSTEM_PYNTDB', 1)
+
+    if not conf.env.disable_python:
+        # also disable if we don't have the python libs installed
+        conf.find_program('python', var='PYTHON')
+        conf.check_tool('python')
+        conf.check_python_version((2,4,2))
+        # mandatory=False: missing headers only disable pyntdb below.
+        conf.SAMBA_CHECK_PYTHON_HEADERS(mandatory=False)
+        if not conf.env.HAVE_PYTHON_H:
+            Logs.warn('Disabling pyntdb as python devel libs not found')
+            conf.env.disable_python = True
+
+    # This makes #include <ccan/...> work.
+    conf.ADD_EXTRA_INCLUDES('''#lib''')
+
+    conf.SAMBA_CONFIG_H()
+
+def build(bld):
+    bld.RECURSE('lib/replace')
+    bld.RECURSE('lib/ccan')
+
+    if bld.env.standalone_ntdb:
+        bld.env.PKGCONFIGDIR = '${LIBDIR}/pkgconfig'
+        private_library = False
+    else:
+        private_library = True
+
+    SRC = '''check.c free.c hash.c io.c lock.c open.c
+                 summary.c ntdb.c transaction.c traverse.c'''
+
+    if not bld.CONFIG_SET('USING_SYSTEM_NTDB'):
+        bld.SAMBA_LIBRARY('ntdb',
+                          SRC,
+                          deps='replace ccan',
+                          includes='.',
+                          abi_directory='ABI',
+                          abi_match='ntdb_*',
+                          hide_symbols=True,
+                          vnum=VERSION,
+                          public_headers='ntdb.h',
+                          public_headers_install=not private_library,
+                          pc_files='ntdb.pc',
+                          private_library=private_library)
+
+        bld.SAMBA_BINARY('ntdbtorture',
+                         'tools/ntdbtorture.c',
+                         deps='ntdb',
+                         install=False)
+
+        bld.SAMBA_BINARY('ntdbtool',
+                         'tools/ntdbtool.c',
+                         deps='ntdb')
+
+        bld.SAMBA_BINARY('ntdbdump',
+                         'tools/ntdbdump.c',
+                         deps='ntdb')
+
+        bld.SAMBA_BINARY('ntdbrestore',
+                         'tools/ntdbrestore.c',
+                         deps='ntdb')
+
+        bld.SAMBA_BINARY('ntdbbackup',
+                         'tools/ntdbbackup.c',
+                         deps='ntdb')
+
+        if bld.env.DEVELOPER_MODE:
+            # FIXME: We need CCAN for some API tests, but waf thinks it's
+            # already available via ntdb.  It is, but not publicly.
+            # Workaround is to build a private, non-hiding version.
+            bld.SAMBA_SUBSYSTEM('ntdb-testing',
+                                SRC,
+                                deps='replace ccan',
+                                includes='.')
+
+            bld.SAMBA_SUBSYSTEM('ntdb-test-helpers',
+                                bld.env.NTDB_TEST_HELPER_SRC,
+                                deps='replace')
+            bld.SAMBA_SUBSYSTEM('ntdb-run-helpers',
+                                bld.env.NTDB_TEST_RUN_HELPER_SRC,
+                                deps='replace')
+            bld.SAMBA_SUBSYSTEM('ntdb-api-helpers',
+                                bld.env.NTDB_TEST_API_HELPER_SRC,
+                                deps='replace ntdb-testing')
+
+            for f in bld.env.NTDB_TEST_RUN_SRC:
+                base = os.path.splitext(os.path.basename(f))[0]
+                bld.SAMBA_BINARY('ntdb-' + base, f,
+                                 deps='ccan replace ntdb-test-helpers ntdb-run-helpers ccan-failtest',
+                                 install=False)
+
+            for f in bld.env.NTDB_TEST_API_SRC:
+                base = os.path.splitext(os.path.basename(f))[0]
+                bld.SAMBA_BINARY('ntdb-' + base, f,
+                                 deps='ccan replace ntdb-test-helpers ntdb-api-helpers',
+                                 install=False)
+
+        if not bld.CONFIG_SET('USING_SYSTEM_PYNTDB'):
+            bld.SAMBA_PYTHON('pyntdb',
+                             source='pyntdb.c',
+                             deps='ntdb',
+                             enabled=not bld.env.disable_python,
+                             realname='ntdb.so',
+                             cflags='-DPACKAGE_VERSION=\"%s\"' % VERSION)
+
+def testonly(ctx):
+    '''run ntdb testsuite'''
+    import Utils, samba_utils, shutil
+    ecode = 0;
+
+    env = samba_utils.LOAD_ENVIRONMENT()
+
+    if env.standalone_ntdb:
+        # FIXME: This is horrible :(
+        test_prefix = "%s/st" % (Utils.g_module.blddir)
+        shutil.rmtree(test_prefix, ignore_errors=True)
+        os.makedirs(test_prefix)
+
+        # Create scratch directory for tests.
+        testdir = os.path.join(test_prefix, 'ntdb-tests')
+        samba_utils.mkdir_p(testdir)
+        # Symlink back to source dir so it can find tests in test/
+        link = os.path.join(testdir, 'test')
+        if not os.path.exists(link):
+            os.symlink(os.path.abspath(os.path.join(env.cwd, 'test')), link)
+
+        if Options.options.VALGRIND:
+            os.environ['VALGRIND'] = 'valgrind -q --num-callers=30'
+        if Options.options.VALGRINDLOG is not None:
+            os.environ['VALGRIND'] += ' --log-file=%s' % Options.options.VALGRINDLOG
+
+        for f in env.NTDB_TEST_RUN_SRC + env.NTDB_TEST_API_SRC:
+            name = "ntdb-" + os.path.splitext(os.path.basename(f))[0]
+            cmd = "cd " + testdir + " && $VALGRIND " + os.path.abspath(os.path.join(Utils.g_module.blddir, name)) + " > test-output 2>&1"
+            print("..." + f)
+            ret = samba_utils.RUN_COMMAND(cmd)
+            if ret != 0:
+                print("%s (%s) failed:" % (name, f))
+                samba_utils.RUN_COMMAND("cat " + os.path.join(testdir, 'test-output'))
+                ecode = ret;
+                break;
+
+    sys.exit(ecode)
+
+# WAF doesn't build the unit tests for this, maybe because they don't link with ntdb?
+# This forces it
+def test(ctx):
+    import Scripting
+    Scripting.commands.append('build')
+    Scripting.commands.append('testonly')
+
+# Thin wrapper: delegates to samba_dist.dist(), which uses the directories
+# registered with samba_dist.DIST_DIRS() at the top of this file.
+def dist():
+    '''makes a tarball for distribution'''
+    samba_dist.dist()
+
+# Thin wrapper: delegates to samba_utils.reconfigure() with the waf context.
+def reconfigure(ctx):
+    '''reconfigure if config scripts have changed'''
+    # Imported here rather than at file level, as in testonly().
+    import samba_utils
+    samba_utils.reconfigure(ctx)
diff --git a/lib/tdb2/ABI/tdb-2.0.0.sigs b/lib/tdb2/ABI/tdb-2.0.0.sigs
deleted file mode 100644 (file)
index 0e54b90..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-tdb1_incompatible_hash: uint64_t (const void *, size_t, uint64_t, void *)
-tdb_add_flag: void (struct tdb_context *, unsigned int)
-tdb_append: enum TDB_ERROR (struct tdb_context *, struct tdb_data, struct tdb_data)
-tdb_chainlock: enum TDB_ERROR (struct tdb_context *, TDB_DATA)
-tdb_chainlock_read: enum TDB_ERROR (struct tdb_context *, TDB_DATA)
-tdb_chainunlock: void (struct tdb_context *, TDB_DATA)
-tdb_chainunlock_read: void (struct tdb_context *, TDB_DATA)
-tdb_check_: enum TDB_ERROR (struct tdb_context *, enum TDB_ERROR (*)(TDB_DATA, TDB_DATA, void *), void *)
-tdb_close: int (struct tdb_context *)
-tdb_delete: enum TDB_ERROR (struct tdb_context *, struct tdb_data)
-tdb_error: enum TDB_ERROR (struct tdb_context *)
-tdb_errorstr: const char *(enum TDB_ERROR)
-tdb_exists: bool (struct tdb_context *, TDB_DATA)
-tdb_fd: int (const struct tdb_context *)
-tdb_fetch: enum TDB_ERROR (struct tdb_context *, struct tdb_data, struct tdb_data *)
-tdb_firstkey: enum TDB_ERROR (struct tdb_context *, struct tdb_data *)
-tdb_foreach_: void (int (*)(struct tdb_context *, void *), void *)
-tdb_get_attribute: enum TDB_ERROR (struct tdb_context *, union tdb_attribute *)
-tdb_get_flags: unsigned int (struct tdb_context *)
-tdb_get_seqnum: int64_t (struct tdb_context *)
-tdb_lockall: enum TDB_ERROR (struct tdb_context *)
-tdb_lockall_read: enum TDB_ERROR (struct tdb_context *)
-tdb_name: const char *(const struct tdb_context *)
-tdb_nextkey: enum TDB_ERROR (struct tdb_context *, struct tdb_data *)
-tdb_open: struct tdb_context *(const char *, int, int, mode_t, union tdb_attribute *)
-tdb_parse_record_: enum TDB_ERROR (struct tdb_context *, TDB_DATA, enum TDB_ERROR (*)(TDB_DATA, TDB_DATA, void *), void *)
-tdb_remove_flag: void (struct tdb_context *, unsigned int)
-tdb_repack: enum TDB_ERROR (struct tdb_context *)
-tdb_set_attribute: enum TDB_ERROR (struct tdb_context *, const union tdb_attribute *)
-tdb_store: enum TDB_ERROR (struct tdb_context *, struct tdb_data, struct tdb_data, int)
-tdb_summary: enum TDB_ERROR (struct tdb_context *, enum tdb_summary_flags, char **)
-tdb_transaction_cancel: void (struct tdb_context *)
-tdb_transaction_commit: enum TDB_ERROR (struct tdb_context *)
-tdb_transaction_prepare_commit: enum TDB_ERROR (struct tdb_context *)
-tdb_transaction_start: enum TDB_ERROR (struct tdb_context *)
-tdb_traverse_: int64_t (struct tdb_context *, int (*)(struct tdb_context *, TDB_DATA, TDB_DATA, void *), void *)
-tdb_unlockall: void (struct tdb_context *)
-tdb_unlockall_read: void (struct tdb_context *)
-tdb_unset_attribute: void (struct tdb_context *, enum tdb_attribute_type)
-tdb_wipe_all: enum TDB_ERROR (struct tdb_context *)
diff --git a/lib/tdb2/ABI/tdb-2.0.1.sigs b/lib/tdb2/ABI/tdb-2.0.1.sigs
deleted file mode 100644 (file)
index f9ee55f..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-tdb_add_flag: void (struct tdb_context *, unsigned int)
-tdb_append: enum TDB_ERROR (struct tdb_context *, struct tdb_data, struct tdb_data)
-tdb_chainlock: enum TDB_ERROR (struct tdb_context *, TDB_DATA)
-tdb_chainlock_read: enum TDB_ERROR (struct tdb_context *, TDB_DATA)
-tdb_chainunlock: void (struct tdb_context *, TDB_DATA)
-tdb_chainunlock_read: void (struct tdb_context *, TDB_DATA)
-tdb_check_: enum TDB_ERROR (struct tdb_context *, enum TDB_ERROR (*)(TDB_DATA, TDB_DATA, void *), void *)
-tdb_close: int (struct tdb_context *)
-tdb_delete: enum TDB_ERROR (struct tdb_context *, struct tdb_data)
-tdb_error: enum TDB_ERROR (struct tdb_context *)
-tdb_errorstr: const char *(enum TDB_ERROR)
-tdb_exists: bool (struct tdb_context *, TDB_DATA)
-tdb_fd: int (const struct tdb_context *)
-tdb_fetch: enum TDB_ERROR (struct tdb_context *, struct tdb_data, struct tdb_data *)
-tdb_firstkey: enum TDB_ERROR (struct tdb_context *, struct tdb_data *)
-tdb_foreach_: void (int (*)(struct tdb_context *, void *), void *)
-tdb_get_attribute: enum TDB_ERROR (struct tdb_context *, union tdb_attribute *)
-tdb_get_flags: unsigned int (struct tdb_context *)
-tdb_get_seqnum: int64_t (struct tdb_context *)
-tdb_lockall: enum TDB_ERROR (struct tdb_context *)
-tdb_lockall_read: enum TDB_ERROR (struct tdb_context *)
-tdb_name: const char *(const struct tdb_context *)
-tdb_nextkey: enum TDB_ERROR (struct tdb_context *, struct tdb_data *)
-tdb_open: struct tdb_context *(const char *, int, int, mode_t, union tdb_attribute *)
-tdb_parse_record_: enum TDB_ERROR (struct tdb_context *, TDB_DATA, enum TDB_ERROR (*)(TDB_DATA, TDB_DATA, void *), void *)
-tdb_remove_flag: void (struct tdb_context *, unsigned int)
-tdb_repack: enum TDB_ERROR (struct tdb_context *)
-tdb_set_attribute: enum TDB_ERROR (struct tdb_context *, const union tdb_attribute *)
-tdb_store: enum TDB_ERROR (struct tdb_context *, struct tdb_data, struct tdb_data, int)
-tdb_summary: enum TDB_ERROR (struct tdb_context *, enum tdb_summary_flags, char **)
-tdb_transaction_cancel: void (struct tdb_context *)
-tdb_transaction_commit: enum TDB_ERROR (struct tdb_context *)
-tdb_transaction_prepare_commit: enum TDB_ERROR (struct tdb_context *)
-tdb_transaction_start: enum TDB_ERROR (struct tdb_context *)
-tdb_traverse_: int64_t (struct tdb_context *, int (*)(struct tdb_context *, TDB_DATA, TDB_DATA, void *), void *)
-tdb_unlockall: void (struct tdb_context *)
-tdb_unlockall_read: void (struct tdb_context *)
-tdb_unset_attribute: void (struct tdb_context *, enum tdb_attribute_type)
-tdb_wipe_all: enum TDB_ERROR (struct tdb_context *)
diff --git a/lib/tdb2/TODO b/lib/tdb2/TODO
deleted file mode 100644 (file)
index 0a9374f..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-- tdb2restore, tdb2dump, tdb2backup
-- tdb2tool man page
-- Integrate ccan testsuite
-- Integrate tdb2 testsuite
diff --git a/lib/tdb2/_info b/lib/tdb2/_info
deleted file mode 100644 (file)
index 37c0c29..0000000
+++ /dev/null
@@ -1,91 +0,0 @@
-#include <string.h>
-#include <stdio.h>
-
-/**
- * tdb2 - [[WORK IN PROGRESS!]] The trivial (64bit transactional) database
- *
- * The tdb2 module provides an efficient keyword data mapping (usually
- * within a file).  It supports transactions, so the contents of the
- * database is reliable even across crashes.
- *
- * Example:
- *     #include <ccan/tdb2/tdb2.h>
- *     #include <ccan/str/str.h>
- *     #include <ccan/err/err.h>
- *     #include <stdio.h>
- *
- *     static void usage(const char *argv0)
- *     {
- *             errx(1, "Usage: %s fetch <dbfile> <key>\n"
- *                  "OR %s store <dbfile> <key> <data>", argv0, argv0);
- *     }
- *
- *     int main(int argc, char *argv[])
- *     {
- *             struct tdb_context *tdb;
- *             TDB_DATA key, value;
- *             enum TDB_ERROR error;
- *
- *             if (argc < 4)
- *                     usage(argv[0]);
- *
- *             tdb = tdb_open(argv[2], TDB_DEFAULT, O_CREAT|O_RDWR,0600, NULL);
- *             if (!tdb)
- *                     err(1, "Opening %s", argv[2]);
- *
- *             key.dptr = (void *)argv[3];
- *             key.dsize = strlen(argv[3]);
- *
- *             if (streq(argv[1], "fetch")) {
- *                     if (argc != 4)
- *                             usage(argv[0]);
- *                     error = tdb_fetch(tdb, key, &value);
- *                     if (error)
- *                             errx(1, "fetch %s: %s",
- *                                  argv[3], tdb_errorstr(error));
- *                     printf("%.*s\n", value.dsize, (char *)value.dptr);
- *                     free(value.dptr);
- *             } else if (streq(argv[1], "store")) {
- *                     if (argc != 5)
- *                             usage(argv[0]);
- *                     value.dptr = (void *)argv[4];
- *                     value.dsize = strlen(argv[4]);
- *                     error = tdb_store(tdb, key, value, 0);
- *                     if (error)
- *                             errx(1, "store %s: %s",
- *                                  argv[3], tdb_errorstr(error));
- *             } else
- *                     usage(argv[0]);
- *
- *             return 0;
- *     }
- *
- * Maintainer: Rusty Russell <rusty@rustcorp.com.au>
- *
- * Author: Rusty Russell
- *
- * License: LGPLv3 (or later)
- */
-int main(int argc, char *argv[])
-{
-       if (argc != 2)
-               return 1;
-
-       if (strcmp(argv[1], "depends") == 0) {
-               printf("ccan/asprintf\n");
-               printf("ccan/hash\n");
-               printf("ccan/likely\n");
-               printf("ccan/asearch\n");
-               printf("ccan/compiler\n");
-               printf("ccan/build_assert\n");
-               printf("ccan/ilog\n");
-               printf("ccan/failtest\n");
-               printf("ccan/tally\n");
-               printf("ccan/typesafe_cb\n");
-               printf("ccan/cast\n");
-               printf("ccan/endian\n");
-               return 0;
-       }
-
-       return 1;
-}
diff --git a/lib/tdb2/check.c b/lib/tdb2/check.c
deleted file mode 100644 (file)
index 4b589b6..0000000
+++ /dev/null
@@ -1,864 +0,0 @@
- /*
-   Trivial Database 2: free list/block handling
-   Copyright (C) Rusty Russell 2010
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-#include "private.h"
-#include <ccan/likely/likely.h>
-#include <ccan/asearch/asearch.h>
-
-/*
- * We keep an ordered array of offsets: grow it by one slot and store
- * off in the new last position.
- *
- * Returns true on success.  If realloc fails, returns false and leaves
- * *arr and *num exactly as they were.
- */
-static bool append(tdb_off_t **arr, size_t *num, tdb_off_t off)
-{
-       size_t count = *num;
-       tdb_off_t *grown;
-
-       grown = realloc(*arr, (count + 1) * sizeof(*grown));
-       if (grown == NULL)
-               return false;
-
-       grown[count] = off;
-       *num = count + 1;
-       *arr = grown;
-       return true;
-}
-
-/*
- * Validate the database header stored at offset 0.
- *
- * On success *features receives the offered feature bits, *recovery the
- * recovery-area offset from the header (0 if none), and
- * *num_capabilities is incremented once per capability record found on
- * the header's capability list.
- *
- * Returns TDB_SUCCESS, or the first error encountered (read failure,
- * TDB_ERR_CORRUPT for an inconsistent header, or whatever
- * unknown_capability() reports for a capability record).
- */
-static enum TDB_ERROR check_header(struct tdb_context *tdb, tdb_off_t *recovery,
-                                  uint64_t *features, size_t *num_capabilities)
-{
-       struct tdb_header hdr;
-       uint64_t expected_hash = TDB_HASH_MAGIC;
-       tdb_off_t cap_off;
-       enum TDB_ERROR ecode;
-
-       ecode = tdb_read_convert(tdb, 0, &hdr, sizeof(hdr));
-       if (ecode != TDB_SUCCESS)
-               return ecode;
-
-       /* magic food should not be converted, so convert back. */
-       tdb_convert(tdb, hdr.magic_food, sizeof(hdr.magic_food));
-
-       /* The header stores the hash of a known constant; recompute it. */
-       expected_hash = tdb_hash(tdb, &expected_hash, sizeof(expected_hash));
-       if (hdr.hash_test != expected_hash)
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "check: hash test %llu should be %llu",
-                                 (long long)hdr.hash_test,
-                                 (long long)expected_hash);
-
-       if (strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0)
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "check: bad magic '%.*s'",
-                                 (unsigned)sizeof(hdr.magic_food),
-                                 hdr.magic_food);
-
-       /* Features which are used must be a subset of features offered. */
-       if ((hdr.features_used & ~hdr.features_offered) != 0)
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "check: features used (0x%llx) which"
-                                 " are not offered (0x%llx)",
-                                 (long long)hdr.features_used,
-                                 (long long)hdr.features_offered);
-
-       *features = hdr.features_offered;
-       *recovery = hdr.recovery;
-
-       /* A non-zero recovery offset must lie past the header and
-        * no further than the end of the mapped file. */
-       if (*recovery
-           && (*recovery < sizeof(hdr)
-               || *recovery > tdb->file->map_size))
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "tdb_check:"
-                                 " invalid recovery offset %zu",
-                                 (size_t)*recovery);
-
-       /* Walk the linked list of capability records. */
-       cap_off = hdr.capabilities;
-       while (cap_off) {
-               const struct tdb_capability *cap;
-               enum TDB_ERROR cap_err;
-
-               cap = tdb_access_read(tdb, cap_off, sizeof(*cap), true);
-               if (TDB_PTR_IS_ERR(cap))
-                       return TDB_PTR_ERR(cap);
-
-               /* All capabilities are unknown. */
-               cap_err = unknown_capability(tdb, "tdb_check", cap->type);
-               /* Fetch the link before the record is released. */
-               cap_off = cap->next;
-               tdb_access_release(tdb, cap);
-               if (cap_err)
-                       return cap_err;
-               (*num_capabilities)++;
-       }
-
-       /* Don't check reserved: they *can* be used later. */
-       return TDB_SUCCESS;
-}
-
-/*
- * Forward declaration: check_hash_chain() below calls check_hash_tree(),
- * which in turn calls check_hash_record(), which calls back into
- * check_hash_chain(), so the tree walker must be declared before its
- * definition.
- */
-static enum TDB_ERROR check_hash_tree(struct tdb_context *tdb,
-                                     tdb_off_t off, unsigned int group_bits,
-                                     uint64_t hprefix,
-                                     unsigned hprefix_bits,
-                                     tdb_off_t used[],
-                                     size_t num_used,
-                                     size_t *num_found,
-                                     enum TDB_ERROR (*check)(TDB_DATA,
-                                                             TDB_DATA, void *),
-                                     void *data);
-
-/*
- * Validate a linked list of hash-chain records starting at off.
- *
- * Each chain record must carry TDB_CHAIN_MAGIC, a data length of
- * sizeof(struct tdb_chain), and zero key length and hash bits.  Its
- * entries are checked by check_hash_tree() as a single group with the
- * full 64-bit hash as prefix.  Every follow-on chain record reached via
- * the "next" link is counted in *num_found.
- *
- * Returns TDB_SUCCESS when the end of the chain (next == 0) is reached,
- * otherwise the first error.
- */
-static enum TDB_ERROR check_hash_chain(struct tdb_context *tdb,
-                                      tdb_off_t off,
-                                      uint64_t hash,
-                                      tdb_off_t used[],
-                                      size_t num_used,
-                                      size_t *num_found,
-                                      enum TDB_ERROR (*check)(TDB_DATA,
-                                                              TDB_DATA,
-                                                              void *),
-                                      void *data)
-{
-       struct tdb_used_record hdr;
-       enum TDB_ERROR err;
-
-       /* One iteration per chain record; follow "next" until it is 0. */
-       for (;;) {
-               err = tdb_read_convert(tdb, off, &hdr, sizeof(hdr));
-               if (err != TDB_SUCCESS)
-                       return err;
-
-               if (rec_magic(&hdr) != TDB_CHAIN_MAGIC)
-                       return tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                         TDB_LOG_ERROR,
-                                         "tdb_check: Bad hash chain magic %llu",
-                                         (long long)rec_magic(&hdr));
-
-               if (rec_data_length(&hdr) != sizeof(struct tdb_chain))
-                       return tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                         TDB_LOG_ERROR,
-                                         "tdb_check:"
-                                         " Bad hash chain length %llu vs %zu",
-                                         (long long)rec_data_length(&hdr),
-                                         sizeof(struct tdb_chain));
-
-               if (rec_key_length(&hdr) != 0)
-                       return tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                         TDB_LOG_ERROR,
-                                         "tdb_check: Bad hash chain key length %llu",
-                                         (long long)rec_key_length(&hdr));
-
-               if (rec_hash(&hdr) != 0)
-                       return tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                         TDB_LOG_ERROR,
-                                         "tdb_check: Bad hash chain hash value %llu",
-                                         (long long)rec_hash(&hdr));
-
-               /* Step over the record header to the chain payload. */
-               off += sizeof(hdr);
-               err = check_hash_tree(tdb, off, 0, hash, 64,
-                                     used, num_used, num_found, check, data);
-               if (err != TDB_SUCCESS)
-                       return err;
-
-               off = tdb_read_off(tdb,
-                                  off + offsetof(struct tdb_chain, next));
-               if (TDB_OFF_IS_ERR(off))
-                       return TDB_OFF_TO_ERR(off);
-               if (off == 0)
-                       return TDB_SUCCESS;
-
-               /* The next chain record is itself a used record. */
-               (*num_found)++;
-       }
-}
-
-/*
- * Validate the sub-level hash structure stored in the record at off.
- *
- * Once all 64 hash bits are consumed (hprefix_bits >= 64) the record is
- * a hash chain and is handed to check_hash_chain().  Otherwise it must
- * be a hash table record: TDB_HTABLE_MAGIC, a data length of
- * sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS, and zero key length and
- * hash bits.  Its contents are then walked by check_hash_tree().
- *
- * Returns TDB_SUCCESS or the first error found.
- */
-static enum TDB_ERROR check_hash_record(struct tdb_context *tdb,
-                                       tdb_off_t off,
-                                       uint64_t hprefix,
-                                       unsigned hprefix_bits,
-                                       tdb_off_t used[],
-                                       size_t num_used,
-                                       size_t *num_found,
-                                       enum TDB_ERROR (*check)(TDB_DATA,
-                                                               TDB_DATA,
-                                                               void *),
-                                       void *data)
-{
-       struct tdb_used_record hdr;
-       enum TDB_ERROR err;
-
-       /* No hash bits left to split on: this must be a chain. */
-       if (hprefix_bits >= 64)
-               return check_hash_chain(tdb, off, hprefix, used, num_used,
-                                       num_found, check, data);
-
-       err = tdb_read_convert(tdb, off, &hdr, sizeof(hdr));
-       if (err != TDB_SUCCESS)
-               return err;
-
-       if (rec_magic(&hdr) != TDB_HTABLE_MAGIC)
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "tdb_check: Bad hash table magic %llu",
-                                 (long long)rec_magic(&hdr));
-
-       if (rec_data_length(&hdr)
-           != sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS)
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "tdb_check:"
-                                 " Bad hash table length %llu vs %llu",
-                                 (long long)rec_data_length(&hdr),
-                                 (long long)sizeof(tdb_off_t)
-                                 << TDB_SUBLEVEL_HASH_BITS);
-
-       if (rec_key_length(&hdr) != 0)
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "tdb_check: Bad hash table key length %llu",
-                                 (long long)rec_key_length(&hdr));
-
-       if (rec_hash(&hdr) != 0)
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "tdb_check: Bad hash table hash value %llu",
-                                 (long long)rec_hash(&hdr));
-
-       /* The table itself starts right after the record header. */
-       return check_hash_tree(tdb, off + sizeof(hdr),
-                              TDB_SUBLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS,
-                              hprefix, hprefix_bits,
-                              used, num_used, num_found, check, data);
-}
-
-/*
- * Three-way comparison of two offsets for asearch(): returns 1, -1 or 0.
- * The values are compared rather than subtracted because their
- * difference can overflow an int.
- */
-static int off_cmp(const tdb_off_t *a, const tdb_off_t *b)
-{
-       const tdb_off_t lhs = *a, rhs = *b;
-
-       /* Each comparison yields 0 or 1, so the result is -1, 0 or 1. */
-       return (lhs > rhs) - (lhs < rhs);
-}
-
-/*
- * Extract the next num bits of the 64-bit hash h, consuming from the
- * most significant end.
- *
- * *used is the number of bits already consumed; it is advanced by num.
- * The caller must keep *used <= 64.  Returns the extracted bits as an
- * unsigned value, or 0 when num is 0.
- */
-static uint64_t get_bits(uint64_t h, unsigned num, unsigned *used)
-{
-       uint64_t mask;
-
-       *used += num;
-
-       /* Asking for no bits must not evaluate h >> 64 when nothing has
-        * been consumed yet: shifting by the full width is undefined. */
-       if (num == 0)
-               return 0;
-
-       /* Build the mask in 64 bits: hash prefixes can exceed 31 bits,
-        * which a 32-bit "1U << num" cannot represent. */
-       mask = num >= 64 ? ~(uint64_t)0 : ((uint64_t)1 << num) - 1;
-
-       return (h >> (64 - *used)) & mask;
-}
-
-/*
- * Validate one hash table (or chain payload) stored at off.
- *
- * The table holds (1 << group_bits) groups of (1 << TDB_HASH_GROUP_BITS)
- * entries each.  hprefix/hprefix_bits give the hash bits that every
- * record reachable from this table must start with; hprefix_bits == 64
- * means this is a chain whose entries all share the full hash hprefix.
- *
- * used[]/num_used is the sorted array of used-record offsets; each entry
- * found here is looked up in it, marked so it cannot match twice, and
- * counted in *num_found.  If check is non-NULL it is called with the
- * key and data of every ordinary record, plus the opaque data pointer.
- *
- * Returns TDB_SUCCESS, or the first error (the table mapping is
- * released on both paths).
- */
-static enum TDB_ERROR check_hash_tree(struct tdb_context *tdb,
-                                     tdb_off_t off, unsigned int group_bits,
-                                     uint64_t hprefix,
-                                     unsigned hprefix_bits,
-                                     tdb_off_t used[],
-                                     size_t num_used,
-                                     size_t *num_found,
-                                     enum TDB_ERROR (*check)(TDB_DATA,
-                                                             TDB_DATA, void *),
-                                     void *data)
-{
-       unsigned int g, b;
-       const tdb_off_t *hash;
-       struct tdb_used_record rec;
-       enum TDB_ERROR ecode;
-
-       /* Map the whole table; off is reused below for entry offsets. */
-       hash = tdb_access_read(tdb, off,
-                              sizeof(tdb_off_t)
-                              << (group_bits + TDB_HASH_GROUP_BITS),
-                              true);
-       if (TDB_PTR_IS_ERR(hash)) {
-               return TDB_PTR_ERR(hash);
-       }
-
-       for (g = 0; g < (1 << group_bits); g++) {
-               const tdb_off_t *group = hash + (g << TDB_HASH_GROUP_BITS);
-               for (b = 0; b < (1 << TDB_HASH_GROUP_BITS); b++) {
-                       unsigned int bucket, i, used_bits;
-                       uint64_t h;
-                       tdb_off_t *p;
-                       /* Empty slot. */
-                       if (group[b] == 0)
-                               continue;
-
-                       /* Strip the non-offset bits and make sure the
-                        * target is a known used record. */
-                       off = group[b] & TDB_OFF_MASK;
-                       p = asearch(&off, used, num_used, off_cmp);
-                       if (!p) {
-                               ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                  TDB_LOG_ERROR,
-                                                  "tdb_check: Invalid offset"
-                                                  " %llu in hash",
-                                                  (long long)off);
-                               goto fail;
-                       }
-                       /* Mark it invalid. */
-                       *p ^= 1;
-                       (*num_found)++;
-
-                       if (hprefix_bits == 64) {
-                               /* Chained entries are unordered. */
-                               if (is_subhash(group[b])) {
-                                       ecode = TDB_ERR_CORRUPT;
-                                       tdb_logerr(tdb, ecode,
-                                                  TDB_LOG_ERROR,
-                                                  "tdb_check: Invalid chain"
-                                                  " entry subhash");
-                                       goto fail;
-                               }
-                               /* Every chained record must hash to
-                                * exactly the chain's hash value. */
-                               h = hash_record(tdb, off);
-                               if (h != hprefix) {
-                                       ecode = TDB_ERR_CORRUPT;
-                                       tdb_logerr(tdb, ecode,
-                                                  TDB_LOG_ERROR,
-                                                  "check: bad hash chain"
-                                                  " placement"
-                                                  " 0x%llx vs 0x%llx",
-                                                  (long long)h,
-                                                  (long long)hprefix);
-                                       goto fail;
-                               }
-                               ecode = tdb_read_convert(tdb, off, &rec,
-                                                        sizeof(rec));
-                               if (ecode != TDB_SUCCESS) {
-                                       goto fail;
-                               }
-                               /* Skip the placement checks, which do
-                                * not apply to chains. */
-                               goto check;
-                       }
-
-                       if (is_subhash(group[b])) {
-                               uint64_t subprefix;
-                               /* Extend the prefix with this slot's
-                                * index within the table. */
-                               subprefix = (hprefix
-                                    << (group_bits + TDB_HASH_GROUP_BITS))
-                                       + g * (1 << TDB_HASH_GROUP_BITS) + b;
-
-                               ecode = check_hash_record(tdb,
-                                              group[b] & TDB_OFF_MASK,
-                                              subprefix,
-                                              hprefix_bits
-                                                      + group_bits
-                                                      + TDB_HASH_GROUP_BITS,
-                                              used, num_used, num_found,
-                                              check, data);
-                               if (ecode != TDB_SUCCESS) {
-                                       goto fail;
-                               }
-                               continue;
-                       }
-                       /* A normal entry */
-
-                       /* Does it belong here at all? */
-                       h = hash_record(tdb, off);
-                       used_bits = 0;
-                       if (get_bits(h, hprefix_bits, &used_bits) != hprefix
-                           && hprefix_bits) {
-                               ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                  TDB_LOG_ERROR,
-                                                  "check: bad hash placement"
-                                                  " 0x%llx vs 0x%llx",
-                                                  (long long)h,
-                                                  (long long)hprefix);
-                               goto fail;
-                       }
-
-                       /* Does it belong in this group? */
-                       if (get_bits(h, group_bits, &used_bits) != g) {
-                               ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                  TDB_LOG_ERROR,
-                                                  "check: bad group %llu"
-                                                  " vs %u",
-                                                  (long long)h, g);
-                               goto fail;
-                       }
-
-                       /* Are bucket bits correct? */
-                       bucket = group[b] & TDB_OFF_HASH_GROUP_MASK;
-                       if (get_bits(h, TDB_HASH_GROUP_BITS, &used_bits)
-                           != bucket) {
-                               /* Rewind so the get_bits() call in the
-                                * message re-extracts the same bits. */
-                               used_bits -= TDB_HASH_GROUP_BITS;
-                               ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                  TDB_LOG_ERROR,
-                                                  "check: bad bucket %u vs %u",
-                                                  (unsigned)get_bits(h,
-                                                       TDB_HASH_GROUP_BITS,
-                                                       &used_bits),
-                                                  bucket);
-                               goto fail;
-                       }
-
-                       /* There must not be any zero entries between
-                        * the bucket it belongs in and this one! */
-                       for (i = bucket;
-                            i != b;
-                            i = (i + 1) % (1 << TDB_HASH_GROUP_BITS)) {
-                               if (group[i] == 0) {
-                                       ecode = TDB_ERR_CORRUPT;
-                                       tdb_logerr(tdb, ecode,
-                                                  TDB_LOG_ERROR,
-                                                  "check: bad group placement"
-                                                  " %u vs %u",
-                                                  b, bucket);
-                                       goto fail;
-                               }
-                       }
-
-                       ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec));
-                       if (ecode != TDB_SUCCESS) {
-                               goto fail;
-                       }
-
-                       /* Bottom bits must match header. */
-                       if ((h & ((1 << 11)-1)) != rec_hash(&rec)) {
-                               ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                  TDB_LOG_ERROR,
-                                                  "tdb_check: Bad hash magic"
-                                                  " at offset %llu"
-                                                  " (0x%llx vs 0x%llx)",
-                                                  (long long)off,
-                                                  (long long)h,
-                                                  (long long)rec_hash(&rec));
-                               goto fail;
-                       }
-
-               check:
-                       /* Optional caller-supplied validation of the
-                        * record's key and data. */
-                       if (check) {
-                               TDB_DATA k, d;
-                               const unsigned char *kptr;
-
-                               /* Key and data are stored back to back
-                                * after the record header. */
-                               kptr = tdb_access_read(tdb,
-                                                      off + sizeof(rec),
-                                                      rec_key_length(&rec)
-                                                      + rec_data_length(&rec),
-                                                      false);
-                               if (TDB_PTR_IS_ERR(kptr)) {
-                                       ecode = TDB_PTR_ERR(kptr);
-                                       goto fail;
-                               }
-
-                               k = tdb_mkdata(kptr, rec_key_length(&rec));
-                               d = tdb_mkdata(kptr + k.dsize,
-                                              rec_data_length(&rec));
-                               ecode = check(k, d, data);
-                               tdb_access_release(tdb, kptr);
-                               if (ecode != TDB_SUCCESS) {
-                                       goto fail;
-                               }
-                       }
-               }
-       }
-       tdb_access_release(tdb, hash);
-       return TDB_SUCCESS;
-
-fail:
-       /* Common error exit: drop the table mapping, report ecode. */
-       tdb_access_release(tdb, hash);
-       return ecode;
-}
-
-/*
- * Walk the top-level hash table in the header and verify that every
- * used record is accounted for.
- *
- * num_other_used is the number of entries in used[] that are not
- * expected to be reached from the hash; the walk's count starts from
- * it.  Returns TDB_SUCCESS if the walk succeeds and the final count
- * equals num_used, TDB_ERR_CORRUPT (via tdb_logerr) if records are
- * missing from the hash, or the walk's own error.
- */
-static enum TDB_ERROR check_hash(struct tdb_context *tdb,
-                                tdb_off_t used[],
-                                size_t num_used, size_t num_other_used,
-                                enum TDB_ERROR (*check)(TDB_DATA, TDB_DATA, void *),
-                                void *data)
-{
-       /* Free tables and capabilities also show up as used. */
-       size_t seen = num_other_used;
-       enum TDB_ERROR err;
-
-       err = check_hash_tree(tdb, offsetof(struct tdb_header, hashtable),
-                             TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS,
-                             0, 0, used, num_used, &seen,
-                             check, data);
-       if (err != TDB_SUCCESS)
-               return err;
-
-       if (seen == num_used)
-               return TDB_SUCCESS;
-
-       return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                         "tdb_check: Not all entries"
-                         " are in hash");
-}
-
-/*
- * Validate a single free record encountered on a free-list bucket.
- *
- * off is the record's offset and frec its already-read contents.
- * ftable and bucket identify the free table and bucket whose list is
- * being walked; the record must claim both.  prev, when non-zero, is
- * the offset the record's back pointer is expected to hold; a prev of
- * zero skips that check.
- *
- * Returns TDB_SUCCESS, TDB_ERR_CORRUPT (via tdb_logerr) on any
- * mismatch, or the error from the out-of-bounds check on the record's
- * extent.
- */
-static enum TDB_ERROR check_free(struct tdb_context *tdb,
-                                tdb_off_t off,
-                                const struct tdb_free_record *frec,
-                                tdb_off_t prev, unsigned int ftable,
-                                unsigned int bucket)
-{
-       enum TDB_ERROR ecode;
-
-       if (frec_magic(frec) != TDB_FREE_MAGIC) {
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "tdb_check: offset %llu bad magic 0x%llx",
-                                 (long long)off,
-                                 (long long)frec->magic_and_prev);
-       }
-       if (frec_ftable(frec) != ftable) {
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "tdb_check: offset %llu bad freetable %u",
-                                 (long long)off, frec_ftable(frec));
-
-       }
-
-       /* The whole record (header plus free length) must fit in the
-        * file. */
-       ecode = tdb->io->oob(tdb, off,
-                            frec_len(frec)
-                            + sizeof(struct tdb_used_record),
-                            false);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-       if (size_to_bucket(frec_len(frec)) != bucket) {
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "tdb_check: offset %llu in wrong bucket"
-                                 " (%u vs %u)",
-                                 (long long)off,
-                                 bucket, size_to_bucket(frec_len(frec)));
-       }
-       if (prev && prev != frec_prev(frec)) {
-               /* Report the back pointer actually stored in the record,
-                * i.e. the value that failed the comparison above. */
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "tdb_check: offset %llu bad prev"
-                                 " (%llu vs %llu)",
-                                 (long long)off,
-                                 (long long)prev, (long long)frec_prev(frec));
-       }
-       return TDB_SUCCESS;
-}
-
-/*
- * Validate one free table and every free list hanging off it.
- *
- * ftable_off is the table's offset and ftable_num the index its free
- * records must claim.  fr[]/num_free is the sorted array of free-record
- * offsets; each record reached from a bucket is looked up in it, marked
- * so it cannot match twice, and counted in *num_found.
- *
- * Returns TDB_SUCCESS or the first error found.
- */
-static enum TDB_ERROR check_free_table(struct tdb_context *tdb,
-                                      tdb_off_t ftable_off,
-                                      unsigned ftable_num,
-                                      tdb_off_t fr[],
-                                      size_t num_free,
-                                      size_t *num_found)
-{
-       struct tdb_freetable ft;
-       tdb_off_t h;
-       unsigned int i;
-       enum TDB_ERROR ecode;
-
-       ecode = tdb_read_convert(tdb, ftable_off, &ft, sizeof(ft));
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       /* The table is itself a used record: check its header fields. */
-       if (rec_magic(&ft.hdr) != TDB_FTABLE_MAGIC
-           || rec_key_length(&ft.hdr) != 0
-           || rec_data_length(&ft.hdr) != sizeof(ft) - sizeof(ft.hdr)
-           || rec_hash(&ft.hdr) != 0) {
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "tdb_check: Invalid header on free table");
-       }
-
-       for (i = 0; i < TDB_FREE_BUCKETS; i++) {
-               tdb_off_t off, prev = 0, *p, first = 0;
-               struct tdb_free_record f;
-
-               /* Follow this bucket's list through each record's next. */
-               h = bucket_off(ftable_off, i);
-               for (off = tdb_read_off(tdb, h); off; off = f.next) {
-                       if (TDB_OFF_IS_ERR(off)) {
-                               return TDB_OFF_TO_ERR(off);
-                       }
-                       /* Only the value read from the bucket head is
-                        * masked; presumably it carries extra bits above
-                        * the offset - TODO confirm. */
-                       if (!first) {
-                               off &= TDB_OFF_MASK;
-                               first = off;
-                       }
-                       ecode = tdb_read_convert(tdb, off, &f, sizeof(f));
-                       if (ecode != TDB_SUCCESS) {
-                               return ecode;
-                       }
-                       /* prev is 0 for the head, so its back pointer is
-                        * not checked here; see below. */
-                       ecode = check_free(tdb, off, &f, prev, ftable_num, i);
-                       if (ecode != TDB_SUCCESS) {
-                               return ecode;
-                       }
-
-                       /* FIXME: Check hash bits */
-                       p = asearch(&off, fr, num_free, off_cmp);
-                       if (!p) {
-                               return tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                 TDB_LOG_ERROR,
-                                                 "tdb_check: Invalid offset"
-                                                 " %llu in free table",
-                                                 (long long)off);
-                       }
-                       /* Mark it invalid. */
-                       *p ^= 1;
-                       (*num_found)++;
-                       prev = off;
-               }
-
-               if (first) {
-                       /* Now we can check first back pointer. */
-                       /* prev now holds the last record visited, which
-                        * the head's back pointer must equal. */
-                       ecode = tdb_read_convert(tdb, first, &f, sizeof(f));
-                       if (ecode != TDB_SUCCESS) {
-                               return ecode;
-                       }
-                       ecode = check_free(tdb, first, &f, prev, ftable_num, i);
-                       if (ecode != TDB_SUCCESS) {
-                               return ecode;
-                       }
-               }
-       }
-       return TDB_SUCCESS;
-}
-
-/*
- * Count the run of filler bytes (0x00 or 0x43) starting at off,
- * stopping at the first other byte or at the end of the mapped file.
- *
- * Returns the run length, or an error encoded with TDB_ERR_TO_OFF() if
- * a read fails.
- *
- * Slow (one read per byte), but should be very rare.
- */
-tdb_off_t dead_space(struct tdb_context *tdb, tdb_off_t off)
-{
-       size_t len;
-       enum TDB_ERROR ecode;
-
-       for (len = 0; off + len < tdb->file->map_size; len++) {
-               char c;
-               /* Read the byte at the current scan position; reading
-                * at off every time would only ever test the first byte. */
-               ecode = tdb->io->tread(tdb, off + len, &c, 1);
-               if (ecode != TDB_SUCCESS) {
-                       return TDB_ERR_TO_OFF(ecode);
-               }
-               if (c != 0 && c != 0x43)
-                       break;
-       }
-       return len;
-}
-
-static enum TDB_ERROR check_linear(struct tdb_context *tdb,
-                                  tdb_off_t **used, size_t *num_used,
-                                  tdb_off_t **fr, size_t *num_free,
-                                  uint64_t features, tdb_off_t recovery)
-{
-       tdb_off_t off;
-       tdb_len_t len;
-       enum TDB_ERROR ecode;
-       bool found_recovery = false;
-
-       for (off = sizeof(struct tdb_header);
-            off < tdb->file->map_size;
-            off += len) {
-               union {
-                       struct tdb_used_record u;
-                       struct tdb_free_record f;
-                       struct tdb_recovery_record r;
-               } rec;
-               /* r is larger: only get that if we need to. */
-               ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec.f));
-               if (ecode != TDB_SUCCESS) {
-                       return ecode;
-               }
-
-               /* If we crash after ftruncate, we can get zeroes or fill. */
-               if (rec.r.magic == TDB_RECOVERY_INVALID_MAGIC
-                   || rec.r.magic ==  0x4343434343434343ULL) {
-                       ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec.r));
-                       if (ecode != TDB_SUCCESS) {
-                               return ecode;
-                       }
-                       if (recovery == off) {
-                               found_recovery = true;
-                               len = sizeof(rec.r) + rec.r.max_len;
-                       } else {
-                               len = dead_space(tdb, off);
-                               if (TDB_OFF_IS_ERR(len)) {
-                                       return TDB_OFF_TO_ERR(len);
-                               }
-                               if (len < sizeof(rec.r)) {
-                                       return tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                         TDB_LOG_ERROR,
-                                                         "tdb_check: invalid"
-                                                         " dead space at %zu",
-                                                         (size_t)off);
-                               }
-
-                               tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
-                                          "Dead space at %zu-%zu (of %zu)",
-                                          (size_t)off, (size_t)(off + len),
-                                          (size_t)tdb->file->map_size);
-                       }
-               } else if (rec.r.magic == TDB_RECOVERY_MAGIC) {
-                       ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec.r));
-                       if (ecode != TDB_SUCCESS) {
-                               return ecode;
-                       }
-                       if (recovery != off) {
-                               return tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                 TDB_LOG_ERROR,
-                                                 "tdb_check: unexpected"
-                                                 " recovery record at offset"
-                                                 " %zu",
-                                                 (size_t)off);
-                       }
-                       if (rec.r.len > rec.r.max_len) {
-                               return tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                 TDB_LOG_ERROR,
-                                                 "tdb_check: invalid recovery"
-                                                 " length %zu",
-                                                 (size_t)rec.r.len);
-                       }
-                       if (rec.r.eof > tdb->file->map_size) {
-                               return tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                 TDB_LOG_ERROR,
-                                                 "tdb_check: invalid old EOF"
-                                                 " %zu", (size_t)rec.r.eof);
-                       }
-                       found_recovery = true;
-                       len = sizeof(rec.r) + rec.r.max_len;
-               } else if (frec_magic(&rec.f) == TDB_FREE_MAGIC) {
-                       len = sizeof(rec.u) + frec_len(&rec.f);
-                       if (off + len > tdb->file->map_size) {
-                               return tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                 TDB_LOG_ERROR,
-                                                 "tdb_check: free overlength"
-                                                 " %llu at offset %llu",
-                                                 (long long)len,
-                                                 (long long)off);
-                       }
-                       /* This record should be in free lists. */
-                       if (frec_ftable(&rec.f) != TDB_FTABLE_NONE
-                           && !append(fr, num_free, off)) {
-                               return tdb_logerr(tdb, TDB_ERR_OOM,
-                                                 TDB_LOG_ERROR,
-                                                 "tdb_check: tracking %zu'th"
-                                                 " free record.", *num_free);
-                       }
-               } else if (rec_magic(&rec.u) == TDB_USED_MAGIC
-                          || rec_magic(&rec.u) == TDB_CHAIN_MAGIC
-                          || rec_magic(&rec.u) == TDB_HTABLE_MAGIC
-                          || rec_magic(&rec.u) == TDB_FTABLE_MAGIC
-                          || rec_magic(&rec.u) == TDB_CAP_MAGIC) {
-                       uint64_t klen, dlen, extra;
-
-                       /* This record is used! */
-                       if (!append(used, num_used, off)) {
-                               return tdb_logerr(tdb, TDB_ERR_OOM,
-                                                 TDB_LOG_ERROR,
-                                                 "tdb_check: tracking %zu'th"
-                                                 " used record.", *num_used);
-                       }
-
-                       klen = rec_key_length(&rec.u);
-                       dlen = rec_data_length(&rec.u);
-                       extra = rec_extra_padding(&rec.u);
-
-                       len = sizeof(rec.u) + klen + dlen + extra;
-                       if (off + len > tdb->file->map_size) {
-                               return tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                 TDB_LOG_ERROR,
-                                                 "tdb_check: used overlength"
-                                                 " %llu at offset %llu",
-                                                 (long long)len,
-                                                 (long long)off);
-                       }
-
-                       if (len < sizeof(rec.f)) {
-                               return tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                 TDB_LOG_ERROR,
-                                                 "tdb_check: too short record"
-                                                 " %llu at %llu",
-                                                 (long long)len,
-                                                 (long long)off);
-                       }
-
-                       /* Check that records have correct 0 at end (but may
-                        * not in future). */
-                       if (extra && !features
-                           && rec_magic(&rec.u) != TDB_CAP_MAGIC) {
-                               const char *p;
-                               char c;
-                               p = tdb_access_read(tdb, off + sizeof(rec.u)
-                                                   + klen + dlen, 1, false);
-                               if (TDB_PTR_IS_ERR(p))
-                                       return TDB_PTR_ERR(p);
-                               c = *p;
-                               tdb_access_release(tdb, p);
-
-                               if (c != '\0') {
-                                       return tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                         TDB_LOG_ERROR,
-                                                         "tdb_check:"
-                                                         " non-zero extra"
-                                                         " at %llu",
-                                                         (long long)off);
-                               }
-                       }
-               } else {
-                       return tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                         TDB_LOG_ERROR,
-                                         "tdb_check: Bad magic 0x%llx"
-                                         " at offset %zu",
-                                         (long long)rec_magic(&rec.u),
-                                         (size_t)off);
-               }
-       }
-
-       /* We must have found recovery area if there was one. */
-       if (recovery != 0 && !found_recovery) {
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "tdb_check: expected a recovery area at %zu",
-                                 (size_t)recovery);
-       }
-
-       return TDB_SUCCESS;
-}
-
-/* Check the whole database for consistency while holding read locks.
- *
- * Takes the all-record lock and the expand lock (both F_RDLCK), validates
- * the header, scans every record linearly, checks each free table and then
- * the hash, and finally confirms that every free record tracked by the
- * linear scan was found in a free table.  check and data are handed on to
- * check_hash().  Except for the TDB_CANT_CHECK early return, the result is
- * also stored in tdb->last_error. */
-_PUBLIC_ enum TDB_ERROR tdb_check_(struct tdb_context *tdb,
-                         enum TDB_ERROR (*check)(TDB_DATA, TDB_DATA, void *),
-                         void *data)
-{
-       enum TDB_ERROR ecode;
-       uint64_t features;
-       tdb_off_t recovery, ft;
-       tdb_off_t *used_recs = NULL, *free_recs = NULL;
-       size_t num_used = 0, num_free = 0, num_found = 0;
-       size_t num_ftables = 0, num_capabilities = 0;
-
-       /* With an unknown capability present we only warn and give up. */
-       if (tdb->flags & TDB_CANT_CHECK) {
-               return tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
-                                 "tdb_check: database has unknown capability,"
-                                 " cannot check.");
-       }
-
-       /* Nothing is held yet, so a failure here needs no cleanup. */
-       ecode = tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false);
-       if (ecode != TDB_SUCCESS) {
-               return tdb->last_error = ecode;
-       }
-
-       /* Only the all-record lock needs undoing if this one fails. */
-       ecode = tdb_lock_expand(tdb, F_RDLCK);
-       if (ecode != TDB_SUCCESS) {
-               tdb_allrecord_unlock(tdb, F_RDLCK);
-               return tdb->last_error = ecode;
-       }
-
-       ecode = check_header(tdb, &recovery, &features, &num_capabilities);
-       if (ecode != TDB_SUCCESS)
-               goto out;
-
-       /* Pass one: walk the file record by record. */
-       ecode = check_linear(tdb, &used_recs, &num_used, &free_recs, &num_free,
-                            features, recovery);
-       if (ecode != TDB_SUCCESS)
-               goto out;
-
-       /* Pass two: walk the chain of free tables. */
-       ft = first_ftable(tdb);
-       while (ft) {
-               if (TDB_OFF_IS_ERR(ft)) {
-                       ecode = TDB_OFF_TO_ERR(ft);
-                       goto out;
-               }
-               ecode = check_free_table(tdb, ft, num_ftables, free_recs,
-                                        num_free, &num_found);
-               if (ecode != TDB_SUCCESS)
-                       goto out;
-               num_ftables++;
-               ft = next_ftable(tdb, ft);
-       }
-
-       /* FIXME: Check key uniqueness? */
-       ecode = check_hash(tdb, used_recs, num_used,
-                          num_ftables + num_capabilities, check, data);
-       if (ecode != TDB_SUCCESS)
-               goto out;
-
-       /* Every free record from the scan must have turned up in a table. */
-       if (num_found != num_free) {
-               ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                  "tdb_check: Not all entries are in"
-                                  " free table");
-       }
-
-out:
-       /* Shared exit: release both locks and the tracking arrays. */
-       tdb_allrecord_unlock(tdb, F_RDLCK);
-       tdb_unlock_expand(tdb, F_RDLCK);
-       free(free_recs);
-       free(used_recs);
-       return tdb->last_error = ecode;
-}
diff --git a/lib/tdb2/doc/TDB1_porting.txt b/lib/tdb2/doc/TDB1_porting.txt
deleted file mode 100644 (file)
index e59295c..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-Interface differences between TDB1 and TDB2.
-
-- tdb2 uses 'struct tdb_data', tdb1 uses 'struct TDB_DATA'.  Use the
-  TDB_DATA typedef if you want portability between the two.
-
-- tdb2 functions return 0 on success, and a negative error on failure,
-  whereas tdb1 functions returned 0 on success, and -1 on failure.
-  tdb1 then used tdb_error() to determine the error; this is also
-  supported in tdb2 to ease backwards compatibility, though the other
-  form is preferred.
-
-- tdb2's tdb_fetch() returns an error, tdb1's returned the data directly
-  (or tdb_null, and you were supposed to check tdb_error() to find out why).
-
-- tdb2's tdb_nextkey() frees the old key's dptr, in tdb1 you needed to do
-  this manually.
-
-- tdb1's tdb_open/tdb_open_ex took an explicit hash size.  tdb2's hash table
-  resizes as required.
-
-- tdb2 uses a linked list of attribute structures to implement logging and
-  alternate hashes.  tdb1 used tdb_open_ex, which was not extensible.
-
-- tdb2 does locking on read-only databases (ie. O_RDONLY passed to tdb_open).
-  tdb1 did not: use the TDB_NOLOCK flag if you want to suppress locking.
-
-- tdb2's log function is simpler than tdb1's log function.  The string is
-  already formatted, and it takes an enum tdb_log_level not a tdb_debug_level,
-  which has only three values: TDB_LOG_ERROR, TDB_LOG_USE_ERROR and
-  TDB_LOG_WARNING.
-
-- tdb2 provides tdb_deq() for comparing two struct tdb_data.
-
-- tdb2's tdb_name() returns a copy of the name even for TDB_INTERNAL dbs.
-
-- tdb2 does not need tdb_reopen() or tdb_reopen_all().  If you call
-  fork() during certain operations, the child should close the
-  tdb, or complete the operations before continuing to use the tdb:
-
-       tdb_transaction_start(): child must tdb_transaction_cancel()
-       tdb_lockall(): child must call tdb_unlockall()
-       tdb_lockall_read(): child must call tdb_unlockall_read()
-       tdb_chainlock(): child must call tdb_chainunlock()
-       tdb_parse() callback: child must return from tdb_parse()
-
-- tdb2 will not open a non-tdb file, even if O_CREAT is specified.
-
-- There is no tdb_traverse_read.  For operating on TDB1 files, you can
-  simulate it by tdb_add_flag(tdb, TDB_RDONLY); tdb_traverse();
-  tdb_remove_flag(tdb, TDB_RDONLY).  This may be desirable because
-  traverse on TDB1 files uses a write lock on the entire database
-  unless it's read-only.
-
-- Failure inside a transaction (such as a lock function failing) does
-  not implicitly cancel the transaction; you still need to call
-  tdb_transaction_cancel().
-
-TDB1 Compatibility:
-
-- tdb2 offers a tdb1_incompatible_hash function, which is the same
-  as the default hash with the TDB_INCOMPATIBLE_HASH flag.  There is
-  no way of marking an old TDB incompatible with versions < 1.2.6
-  while using any other hash.
-
-- The TDB_ATTRIBUTE_TDB1_HASHSIZE attribute can be used to control the
-  hash size, but only when creating (ie. O_CREAT) a TDB1
-  (ie. TDB_VERSION1).
-
-- There is no TDB_CLEAR_IF_FIRST flag; it has severe scalability and
-  API problems.  If necessary, you can emulate this by using the open
-  hook and placing a 1-byte lock at offset 4.  If your program forks,
-  you will need to place this lock again in the child.
diff --git a/lib/tdb2/free.c b/lib/tdb2/free.c
deleted file mode 100644 (file)
index c4015a0..0000000
+++ /dev/null
@@ -1,976 +0,0 @@
- /*
-   Trivial Database 2: free list/block handling
-   Copyright (C) Rusty Russell 2010
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-#include "private.h"
-#include <ccan/likely/likely.h>
-#include <ccan/ilog/ilog.h>
-#include <time.h>
-#include <assert.h>
-#include <limits.h>
-
-/* Thin wrapper: hands val to ilog64() and returns the result as unsigned. */
-static unsigned fls64(uint64_t val)
-{
-       unsigned bits = ilog64(val);
-
-       return bits;
-}
-
-/* Which free bucket holds records with this much data?  The record header
- * is not counted in data_len. */
-unsigned int size_to_bucket(tdb_len_t data_len)
-{
-       tdb_len_t excess;
-       unsigned int bucket;
-
-       /* No record can be smaller than the minimum. */
-       assert(data_len >= TDB_MIN_DATA_LEN);
-
-       /* Bytes beyond the minimum record size decide the bucket. */
-       excess = data_len - TDB_MIN_DATA_LEN;
-       if (excess > 64) {
-               /* Past the linear range, buckets go by power of 2. */
-               bucket = fls64(excess) + 2;
-       } else {
-               /* One bucket per 8 bytes: 0 -> 0, 8 -> 1, ... 64 -> 8. */
-               bucket = excess / 8;
-       }
-
-       /* Anything that would index past the table shares the last bucket. */
-       if (unlikely(bucket >= TDB_FREE_BUCKETS))
-               return TDB_FREE_BUCKETS - 1;
-       return bucket;
-}
-
-/* Read the free_table field of the database header: the offset of the
- * first free table (or whatever tdb_read_off() reports on failure). */
-tdb_off_t first_ftable(struct tdb_context *tdb)
-{
-       tdb_off_t field = offsetof(struct tdb_header, free_table);
-
-       return tdb_read_off(tdb, field);
-}
-
-/* Read the next field of the free table at ftable, i.e. the offset of the
- * free table that follows it in the chain. */
-tdb_off_t next_ftable(struct tdb_context *tdb, tdb_off_t ftable)
-{
-       tdb_off_t field = ftable + offsetof(struct tdb_freetable, next);
-
-       return tdb_read_off(tdb, field);
-}
-
-/* Choose, at random, which free table this context will use.
- *
- * Every free table in the chain gets one random() draw; the table with the
- * largest draw (the later one on a tie) is recorded in tdb->ftable_off and
- * its position in the chain in tdb->ftable.  Returns TDB_SUCCESS, or the
- * decoded error if walking the chain yields an error offset. */
-enum TDB_ERROR tdb_ftable_init(struct tdb_context *tdb)
-{
-       unsigned int draw, best = 0, idx;
-       tdb_off_t cur;
-
-       /* Start out pointing at the first table. */
-       cur = first_ftable(tdb);
-       tdb->ftable_off = cur;
-       tdb->ftable = 0;
-
-       for (idx = 0; cur != 0; idx++) {
-               if (TDB_OFF_IS_ERR(cur)) {
-                       return TDB_OFF_TO_ERR(cur);
-               }
-
-               /* Keep this table if its draw matches or beats the best. */
-               draw = random();
-               if (draw >= best) {
-                       best = draw;
-                       tdb->ftable_off = cur;
-                       tdb->ftable = idx;
-               }
-
-               cur = next_ftable(tdb, cur);
-       }
-       return TDB_SUCCESS;
-}
-
-/* File offset of bucket number `bucket` inside the free table that starts
- * at ftable_off. */
-tdb_off_t bucket_off(tdb_off_t ftable_off, unsigned bucket)
-{
-       /* Where the buckets array of this free table begins. */
-       tdb_off_t buckets_start
-               = ftable_off + offsetof(struct tdb_freetable, buckets);
-
-       return buckets_start + bucket * sizeof(tdb_off_t);
-}
-
-/* Look for a non-zero bucket head in the free table at ftable_off, starting
- * the search at `bucket`.  The result is whatever tdb_find_nonzero_off()
- * yields; per the original note that is free_buckets + 1, or the list
- * number to search, or a -ve error. */
-static tdb_off_t find_free_head(struct tdb_context *tdb,
-                               tdb_off_t ftable_off,
-                               tdb_off_t bucket)
-{
-       tdb_off_t first_bucket = bucket_off(ftable_off, 0);
-
-       /* Speculative search: made without regard to any lock state. */
-       return tdb_find_nonzero_off(tdb, first_bucket, bucket,
-                                   TDB_FREE_BUCKETS);
-}
-
-/* Debug aid: compiled to an empty function unless CCAN_TDB2_DEBUG is
- * defined.  When enabled, walks the free list headed at b_off and abort()s
- * if a record lacks the free magic, if a record's prev does not name the
- * record visited just before it, or if the first record's prev does not
- * name the last record visited. */
-static void check_list(struct tdb_context *tdb, tdb_off_t b_off)
-{
-#ifdef CCAN_TDB2_DEBUG
-       struct tdb_free_record r;
-       tdb_off_t first, cur, last = 0;
-
-       /* The bucket word carries extra upper bits; keep only the offset. */
-       first = (tdb_read_off(tdb, b_off) & TDB_OFF_MASK);
-
-       for (cur = first; cur != 0; cur = r.next) {
-               tdb_read_convert(tdb, cur, &r, sizeof(r));
-               if (frec_magic(&r) != TDB_FREE_MAGIC)
-                       abort();
-               if (last && frec_prev(&r) != last)
-                       abort();
-               last = cur;
-       }
-
-       /* The head's prev must point at the final record of the walk. */
-       if (first) {
-               tdb_read_convert(tdb, first, &r, sizeof(r));
-               if (frec_prev(&r) != last)
-                       abort();
-       }
-#endif
-}
-
-/* Remove from free bucket.
- *
- * b_off: offset of the bucket head word of the list holding the record.
- * r_off: offset of the record being unlinked.
- * r:     the caller's copy of the free record stored at r_off.
- *
- * As the code below treats it, next pointers run from the head and end in
- * 0 at the tail, while the head's prev field points at the tail.
- *
- * Returns TDB_SUCCESS, an error from tdb_read_off()/tdb_write_off(), or
- * the result of logging TDB_ERR_CORRUPT when the record appears to be the
- * head but the bucket does not point at it. */
-static enum TDB_ERROR remove_from_list(struct tdb_context *tdb,
-                                      tdb_off_t b_off, tdb_off_t r_off,
-                                      const struct tdb_free_record *r)
-{
-       tdb_off_t off, prev_next, head;
-       enum TDB_ERROR ecode;
-
-       /* Is this only element in list?  Zero out bucket, and we're done.
-        * NOTE(review): writing 0 also clears the bucket word's upper bits,
-        * unlike the head-update path below which preserves them --
-        * presumably intentional; confirm. */
-       if (frec_prev(r) == r_off)
-               return tdb_write_off(tdb, b_off, 0);
-
-       /* off = &r->prev->next */
-       off = frec_prev(r) + offsetof(struct tdb_free_record, next);
-
-       /* Get prev->next */
-       prev_next = tdb_read_off(tdb, off);
-       if (TDB_OFF_IS_ERR(prev_next))
-               return TDB_OFF_TO_ERR(prev_next);
-
-       /* If prev->next == 0, we were head: update bucket to point to next.
-        * (Our prev is then the tail, whose next is 0.) */
-       if (prev_next == 0) {
-               /* We must preserve upper bits. */
-               head = tdb_read_off(tdb, b_off);
-               if (TDB_OFF_IS_ERR(head))
-                       return TDB_OFF_TO_ERR(head);
-
-               if ((head & TDB_OFF_MASK) != r_off) {
-                       return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                         "remove_from_list:"
-                                         " %llu head %llu on list %llu",
-                                         (long long)r_off,
-                                         (long long)head,
-                                         (long long)b_off);
-               }
-               head = ((head & ~TDB_OFF_MASK) | r->next);
-               ecode = tdb_write_off(tdb, b_off, head);
-               if (ecode != TDB_SUCCESS)
-                       return ecode;
-       } else {
-               /* r->prev->next = r->next */
-               ecode = tdb_write_off(tdb, off, r->next);
-               if (ecode != TDB_SUCCESS)
-                       return ecode;
-       }
-
-       /* If we were the tail, off = &head->prev.  The bucket is re-read
-        * here because the branch above may just have rewritten it. */
-       if (r->next == 0) {
-               head = tdb_read_off(tdb, b_off);
-               if (TDB_OFF_IS_ERR(head))
-                       return TDB_OFF_TO_ERR(head);
-               head &= TDB_OFF_MASK;
-               off = head + offsetof(struct tdb_free_record, magic_and_prev);
-       } else {
-               /* off = &r->next->prev */
-               off = r->next + offsetof(struct tdb_free_record,
-                                        magic_and_prev);
-       }
-
-#ifdef CCAN_TDB2_DEBUG
-       /* *off == r  (debug builds only: the prev field about to be
-        * overwritten must currently name the record being removed) */
-       if ((tdb_read_off(tdb, off) & TDB_OFF_MASK) != r_off) {
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "remove_from_list:"
-                                 " %llu bad prev in list %llu",
-                                 (long long)r_off, (long long)b_off);
-       }
-#endif
-       /* r->next->prev = r->prev  (the whole magic_and_prev word of r is
-        * copied, magic bits included) */
-       return tdb_write_off(tdb, off, r->magic_and_prev);
-}
-
-/* Enqueue in this free bucket: sets coalesce if we've added 128
- * entries to it. */
-static enum TDB_ERROR enqueue_in_free(struct tdb_context *tdb,
-                                     tdb_off_t b_off,
-                                     tdb_off_t off,
-                                     tdb_len_t len,
-                                     bool *coalesce)
-{
-       struct tdb_free_record new;
-       enum TDB_ERROR ecode;
-       tdb_off_t prev, head;
-       uint64_t magic = (TDB_FREE_MAGIC << (64 - TDB_OFF_UPPER_STEAL));
-
-       head = tdb_read_off(tdb, b_off);
-       if (TDB_OFF_IS_ERR(head))
-               return TDB_OFF_TO_ERR(head);
-
-       /* We only need to set ftable_and_len; rest is set in enqueue_in_free */
-       new.ftable_and_len = ((uint64_t)tdb->ftable
-                             << (64 - TDB_OFF_UPPER_STEAL))
-               | len;
-
-       /* new->next = head. */
-       new.next = (head & TDB_OFF_MASK);
-
-       /* First element?  Prev points to ourselves. */
-       if (!new.next) {
-               new.magic_and_prev = (magic | off);
-       } else {
-               /* new->prev = next->prev */
-               prev = tdb_read_off(tdb,
-                                   new.next + offsetof(struct tdb_free_record,
-                                                       magic_and_prev));
-               new.magic_and_prev = prev;
-               if (frec_magic(&new) != TDB_FREE_MAGIC) {
-                       return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                         "enqueue_in_free: %llu bad head"
-                                         " prev %llu",
-                                         (long long)new.next,
-                                         (long long)prev);
-               }
-               /* next->prev = new. */
-               ecode = tdb_write_off(tdb, new.next
-                                     + offsetof(struct tdb_free_record,
-                                                magic_and_prev),
-                                     off | magic);
-               if (ecode != TDB_SUCCESS) {
-                       return ecode;
-               }
-
-#ifdef CCAN_TDB2_DEBUG
-               prev = tdb_read_off(tdb, frec_prev(&new)
-                                   + offsetof(struct tdb_free_record, next));
-               if (prev != 0) {
-                       return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                         "enqueue_in_free:"
-                                         " %llu bad tail next ptr %llu",
-                                         (long long)frec_prev(&new)
-                                         + offsetof(struct tdb_free_record,
-                                                    next),
-                                         (long long)prev);
-               }
-#endif
-       }
-
-       /* Update enqueue count, but don't set high bit: see TDB_OFF_IS_ERR */
-       if (*coalesce)
-               head += (1ULL << (64 - TDB_OFF_UPPER_STEAL));
-       head &= ~(TDB_OFF_MASK | (1ULL << 63));
-       head |= off;
-
-       ecode = tdb_write_off(tdb, b_off, head);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       /* It's time to coalesce if counter wrapped. */
-       if (*coalesce)
-               *coalesce = ((head & ~TDB_OFF_MASK) == 0);
-
-       return tdb_write_convert(tdb, off, &new, sizeof(new));
-}
-
-/* Translate a free table number into the offset of that table.
- *
- * Returns the cached tdb->ftable_off when ftable is the table this context
- * already uses; otherwise follows the chain from the first table for
- * ftable steps.  An error offset met on the way is returned as is. */
-static tdb_off_t ftable_offset(struct tdb_context *tdb, unsigned int ftable)
-{
-       tdb_off_t off;
-       unsigned int steps = ftable;
-
-       /* Common case: asking about our own table. */
-       if (likely(tdb->ftable == ftable))
-               return tdb->ftable_off;
-
-       off = first_ftable(tdb);
-       while (steps-- > 0 && !TDB_OFF_IS_ERR(off))
-               off = next_ftable(tdb, off);
-       return off;
-}
-
-/* Note: we unlock the current bucket if fail (-ve), or coalesce (+ve) and
- * need to blatt the *protect record (which is set to an error).
- *
- * Tries to merge the free record at off (on the list headed at b_off, with
- * data_len bytes of data) with the free records that directly follow it in
- * the file.
- *
- * Returns 0 if nothing adjacent could be merged (b_off is left locked), the
- * new usable length if records were merged, or an error encoded with
- * TDB_ERR_TO_OFF().  *protect names a record the caller still cares about;
- * it is overwritten with an encoded error if that record was absorbed, if
- * it is off itself, or if the bucket lock had to be dropped. */
-static tdb_len_t coalesce(struct tdb_context *tdb,
-                         tdb_off_t off, tdb_off_t b_off,
-                         tdb_len_t data_len,
-                         tdb_off_t *protect)
-{
-       tdb_off_t end;
-       struct tdb_free_record rec;
-       enum TDB_ERROR ecode;
-
-       tdb->stats.alloc_coalesce_tried++;
-       end = off + sizeof(struct tdb_used_record) + data_len;
-
-       while (end < tdb->file->map_size) {
-               const struct tdb_free_record *r;
-               tdb_off_t nb_off;
-               unsigned ftable, bucket;
-
-               r = tdb_access_read(tdb, end, sizeof(*r), true);
-               if (TDB_PTR_IS_ERR(r)) {
-                       ecode = TDB_PTR_ERR(r);
-                       goto err;
-               }
-
-               if (frec_magic(r) != TDB_FREE_MAGIC
-                   || frec_ftable(r) == TDB_FTABLE_NONE) {
-                       tdb_access_release(tdb, r);
-                       break;
-               }
-
-               ftable = frec_ftable(r);
-               bucket = size_to_bucket(frec_len(r));
-               nb_off = ftable_offset(tdb, ftable);
-               if (TDB_OFF_IS_ERR(nb_off)) {
-                       tdb_access_release(tdb, r);
-                       ecode = TDB_OFF_TO_ERR(nb_off);
-                       goto err;
-               }
-               nb_off = bucket_off(nb_off, bucket);
-               tdb_access_release(tdb, r);
-
-               /* We may be violating lock order here, so best effort.
-                * (Hence TDB_LOCK_NOWAIT: if the lock is not granted we
-                * simply stop merging.) */
-               if (tdb_lock_free_bucket(tdb, nb_off, TDB_LOCK_NOWAIT)
-                   != TDB_SUCCESS) {
-                       tdb->stats.alloc_coalesce_lockfail++;
-                       break;
-               }
-
-               /* Now we have lock, re-check.  The neighbour was first
-                * examined before its bucket was locked, so read it again. */
-               ecode = tdb_read_convert(tdb, end, &rec, sizeof(rec));
-               if (ecode != TDB_SUCCESS) {
-                       tdb_unlock_free_bucket(tdb, nb_off);
-                       goto err;
-               }
-
-               if (unlikely(frec_magic(&rec) != TDB_FREE_MAGIC)) {
-                       tdb->stats.alloc_coalesce_race++;
-                       tdb_unlock_free_bucket(tdb, nb_off);
-                       break;
-               }
-
-               if (unlikely(frec_ftable(&rec) != ftable)
-                   || unlikely(size_to_bucket(frec_len(&rec)) != bucket)) {
-                       tdb->stats.alloc_coalesce_race++;
-                       tdb_unlock_free_bucket(tdb, nb_off);
-                       break;
-               }
-
-               /* Did we just mess up a record you were hoping to use? */
-               if (end == *protect) {
-                       tdb->stats.alloc_coalesce_iterate_clash++;
-                       *protect = TDB_ERR_TO_OFF(TDB_ERR_NOEXIST);
-               }
-
-               ecode = remove_from_list(tdb, nb_off, end, &rec);
-               check_list(tdb, nb_off);
-               if (ecode != TDB_SUCCESS) {
-                       tdb_unlock_free_bucket(tdb, nb_off);
-                       goto err;
-               }
-
-               end += sizeof(struct tdb_used_record) + frec_len(&rec);
-               tdb_unlock_free_bucket(tdb, nb_off);
-               tdb->stats.alloc_coalesce_num_merged++;
-       }
-
-       /* Didn't find any adjacent free?  Then end never moved; return 0
-        * with b_off still locked. */
-       if (end == off + sizeof(struct tdb_used_record) + data_len)
-               return 0;
-
-       /* Before we expand, check this isn't one you wanted protected? */
-       if (off == *protect) {
-               *protect = TDB_ERR_TO_OFF(TDB_ERR_EXISTS);
-               tdb->stats.alloc_coalesce_iterate_clash++;
-       }
-
-       /* OK, expand initial record */
-       ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec));
-       if (ecode != TDB_SUCCESS) {
-               goto err;
-       }
-
-       if (frec_len(&rec) != data_len) {
-               ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                  "coalesce: expected data len %zu not %zu",
-                                  (size_t)data_len, (size_t)frec_len(&rec));
-               goto err;
-       }
-
-       ecode = remove_from_list(tdb, b_off, off, &rec);
-       check_list(tdb, b_off);
-       if (ecode != TDB_SUCCESS) {
-               goto err;
-       }
-
-       /* Try locking violation first.  We don't allow coalesce recursion!
-        * (The final false argument is add_free_record()'s coalesce_ok.) */
-       ecode = add_free_record(tdb, off, end - off, TDB_LOCK_NOWAIT, false);
-       if (ecode != TDB_SUCCESS) {
-               /* Need to drop lock.  Can't rely on anything stable. */
-               tdb->stats.alloc_coalesce_lockfail++;
-               *protect = TDB_ERR_TO_OFF(TDB_ERR_CORRUPT);
-
-               /* We have to drop this to avoid deadlocks, so make sure record
-                * doesn't get coalesced by someone else!  (The loop above
-                * skips records whose ftable is TDB_FTABLE_NONE.) */
-               rec.ftable_and_len = (TDB_FTABLE_NONE
-                                     << (64 - TDB_OFF_UPPER_STEAL))
-                       | (end - off - sizeof(struct tdb_used_record));
-               ecode = tdb_write_off(tdb,
-                                     off + offsetof(struct tdb_free_record,
-                                                    ftable_and_len),
-                                     rec.ftable_and_len);
-               if (ecode != TDB_SUCCESS) {
-                       goto err;
-               }
-
-               tdb_unlock_free_bucket(tdb, b_off);
-
-               ecode = add_free_record(tdb, off, end - off, TDB_LOCK_WAIT,
-                                       false);
-               if (ecode != TDB_SUCCESS) {
-                       return TDB_ERR_TO_OFF(ecode);
-               }
-       } else if (TDB_OFF_IS_ERR(*protect)) {
-               /* For simplicity, we always drop lock if they can't continue */
-               tdb_unlock_free_bucket(tdb, b_off);
-       }
-       tdb->stats.alloc_coalesce_succeeded++;
-
-       /* Return usable length. */
-       return end - off - sizeof(struct tdb_used_record);
-
-err:
-       /* To unify error paths, we *always* unlock bucket on error. */
-       tdb_unlock_free_bucket(tdb, b_off);
-       return TDB_ERR_TO_OFF(ecode);
-}
-
-/* List is locked: we unlock it.
- *
- * Walks the free list headed at b_off, calling coalesce() on each record,
- * for at most limit records (the budget grows when merges succeed).  If the
- * walk stops with records left, the list is rotated so the records already
- * visited end up at the tail.
- *
- * ftable_off is not referenced anywhere in this function body.
- *
- * Returns TDB_SUCCESS, or an error from the read/write helpers or from
- * coalesce(). */
-static enum TDB_ERROR coalesce_list(struct tdb_context *tdb,
-                                   tdb_off_t ftable_off,
-                                   tdb_off_t b_off,
-                                   unsigned int limit)
-{
-       enum TDB_ERROR ecode;
-       tdb_off_t off;
-
-       off = tdb_read_off(tdb, b_off);
-       if (TDB_OFF_IS_ERR(off)) {
-               ecode = TDB_OFF_TO_ERR(off);
-               goto unlock_err;
-       }
-       /* A little bit of paranoia: counter should be 0. */
-       off &= TDB_OFF_MASK;
-
-       while (off && limit--) {
-               struct tdb_free_record rec;
-               tdb_len_t coal;
-               tdb_off_t next;
-
-               ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec));
-               if (ecode != TDB_SUCCESS)
-                       goto unlock_err;
-
-               next = rec.next;
-               coal = coalesce(tdb, off, b_off, frec_len(&rec), &next);
-               if (TDB_OFF_IS_ERR(coal)) {
-                       /* This has already unlocked on error. */
-                       return TDB_OFF_TO_ERR(coal);
-               }
-               if (TDB_OFF_IS_ERR(next)) {
-                       /* Coalescing had to unlock, so stop. */
-                       return TDB_SUCCESS;
-               }
-               /* Keep going if we're doing well...  (coal is 0 when nothing
-                * was merged, which maps to bucket 0 and adds nothing.) */
-               limit += size_to_bucket(coal / 16 + TDB_MIN_DATA_LEN);
-               off = next;
-       }
-
-       /* Now, move those elements to the tail of the list so we get something
-        * else next time. */
-       if (off) {
-               struct tdb_free_record oldhrec, newhrec, oldtrec, newtrec;
-               tdb_off_t oldhoff, oldtoff, newtoff;
-
-               /* The record we were up to is the new head. */
-               ecode = tdb_read_convert(tdb, off, &newhrec, sizeof(newhrec));
-               if (ecode != TDB_SUCCESS)
-                       goto unlock_err;
-
-               /* Get the new tail. */
-               newtoff = frec_prev(&newhrec);
-               ecode = tdb_read_convert(tdb, newtoff, &newtrec,
-                                        sizeof(newtrec));
-               if (ecode != TDB_SUCCESS)
-                       goto unlock_err;
-
-               /* Get the old head.
-                * NOTE(review): unlike the first read of b_off above, this
-                * value is not masked with TDB_OFF_MASK, so the code relies
-                * on the bucket's upper bits being 0 here -- confirm. */
-               oldhoff = tdb_read_off(tdb, b_off);
-               if (TDB_OFF_IS_ERR(oldhoff)) {
-                       ecode = TDB_OFF_TO_ERR(oldhoff);
-                       goto unlock_err;
-               }
-
-               /* This could happen if they all coalesced away. */
-               if (oldhoff == off)
-                       goto out;
-
-               ecode = tdb_read_convert(tdb, oldhoff, &oldhrec,
-                                        sizeof(oldhrec));
-               if (ecode != TDB_SUCCESS)
-                       goto unlock_err;
-
-               /* Get the old tail. */
-               oldtoff = frec_prev(&oldhrec);
-               ecode = tdb_read_convert(tdb, oldtoff, &oldtrec,
-                                        sizeof(oldtrec));
-               if (ecode != TDB_SUCCESS)
-                       goto unlock_err;
-
-               /* Old tail's next points to old head. */
-               oldtrec.next = oldhoff;
-
-               /* Old head's prev points to old tail. */
-               oldhrec.magic_and_prev
-                       = (TDB_FREE_MAGIC << (64 - TDB_OFF_UPPER_STEAL))
-                       | oldtoff;
-
-               /* New tail's next is 0. */
-               newtrec.next = 0;
-
-               /* Write out the modified versions. */
-               ecode = tdb_write_convert(tdb, oldtoff, &oldtrec,
-                                         sizeof(oldtrec));
-               if (ecode != TDB_SUCCESS)
-                       goto unlock_err;
-
-               ecode = tdb_write_convert(tdb, oldhoff, &oldhrec,
-                                         sizeof(oldhrec));
-               if (ecode != TDB_SUCCESS)
-                       goto unlock_err;
-
-               ecode = tdb_write_convert(tdb, newtoff, &newtrec,
-                                         sizeof(newtrec));
-               if (ecode != TDB_SUCCESS)
-                       goto unlock_err;
-
-               /* And finally link in new head.  (off was masked above, so
-                * the bucket word is written with its upper bits clear.) */
-               ecode = tdb_write_off(tdb, b_off, off);
-               if (ecode != TDB_SUCCESS)
-                       goto unlock_err;
-       }
-out:
-       tdb_unlock_free_bucket(tdb, b_off);
-       return TDB_SUCCESS;
-
-unlock_err:
-       tdb_unlock_free_bucket(tdb, b_off);
-       return ecode;
-}
-
-/* Put the region at off (len_with_header bytes, header included) onto a
- * free list of the current free table, tdb->ftable_off.
- *
- * The bucket is chosen from the length minus sizeof(struct tdb_used_record)
- * and is locked using waitflag; a failure to lock is returned as-is.
- * enqueue_in_free() receives &coalesce_ok, so it may change the flag before
- * we test it below.
- *
- * Returns TDB_SUCCESS, or the error from locking, enqueueing or coalescing.
- *
- * List must not be locked if coalesce_ok is set. */
-enum TDB_ERROR add_free_record(struct tdb_context *tdb,
-                              tdb_off_t off, tdb_len_t len_with_header,
-                              enum tdb_lock_flags waitflag,
-                              bool coalesce_ok)
-{
-       tdb_off_t b_off;
-       tdb_len_t len;
-       enum TDB_ERROR ecode;
-
-       assert(len_with_header >= sizeof(struct tdb_free_record));
-
-       len = len_with_header - sizeof(struct tdb_used_record);
-
-       b_off = bucket_off(tdb->ftable_off, size_to_bucket(len));
-       ecode = tdb_lock_free_bucket(tdb, b_off, waitflag);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       ecode = enqueue_in_free(tdb, b_off, off, len, &coalesce_ok);
-       check_list(tdb, b_off);
-
-       /* Coalescing unlocks free list, so we only unlock it ourselves when
-        * we are not going to coalesce (error, or coalesce_ok is false). */
-       if (!ecode && coalesce_ok)
-               ecode = coalesce_list(tdb, tdb->ftable_off, b_off, 2);
-       else
-               tdb_unlock_free_bucket(tdb, b_off);
-       return ecode;
-}
-
-static size_t adjust_size(size_t keylen, size_t datalen)
-{      /* Bytes to reserve for a record body of keylen + datalen bytes:
-        * never less than TDB_MIN_DATA_LEN, and always a multiple of
-        * sizeof(uint64_t). */
-       size_t size = keylen + datalen;
-
-       if (size < TDB_MIN_DATA_LEN)
-               size = TDB_MIN_DATA_LEN;
-
-       /* Round up to the next uint64_t boundary (a size already on a
-        * boundary is left unchanged). */
-       return (size + (sizeof(uint64_t) - 1ULL)) & ~(sizeof(uint64_t) - 1ULL);
-}
-
-/* If we have enough left over to be useful, split that off.
- *
- * total_len is the size of the block being carved up.  With want_extra,
- * datalen is first padded by 50%.  Returns the number of bytes remaining
- * after adjust_size(keylen, datalen), or 0 when that remainder is smaller
- * than a struct tdb_free_record (the comparison is signed, so a block that
- * is too small for the padded size also yields 0). */
-static size_t record_leftover(size_t keylen, size_t datalen,
-                             bool want_extra, size_t total_len)
-{
-       ssize_t leftover;
-
-       if (want_extra)
-               datalen += datalen / 2;
-       leftover = total_len - adjust_size(keylen, datalen);
-
-       if (leftover < (ssize_t)sizeof(struct tdb_free_record))
-               return 0;
-
-       return leftover;
-}
-
-/* We need size bytes to put our key and data in.
- *
- * Search one free-list bucket (index 'bucket' in the table at ftable_off)
- * for a block of at least adjust_size(keylen, datalen) bytes, holding the
- * bucket lock throughout.  The smallest fitting block seen so far is
- * remembered; the walk stops as soon as that block is no larger than
- * size * multiplier, where multiplier starts at 1.0 (1.5 with want_extra)
- * and grows by 1% for every entry visited.
- *
- * On success the block is unlinked, a used-record header built from
- * magic/keylen/datalen/hashlow is written at its start, and any useful
- * remainder is handed back through add_free_record().
- *
- * Returns the offset of the new record, 0 if nothing in this bucket fits,
- * or an error encoded with TDB_ERR_TO_OFF(). */
-static tdb_off_t lock_and_alloc(struct tdb_context *tdb,
-                               tdb_off_t ftable_off,
-                               tdb_off_t bucket,
-                               size_t keylen, size_t datalen,
-                               bool want_extra,
-                               unsigned magic,
-                               unsigned hashlow)
-{
-       tdb_off_t off, b_off,best_off;
-       struct tdb_free_record best = { 0 };
-       double multiplier;
-       size_t size = adjust_size(keylen, datalen);
-       enum TDB_ERROR ecode;
-
-       tdb->stats.allocs++;
-       b_off = bucket_off(ftable_off, bucket);
-
-       /* FIXME: Try non-blocking wait first, to measure contention. */
-       /* Lock this bucket. */
-       ecode = tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT);
-       if (ecode != TDB_SUCCESS) {
-               return TDB_ERR_TO_OFF(ecode);
-       }
-
-       best.ftable_and_len = -1ULL; /* all bits set until a candidate replaces it */
-       best_off = 0;
-
-       /* Get slack if we're after extra. */
-       if (want_extra)
-               multiplier = 1.5;
-       else
-               multiplier = 1.0;
-
-       /* Walk the list to see if any are large enough, getting less fussy
-        * as we go. */
-       off = tdb_read_off(tdb, b_off);
-       if (TDB_OFF_IS_ERR(off)) {
-               ecode = TDB_OFF_TO_ERR(off);
-               goto unlock_err;
-       }
-       off &= TDB_OFF_MASK;
-
-       while (off) {
-               const struct tdb_free_record *r;
-               tdb_len_t len;
-               tdb_off_t next;
-
-               r = tdb_access_read(tdb, off, sizeof(*r), true);
-               if (TDB_PTR_IS_ERR(r)) {
-                       ecode = TDB_PTR_ERR(r);
-                       goto unlock_err;
-               }
-
-               if (frec_magic(r) != TDB_FREE_MAGIC) {
-                       ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                          "lock_and_alloc:"
-                                          " %llu non-free 0x%llx",
-                                          (long long)off,
-                                          (long long)r->magic_and_prev);
-                       tdb_access_release(tdb, r);
-                       goto unlock_err;
-               }
-
-               if (frec_len(r) >= size && frec_len(r) < frec_len(&best)) {
-                       best_off = off;
-                       best = *r;
-               }
-
-               if (frec_len(&best) <= size * multiplier && best_off) {
-                       tdb_access_release(tdb, r);
-                       break;
-               }
-
-               multiplier *= 1.01;
-
-               next = r->next; /* copy out before r is released */
-               len = frec_len(r); /* NOTE(review): len is never read after this */
-               tdb_access_release(tdb, r);
-               off = next;
-       }
-
-       /* If we found anything at all, use it. */
-       if (best_off) {
-               struct tdb_used_record rec;
-               size_t leftover;
-
-               /* We're happy with this size: take it.
-                * NOTE(review): once this succeeds, every later failure in
-                * this branch only unlocks; the unlinked block is not put
-                * back on the list here. */
-               ecode = remove_from_list(tdb, b_off, best_off, &best);
-               check_list(tdb, b_off);
-               if (ecode != TDB_SUCCESS) {
-                       goto unlock_err;
-               }
-
-               leftover = record_leftover(keylen, datalen, want_extra,
-                                          frec_len(&best));
-
-               assert(keylen + datalen + leftover <= frec_len(&best));
-               /* We need to mark non-free before we drop lock, otherwise
-                * coalesce() could try to merge it! */
-               ecode = set_header(tdb, &rec, magic, keylen, datalen,
-                                  frec_len(&best) - leftover, hashlow);
-               if (ecode != TDB_SUCCESS) {
-                       goto unlock_err;
-               }
-
-               ecode = tdb_write_convert(tdb, best_off, &rec, sizeof(rec));
-               if (ecode != TDB_SUCCESS) {
-                       goto unlock_err;
-               }
-
-               /* For futureproofing, we put a 0 in any unused space: a
-                * single zero byte written just past the key and data. */
-               if (rec_extra_padding(&rec)) {
-                       ecode = tdb->io->twrite(tdb, best_off + sizeof(rec)
-                                               + keylen + datalen, "", 1);
-                       if (ecode != TDB_SUCCESS) {
-                               goto unlock_err;
-                       }
-               }
-
-               /* Bucket of leftover will be <= current bucket, so nested
-                * locking is allowed.  The leftover starts right after the
-                * part we kept; coalescing is disabled because we still hold
-                * this bucket's lock. */
-               if (leftover) {
-                       tdb->stats.alloc_leftover++;
-                       ecode = add_free_record(tdb,
-                                               best_off + sizeof(rec)
-                                               + frec_len(&best) - leftover,
-                                               leftover, TDB_LOCK_WAIT, false);
-                       if (ecode != TDB_SUCCESS) {
-                               best_off = TDB_ERR_TO_OFF(ecode);
-                       }
-               }
-               tdb_unlock_free_bucket(tdb, b_off);
-
-               return best_off;
-       }
-
-       tdb_unlock_free_bucket(tdb, b_off);
-       return 0;
-
-unlock_err:
-       tdb_unlock_free_bucket(tdb, b_off);
-       return TDB_ERR_TO_OFF(ecode);
-}
-
-/* Get a free block from current free list, or 0 if none, -ve on error.
- *
- * Starting with the free table at tdb->ftable_off, try each non-empty
- * bucket from the one matching the (optionally 50% padded) request size
- * upwards, asking lock_and_alloc() for a block.  If a table yields nothing,
- * move on with next_ftable(), wrapping round to first_ftable() once, and
- * give up on arriving back at the table we started from.  A table that
- * satisfies the request becomes the current one (tdb->ftable_off and
- * tdb->ftable are updated). */
-static tdb_off_t get_free(struct tdb_context *tdb,
-                         size_t keylen, size_t datalen, bool want_extra,
-                         unsigned magic, unsigned hashlow)
-{
-       tdb_off_t off, ftable_off;
-       tdb_off_t start_b, b, ftable;
-       bool wrapped = false;
-
-       /* If they are growing, add 50% to get to higher bucket. */
-       if (want_extra)
-               start_b = size_to_bucket(adjust_size(keylen,
-                                                    datalen + datalen / 2));
-       else
-               start_b = size_to_bucket(adjust_size(keylen, datalen));
-
-       ftable_off = tdb->ftable_off;
-       ftable = tdb->ftable;
-       while (!wrapped || ftable_off != tdb->ftable_off) {
-               /* Start at exact size bucket, and search up... */
-               for (b = find_free_head(tdb, ftable_off, start_b);
-                    b < TDB_FREE_BUCKETS;
-                    b = find_free_head(tdb, ftable_off, b + 1)) {
-                       /* Try getting one from list. */
-                       off = lock_and_alloc(tdb, ftable_off,
-                                            b, keylen, datalen, want_extra,
-                                            magic, hashlow);
-                       if (TDB_OFF_IS_ERR(off))
-                               return off;
-                       if (off != 0) {
-                               if (b == start_b)
-                                       tdb->stats.alloc_bucket_exact++;
-                               if (b == TDB_FREE_BUCKETS - 1)
-                                       tdb->stats.alloc_bucket_max++;
-                               /* Worked?  Stay using this list. */
-                               tdb->ftable_off = ftable_off;
-                               tdb->ftable = ftable;
-                               return off;
-                       }
-                       /* Didn't work.  Try next bucket. */
-               }
-
-               /* The loop also ends when find_free_head() hands back an
-                * error value; pass that on rather than trying more tables. */
-               if (TDB_OFF_IS_ERR(b)) {
-                       return b;
-               }
-
-               /* Hmm, try next table. */
-               ftable_off = next_ftable(tdb, ftable_off);
-               if (TDB_OFF_IS_ERR(ftable_off)) {
-                       return ftable_off;
-               }
-               ftable++;
-
-               if (ftable_off == 0) { /* ran off the end: restart at the first table */
-                       wrapped = true;
-                       ftable_off = first_ftable(tdb);
-                       if (TDB_OFF_IS_ERR(ftable_off)) {
-                               return ftable_off;
-                       }
-                       ftable = 0;
-               }
-       }
-
-       return 0;
-}
-
-enum TDB_ERROR set_header(struct tdb_context *tdb,
-                         struct tdb_used_record *rec,
-                         unsigned magic, uint64_t keylen, uint64_t datalen,
-                         uint64_t actuallen, unsigned hashlow)
-{      /* Fill in *rec for a record of keylen + datalen bytes occupying
-        * actuallen bytes, so actuallen - (keylen + datalen) is padding.
-        *
-        * magic_and_meta, from bit 0 up: low 11 bits of hashlow, the
-        * padding length from bit 11, keybits from bit 43, magic from
-        * bit 48.  key_and_data_len holds keylen in its low keybits*2 bits
-        * with datalen above it.
-        *
-        * Returns TDB_SUCCESS, or logs and returns TDB_ERR_IO when the
-        * values do not read back unchanged through the rec_*() accessors. */
-       uint64_t keybits = (fls64(keylen) + 1) / 2;
-
-       /* Use bottom bits of hash, so it's independent of hash table size. */
-       rec->magic_and_meta = (hashlow & ((1 << 11)-1))
-               | ((actuallen - (keylen + datalen)) << 11)
-               | (keybits << 43)
-               | ((uint64_t)magic << 48);
-       rec->key_and_data_len = (keylen | (datalen << (keybits*2)));
-
-       /* Encoding can fail on big values: decode what we just stored and
-        * check that nothing overflowed its field. */
-       if (rec_key_length(rec) != keylen
-           || rec_data_length(rec) != datalen
-           || rec_extra_padding(rec) != actuallen - (keylen + datalen)) {
-               return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                 "Could not encode k=%llu,d=%llu,a=%llu",
-                                 (long long)keylen, (long long)datalen,
-                                 (long long)actuallen);
-       }
-       return TDB_SUCCESS;
-}
-
-/* You need 'size', this tells you how much you should expand by.
- *
- * map_size is the current size of the database.  The return value is the
- * number of bytes to add, i.e. the chosen new size minus map_size. */
-tdb_off_t tdb_expand_adjust(tdb_off_t map_size, tdb_off_t size)
-{
-       tdb_off_t new_size, top_size;
-
-       /* limit size in order to avoid using up huge amounts of memory for
-        * in-memory tdbs if an oddball huge record creeps in: requests over
-        * 100KiB get room for 2 of them, smaller ones room for 100. */
-       if (size > 100 * 1024) {
-               top_size = map_size + size * 2;
-       } else {
-               top_size = map_size + size * 100;
-       }
-
-       /* Always grow to at least top_size (enforced below).  As a baseline,
-          grow by 25% if the DB is no larger than 100MiB, otherwise by 10%
-          only. */
-       if (map_size > 100 * 1024 * 1024) {
-               new_size = map_size * 1.10;
-       } else {
-               new_size = map_size * 1.25;
-       }
-
-       /* Take the larger of the two targets; no page-size rounding is done here. */
-       if (new_size < top_size)
-               new_size = top_size;
-       return new_size - map_size;
-}
-
-/* Expand the database.
- *
- * size is the amount the caller needs; the file is grown by more than that
- * (see tdb_expand_adjust()) and the new space is added as one free record.
- * Unless TDB_NOLOCK is set, the caller must hold the allrecord lock or a
- * hash lock, otherwise TDB_ERR_LOCK is logged and returned.
- *
- * Returns TDB_SUCCESS without growing anything if the map size changed
- * while we waited for the expand lock. */
-static enum TDB_ERROR tdb_expand(struct tdb_context *tdb, tdb_len_t size)
-{
-       uint64_t old_size;
-       tdb_len_t wanted;
-       enum TDB_ERROR ecode;
-
-       /* Need to hold a hash lock to expand DB: transactions rely on it. */
-       if (!(tdb->flags & TDB_NOLOCK)
-           && !tdb->file->allrecord_lock.count && !tdb_has_hash_locks(tdb)) {
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                 "tdb_expand: must hold lock during expand");
-       }
-
-       /* Only one person can expand file at a time. */
-       ecode = tdb_lock_expand(tdb, F_WRLCK);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       /* Someone else may have expanded the file, so retry.
-        * The oob() result is deliberately not looked at; the call is made
-        * only so that map_size can change (presumably it re-checks the
-        * file size -- confirm against the io methods). */
-       old_size = tdb->file->map_size;
-       tdb->io->oob(tdb, tdb->file->map_size, 1, true);
-       if (tdb->file->map_size != old_size) {
-               tdb_unlock_expand(tdb, F_WRLCK);
-               return TDB_SUCCESS;
-       }
-
-       /* Overallocate. */
-       wanted = tdb_expand_adjust(old_size, size);
-       /* We need room for the record header too. */
-       wanted = adjust_size(0, sizeof(struct tdb_used_record) + wanted);
-
-       ecode = tdb->io->expand_file(tdb, wanted);
-       if (ecode != TDB_SUCCESS) {
-               tdb_unlock_expand(tdb, F_WRLCK);
-               return ecode;
-       }
-
-       /* We need to drop this lock before adding free record. */
-       tdb_unlock_expand(tdb, F_WRLCK);
-
-       tdb->stats.expands++;
-       /* The new space starts where the file used to end. */
-       return add_free_record(tdb, old_size, wanted, TDB_LOCK_WAIT, true);
-}
-
-/* This won't fail for lack of space: it will expand the database if it has
- * to.  It can still return an error, encoded as an offset: anything non-zero
- * from get_free() (a record offset or an error value) ends the loop and is
- * returned, and a failed tdb_expand() is returned via TDB_ERR_TO_OFF().
- *
- * 'growing' is passed to get_free() as want_extra, and 'hash' as hashlow
- * (narrowed from uint64_t to unsigned by that call). */
-tdb_off_t alloc(struct tdb_context *tdb, size_t keylen, size_t datalen,
-               uint64_t hash, unsigned magic, bool growing)
-{
-       tdb_off_t off;
-
-       /* We can't hold pointers during this: we could unmap! */
-       assert(!tdb->direct_access);
-
-       for (;;) {
-               enum TDB_ERROR ecode;
-               off = get_free(tdb, keylen, datalen, growing, magic, hash);
-               if (likely(off != 0))
-                       break;
-
-               ecode = tdb_expand(tdb, adjust_size(keylen, datalen));
-               if (ecode != TDB_SUCCESS) {
-                       return TDB_ERR_TO_OFF(ecode);
-               }
-       }
-
-       return off;
-}
diff --git a/lib/tdb2/hash.c b/lib/tdb2/hash.c
deleted file mode 100644 (file)
index 067884a..0000000
+++ /dev/null
@@ -1,894 +0,0 @@
- /*
-   Trivial Database 2: hash handling
-   Copyright (C) Rusty Russell 2010
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-#include "private.h"
-#include <ccan/hash/hash.h>
-#include <assert.h>
-
-/* Default hash function: hash64_stable() over the length bytes at key,
- * using seed, with the two 32-bit halves of the result exchanged.  The
- * last parameter is not used. */
-uint64_t tdb_jenkins_hash(const void *key, size_t length, uint64_t seed,
-                         void *unused)
-{
-       uint64_t ret;
-       /* hash64_stable assumes lower bits are more important; they are a
-        * slightly better hash.  We use the upper bits first, so swap them. */
-       ret = hash64_stable((const unsigned char *)key, length, seed);
-       return (ret >> 32) | (ret << 32);
-}
-
-uint64_t tdb_hash(struct tdb_context *tdb, const void *ptr, size_t len)
-{      /* Hash the len bytes at ptr with this database's own hash
-        * function, seed and private data. */
-       return tdb->hash_fn(ptr, len, tdb->hash_seed, tdb->hash_data);
-}
-
-uint64_t hash_record(struct tdb_context *tdb, tdb_off_t off)
-{      /* Compute the hash of the key stored in the record at off: read
-        * the header to learn the key length, then hash the key bytes that
-        * follow it.  Read failures are reported as a hash of 0. */
-       const struct tdb_used_record *r;
-       const void *key;
-       uint64_t klen, hash;
-
-       r = tdb_access_read(tdb, off, sizeof(*r), true);
-       if (TDB_PTR_IS_ERR(r)) {
-               /* FIXME: the error is dropped; the caller cannot tell this
-                * 0 apart from a genuine hash value of 0. */
-               return 0;
-       }
-
-       klen = rec_key_length(r);
-       tdb_access_release(tdb, r);
-
-       key = tdb_access_read(tdb, off + sizeof(*r), klen, false);
-       if (TDB_PTR_IS_ERR(key)) {
-               return 0; /* same silent fallback as above */
-       }
-
-       hash = tdb_hash(tdb, key, klen);
-       tdb_access_release(tdb, key);
-       return hash;
-}
-
-/* Get bits from a value: returns the num bits of val that start at bit
- * position start (counting from the least significant bit).
- * NOTE(review): the assert admits num == 32, but 1U << 32 is undefined
- * where unsigned int is 32 bits wide; callers should stay below that. */
-static uint32_t bits_from(uint64_t val, unsigned start, unsigned num)
-{
-       assert(num <= 32);
-       return (val >> start) & ((1U << num) - 1);
-}
-
-/* We take bits from the top: that way we can lock whole sections of the hash
- * by using lock ranges.
- *
- * Consumes the next num not-yet-used bits of h->h, working down from bit 63,
- * and returns them; h->hash_used is advanced by num. */
-static uint32_t use_bits(struct hash_info *h, unsigned num)
-{
-       h->hash_used += num;
-       return bits_from(h->h, 64 - h->hash_used, num);
-}
-
-static tdb_bool_err key_matches(struct tdb_context *tdb,
-                               const struct tdb_used_record *rec,
-                               tdb_off_t off,
-                               const struct tdb_data *key)
-{      /* Compare *key with the key stored directly after the record
-        * header at off.  Returns true when they are identical, false when
-        * the length or the bytes differ (a stats counter records which),
-        * or the tdb_access_read() error cast to tdb_bool_err. */
-       tdb_bool_err ret = false;
-       const char *rkey;
-
-       if (rec_key_length(rec) != key->dsize) {
-               tdb->stats.compare_wrong_keylen++;
-               return ret;
-       }
-
-       rkey = tdb_access_read(tdb, off + sizeof(*rec), key->dsize, false);
-       if (TDB_PTR_IS_ERR(rkey)) {
-               return (tdb_bool_err)TDB_PTR_ERR(rkey);
-       }
-       if (memcmp(rkey, key->dptr, key->dsize) == 0)
-               ret = true;
-       else
-               tdb->stats.compare_wrong_keycmp++;
-       tdb_access_release(tdb, rkey);
-       return ret;
-}
-
-/* Does entry match?
- *
- * val is a raw hash-group entry.  Checks run from cheapest to dearest, each
- * miss bumping its own stats counter: the home bucket stored in val, the
- * extra hash bits stored in val, the low 11 hash bits kept in the record
- * header, and finally the key itself.  *rec is filled in once the record
- * header has been read.
- *
- * Returns true/false, or a negative error cast to tdb_bool_err. */
-static tdb_bool_err match(struct tdb_context *tdb,
-                         struct hash_info *h,
-                         const struct tdb_data *key,
-                         tdb_off_t val,
-                         struct tdb_used_record *rec)
-{
-       tdb_off_t off;
-       enum TDB_ERROR ecode;
-
-       tdb->stats.compares++;
-       /* Desired bucket must match. */
-       if (h->home_bucket != (val & TDB_OFF_HASH_GROUP_MASK)) {
-               tdb->stats.compare_wrong_bucket++;
-               return false;
-       }
-
-       /* Top bits of offset == next bits of hash. */
-       if (bits_from(val, TDB_OFF_HASH_EXTRA_BIT, TDB_OFF_UPPER_STEAL_EXTRA)
-           != bits_from(h->h, 64 - h->hash_used - TDB_OFF_UPPER_STEAL_EXTRA,
-                   TDB_OFF_UPPER_STEAL_EXTRA)) {
-               tdb->stats.compare_wrong_offsetbits++;
-               return false;
-       }
-
-       off = val & TDB_OFF_MASK;
-       ecode = tdb_read_convert(tdb, off, rec, sizeof(*rec));
-       if (ecode != TDB_SUCCESS) {
-               return (tdb_bool_err)ecode;
-       }
-
-       if ((h->h & ((1 << 11)-1)) != rec_hash(rec)) { /* same 11 bits set_header() stores */
-               tdb->stats.compare_wrong_rechash++;
-               return false;
-       }
-
-       return key_matches(tdb, rec, off, key);
-}
-
-static tdb_off_t hbucket_off(tdb_off_t group_start, unsigned bucket)
-{      /* File offset of slot 'bucket' (taken modulo the group size) in
-        * the hash group that begins at group_start. */
-       return group_start
-               + (bucket % (1 << TDB_HASH_GROUP_BITS)) * sizeof(tdb_off_t);
-}
-
-bool is_subhash(tdb_off_t val)
-{      /* True when bit TDB_OFF_UPPER_STEAL_SUBHASH_BIT of the entry is
-        * set; callers treat such an entry as a pointer to a sub-table. */
-       return (val >> TDB_OFF_UPPER_STEAL_SUBHASH_BIT) & 1;
-}
-
-/* FIXME: Guess the depth, don't over-lock!
- *
- * Returns the start of the lock range for top-level hash group 'group' and
- * stores the length of that range in *size.  Both come from shifting by
- * 64 - (TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS), so each group gets
- * an equal, contiguous slice of the 64-bit hash space. */
-static tdb_off_t hlock_range(tdb_off_t group, tdb_off_t *size)
-{
-       *size = 1ULL << (64 - (TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS));
-       return group << (64 - (TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS));
-}
-
-static tdb_off_t COLD find_in_chain(struct tdb_context *tdb,
-                                   struct tdb_data key,
-                                   tdb_off_t chain,
-                                   struct hash_info *h,
-                                   struct tdb_used_record *rec,
-                                   struct traverse_info *tinfo)
-{      /* Search a chain of hash groups, starting at offset 'chain', for
-        * key.  Every slot of every group is compared by full key; no hash
-        * bits are consulted.  h->group_start/h->group describe the last
-        * group read, and h->home_bucket/h->found_bucket name either the
-        * matching slot or the last empty slot seen.
-        *
-        * Returns the record offset when found (also appending a level to
-        * tinfo if given), 0 when not found, or an error encoded as an
-        * offset.  No locks are taken or dropped here. */
-       tdb_off_t off, next;
-       enum TDB_ERROR ecode;
-
-       /* In case nothing is free, we set these to zero. */
-       h->home_bucket = h->found_bucket = 0;
-
-       for (off = chain; off; off = next) {
-               unsigned int i;
-
-               h->group_start = off;
-               ecode = tdb_read_convert(tdb, off, h->group, sizeof(h->group));
-               if (ecode != TDB_SUCCESS) {
-                       return TDB_ERR_TO_OFF(ecode);
-               }
-
-               for (i = 0; i < (1 << TDB_HASH_GROUP_BITS); i++) {
-                       tdb_off_t recoff;
-                       if (!h->group[i]) {
-                               /* Remember this empty bucket. */
-                               h->home_bucket = h->found_bucket = i;
-                               continue;
-                       }
-
-                       /* We can insert extra bits via add_to_hash
-                        * empty bucket logic. */
-                       recoff = h->group[i] & TDB_OFF_MASK;
-                       ecode = tdb_read_convert(tdb, recoff, rec,
-                                                sizeof(*rec));
-                       if (ecode != TDB_SUCCESS) {
-                               return TDB_ERR_TO_OFF(ecode);
-                       }
-
-                       ecode = TDB_OFF_TO_ERR(key_matches(tdb, rec, recoff,
-                                                          &key));
-                       if (ecode < 0) {
-                               return TDB_ERR_TO_OFF(ecode);
-                       }
-                       if (ecode == (enum TDB_ERROR)1) { /* 1 is key_matches() saying true */
-                               h->home_bucket = h->found_bucket = i;
-
-                               if (tinfo) {
-                                       tinfo->levels[tinfo->num_levels]
-                                               .hashtable = off;
-                                       tinfo->levels[tinfo->num_levels]
-                                               .total_buckets
-                                               = 1 << TDB_HASH_GROUP_BITS;
-                                       tinfo->levels[tinfo->num_levels].entry
-                                               = i;
-                                       tinfo->num_levels++;
-                               }
-                               return recoff;
-                       }
-               }
-               next = tdb_read_off(tdb, off
-                                   + offsetof(struct tdb_chain, next));
-               if (TDB_OFF_IS_ERR(next)) {
-                       return next;
-               }
-               if (next) /* stored link points at the record header; step over it */
-                       next += sizeof(struct tdb_used_record);
-       }
-       return 0;
-}
-
-/* This is the core routine which searches the hashtable for an entry.
- * On error, no locks are held and -ve is returned.
- * Otherwise, hinfo is filled in (and the optional tinfo).
- * If not found, the return value is 0.
- * If found, the return value is the offset, and *rec is the record.
- *
- * The key is hashed, the hash range for its top-level group is locked with
- * ltype, and the tables are then descended: whenever the home bucket holds
- * a sub-table pointer, more hash bits are consumed and the walk continues
- * there.  Once hash_used goes past 64 the loop ends and find_in_chain()
- * takes over.
- *
- * NOTE(review): the fail: label releases the hash lock, but an error coming
- * back from find_in_chain() is returned directly without passing through
- * it -- check that against the "no locks are held" promise above. */
-tdb_off_t find_and_lock(struct tdb_context *tdb,
-                       struct tdb_data key,
-                       int ltype,
-                       struct hash_info *h,
-                       struct tdb_used_record *rec,
-                       struct traverse_info *tinfo)
-{
-       uint32_t i, group;
-       tdb_off_t hashtable;
-       enum TDB_ERROR ecode;
-
-       h->h = tdb_hash(tdb, key.dptr, key.dsize);
-       h->hash_used = 0;
-       group = use_bits(h, TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS);
-       h->home_bucket = use_bits(h, TDB_HASH_GROUP_BITS);
-
-       h->hlock_start = hlock_range(group, &h->hlock_range);
-       ecode = tdb_lock_hashes(tdb, h->hlock_start, h->hlock_range, ltype,
-                               TDB_LOCK_WAIT);
-       if (ecode != TDB_SUCCESS) {
-               return TDB_ERR_TO_OFF(ecode);
-       }
-
-       hashtable = offsetof(struct tdb_header, hashtable);
-       if (tinfo) {
-               tinfo->toplevel_group = group;
-               tinfo->num_levels = 1;
-               tinfo->levels[0].entry = 0;
-               tinfo->levels[0].hashtable = hashtable
-                       + (group << TDB_HASH_GROUP_BITS) * sizeof(tdb_off_t);
-               tinfo->levels[0].total_buckets = 1 << TDB_HASH_GROUP_BITS;
-       }
-
-       while (h->hash_used <= 64) {
-               /* Read in the hash group. */
-               h->group_start = hashtable
-                       + group * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);
-
-               ecode = tdb_read_convert(tdb, h->group_start, &h->group,
-                                        sizeof(h->group));
-               if (ecode != TDB_SUCCESS) {
-                       goto fail;
-               }
-
-               /* Pointer to another hash table?  Go down... */
-               if (is_subhash(h->group[h->home_bucket])) {
-                       hashtable = (h->group[h->home_bucket] & TDB_OFF_MASK)
-                               + sizeof(struct tdb_used_record);
-                       if (tinfo) {
-                               /* When we come back, use *next* bucket */
-                               tinfo->levels[tinfo->num_levels-1].entry
-                                       += h->home_bucket + 1;
-                       }
-                       group = use_bits(h, TDB_SUBLEVEL_HASH_BITS
-                                        - TDB_HASH_GROUP_BITS);
-                       h->home_bucket = use_bits(h, TDB_HASH_GROUP_BITS);
-                       if (tinfo) {
-                               tinfo->levels[tinfo->num_levels].hashtable
-                                       = hashtable;
-                               tinfo->levels[tinfo->num_levels].total_buckets
-                                       = 1 << TDB_SUBLEVEL_HASH_BITS;
-                               tinfo->levels[tinfo->num_levels].entry
-                                       = group << TDB_HASH_GROUP_BITS;
-                               tinfo->num_levels++;
-                       }
-                       continue;
-               }
-
-               /* It's in this group: search (until 0 or all searched),
-                * starting at the home bucket and wrapping round.  Slots
-                * holding sub-table pointers are stepped over. */
-               for (i = 0, h->found_bucket = h->home_bucket;
-                    i < (1 << TDB_HASH_GROUP_BITS);
-                    i++, h->found_bucket = ((h->found_bucket+1)
-                                            % (1 << TDB_HASH_GROUP_BITS))) {
-                       tdb_bool_err berr;
-                       if (is_subhash(h->group[h->found_bucket]))
-                               continue;
-
-                       if (!h->group[h->found_bucket])
-                               break;
-
-                       berr = match(tdb, h, &key, h->group[h->found_bucket],
-                                    rec);
-                       if (berr < 0) {
-                               ecode = TDB_OFF_TO_ERR(berr);
-                               goto fail;
-                       }
-                       if (berr) {
-                               if (tinfo) {
-                                       tinfo->levels[tinfo->num_levels-1].entry
-                                               += h->found_bucket;
-                               }
-                               return h->group[h->found_bucket] & TDB_OFF_MASK;
-                       }
-               }
-               /* Didn't find it: h indicates where it would go. */
-               return 0;
-       }
-
-       return find_in_chain(tdb, key, hashtable, h, rec, tinfo);
-
-fail:
-       tdb_unlock_hashes(tdb, h->hlock_start, h->hlock_range, ltype);
-       return TDB_ERR_TO_OFF(ecode);
-}
-
-/* I wrote a simple test, expanding a hash to 2GB, for the following
- * cases:
- * 1) Expanding all the buckets at once,
- * 2) Expanding the bucket we wanted to place the new entry into.
- * 3) Expanding the most-populated bucket,
- *
- * I measured the worst/average/best density during this process.
- * 1) 3%/16%/30%
- * 2) 4%/20%/38%
- * 3) 6%/22%/41%
- *
- * So we figure out the busiest bucket for the moment.
- *
- * Returns the home-bucket number shared by the most entries of 'group',
- * with the entry about to be added (new_bucket) counted once.  Entries
- * that point to sub-tables are ignored.  An empty slot has home-bucket
- * bits of 0 and so is counted for bucket 0; presumably this is only called
- * on a full group -- confirm at the call site.  The tdb argument is not
- * used.
- */
-static unsigned fullest_bucket(struct tdb_context *tdb,
-                              const tdb_off_t *group,
-                              unsigned new_bucket)
-{
-       unsigned counts[1 << TDB_HASH_GROUP_BITS] = { 0 };
-       unsigned int i, best_bucket;
-
-       /* Count the new entry. */
-       counts[new_bucket]++;
-       best_bucket = new_bucket;
-
-       for (i = 0; i < (1 << TDB_HASH_GROUP_BITS); i++) {
-               unsigned this_bucket;
-
-               if (is_subhash(group[i]))
-                       continue;
-               this_bucket = group[i] & TDB_OFF_HASH_GROUP_MASK;
-               if (++counts[this_bucket] > counts[best_bucket]) /* ties keep the earlier winner */
-                       best_bucket = this_bucket;
-       }
-
-       return best_bucket;
-}
-
-static bool put_into_group(tdb_off_t *group,
-                          unsigned bucket, tdb_off_t encoded)
-{      /* Store 'encoded' in the first empty (zero) slot of the in-memory
-        * group, probing from 'bucket' and wrapping round.  Returns false
-        * if every slot is occupied. */
-       unsigned int i;
-
-       for (i = 0; i < (1 << TDB_HASH_GROUP_BITS); i++) {
-               unsigned b = (bucket + i) % (1 << TDB_HASH_GROUP_BITS);
-
-               if (group[b] == 0) {
-                       group[b] = encoded;
-                       return true;
-               }
-       }
-       return false;
-}
-
-static void force_into_group(tdb_off_t *group,
-                            unsigned bucket, tdb_off_t encoded)
-{      /* As put_into_group(), for callers that know there is room: a
-        * full group aborts the process. */
-       if (!put_into_group(group, bucket, encoded))
-               abort();
-}
-
-static tdb_off_t encode_offset(tdb_off_t new_off, struct hash_info *h)
-{      /* Build the value stored in a hash slot for the record at
-        * new_off: the offset itself, OR-ed with the home bucket and with
-        * the next TDB_OFF_UPPER_STEAL_EXTRA unused hash bits placed at
-        * TDB_OFF_HASH_EXTRA_BIT.  match() tests these same fields. */
-       return h->home_bucket
-               | new_off
-               | ((uint64_t)bits_from(h->h,
-                                 64 - h->hash_used - TDB_OFF_UPPER_STEAL_EXTRA,
-                                 TDB_OFF_UPPER_STEAL_EXTRA)
-                  << TDB_OFF_HASH_EXTRA_BIT);
-}
-
-/* Simply overwrite the hash entry we found before: the slot named by
- * h->group_start and h->found_bucket is rewritten with the encoded form of
- * new_off.  Returns the result of the write. */
-enum TDB_ERROR replace_in_hash(struct tdb_context *tdb,
-                              struct hash_info *h,
-                              tdb_off_t new_off)
-{
-       return tdb_write_off(tdb, hbucket_off(h->group_start, h->found_bucket),
-                            encode_offset(new_off, h));
-}
-
-/* We slot in anywhere that's empty in the chain.
- *
- * Looks for a zero slot among the 1 << TDB_HASH_GROUP_BITS entries at
- * 'subhash' and writes new_off there.  If there is none, follows the chain
- * link, allocating, zeroing and linking a new struct tdb_chain first if the
- * link is empty, and recurses on the next element.
- *
- * NOTE(review): find_in_chain() adds sizeof(struct tdb_used_record) to a
- * link before reading the next group, whereas the recursive call below
- * passes 'next' unadjusted -- confirm which convention 'subhash' follows. */
-static enum TDB_ERROR COLD add_to_chain(struct tdb_context *tdb,
-                                       tdb_off_t subhash,
-                                       tdb_off_t new_off)
-{
-       tdb_off_t entry;
-       enum TDB_ERROR ecode;
-
-       entry = tdb_find_zero_off(tdb, subhash, 1<<TDB_HASH_GROUP_BITS);
-       if (TDB_OFF_IS_ERR(entry)) {
-               return TDB_OFF_TO_ERR(entry);
-       }
-
-       if (entry == 1 << TDB_HASH_GROUP_BITS) { /* no zero slot in this group */
-               tdb_off_t next;
-
-               next = tdb_read_off(tdb, subhash
-                                   + offsetof(struct tdb_chain, next));
-               if (TDB_OFF_IS_ERR(next)) {
-                       return TDB_OFF_TO_ERR(next);
-               }
-
-               if (!next) {
-                       next = alloc(tdb, 0, sizeof(struct tdb_chain), 0,
-                                    TDB_CHAIN_MAGIC, false);
-                       if (TDB_OFF_IS_ERR(next))
-                               return TDB_OFF_TO_ERR(next);
-                       ecode = zero_out(tdb,
-                                        next+sizeof(struct tdb_used_record),
-                                        sizeof(struct tdb_chain));
-                       if (ecode != TDB_SUCCESS) {
-                               return ecode;
-                       }
-                       ecode = tdb_write_off(tdb, subhash
-                                             + offsetof(struct tdb_chain,
-                                                        next),
-                                             next);
-                       if (ecode != TDB_SUCCESS) {
-                               return ecode;
-                       }
-               }
-               return add_to_chain(tdb, next, new_off);
-       }
-
-       return tdb_write_off(tdb, subhash + entry * sizeof(tdb_off_t),
-                            new_off);
-}
-
-/* Add into a newly created subhash. */
-static enum TDB_ERROR add_to_subhash(struct tdb_context *tdb, tdb_off_t subhash,
-                                    unsigned hash_used, tdb_off_t val)
-{
-       tdb_off_t off = (val & TDB_OFF_MASK), *group;
-       struct hash_info h;
-       unsigned int gnum;
-
-       h.hash_used = hash_used;
-
-       if (hash_used + TDB_SUBLEVEL_HASH_BITS > 64)
-               return add_to_chain(tdb, subhash, off);
-
-       h.h = hash_record(tdb, off);
-       gnum = use_bits(&h, TDB_SUBLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS);
-       h.group_start = subhash
-               + gnum * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);
-       h.home_bucket = use_bits(&h, TDB_HASH_GROUP_BITS);
-
-       group = tdb_access_write(tdb, h.group_start,
-                                sizeof(*group) << TDB_HASH_GROUP_BITS, true);
-       if (TDB_PTR_IS_ERR(group)) {
-               return TDB_PTR_ERR(group);
-       }
-       force_into_group(group, h.home_bucket, encode_offset(off, &h));
-       return tdb_access_commit(tdb, group);
-}
-
-static enum TDB_ERROR expand_group(struct tdb_context *tdb, struct hash_info *h)
-{
-       unsigned bucket, num_vals, i, magic;
-       size_t subsize;
-       tdb_off_t subhash;
-       tdb_off_t vals[1 << TDB_HASH_GROUP_BITS];
-       enum TDB_ERROR ecode;
-
-       /* Attach new empty subhash under fullest bucket. */
-       bucket = fullest_bucket(tdb, h->group, h->home_bucket);
-
-       if (h->hash_used == 64) {
-               tdb->stats.alloc_chain++;
-               subsize = sizeof(struct tdb_chain);
-               magic = TDB_CHAIN_MAGIC;
-       } else {
-               tdb->stats.alloc_subhash++;
-               subsize = (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS);
-               magic = TDB_HTABLE_MAGIC;
-       }
-
-       subhash = alloc(tdb, 0, subsize, 0, magic, false);
-       if (TDB_OFF_IS_ERR(subhash)) {
-               return TDB_OFF_TO_ERR(subhash);
-       }
-
-       ecode = zero_out(tdb, subhash + sizeof(struct tdb_used_record),
-                        subsize);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       /* Remove any which are destined for bucket or are in wrong place. */
-       num_vals = 0;
-       for (i = 0; i < (1 << TDB_HASH_GROUP_BITS); i++) {
-               unsigned home_bucket = h->group[i] & TDB_OFF_HASH_GROUP_MASK;
-               if (!h->group[i] || is_subhash(h->group[i]))
-                       continue;
-               if (home_bucket == bucket || home_bucket != i) {
-                       vals[num_vals++] = h->group[i];
-                       h->group[i] = 0;
-               }
-       }
-       /* FIXME: This assert is valid, but we do this during unit test :( */
-       /* assert(num_vals); */
-
-       /* Overwrite expanded bucket with subhash pointer. */
-       h->group[bucket] = subhash | (1ULL << TDB_OFF_UPPER_STEAL_SUBHASH_BIT);
-
-       /* Point to actual contents of record. */
-       subhash += sizeof(struct tdb_used_record);
-
-       /* Put values back. */
-       for (i = 0; i < num_vals; i++) {
-               unsigned this_bucket = vals[i] & TDB_OFF_HASH_GROUP_MASK;
-
-               if (this_bucket == bucket) {
-                       ecode = add_to_subhash(tdb, subhash, h->hash_used,
-                                              vals[i]);
-                       if (ecode != TDB_SUCCESS)
-                               return ecode;
-               } else {
-                       /* There should be room to put this back. */
-                       force_into_group(h->group, this_bucket, vals[i]);
-               }
-       }
-       return TDB_SUCCESS;
-}
-
-enum TDB_ERROR delete_from_hash(struct tdb_context *tdb, struct hash_info *h)
-{
-       unsigned int i, num_movers = 0;
-       tdb_off_t movers[1 << TDB_HASH_GROUP_BITS];
-
-       h->group[h->found_bucket] = 0;
-       for (i = 1; i < (1 << TDB_HASH_GROUP_BITS); i++) {
-               unsigned this_bucket;
-
-               this_bucket = (h->found_bucket+i) % (1 << TDB_HASH_GROUP_BITS);
-               /* Empty bucket?  We're done. */
-               if (!h->group[this_bucket])
-                       break;
-
-               /* Ignore subhashes. */
-               if (is_subhash(h->group[this_bucket]))
-                       continue;
-
-               /* If this one is not happy where it is, we'll move it. */
-               if ((h->group[this_bucket] & TDB_OFF_HASH_GROUP_MASK)
-                   != this_bucket) {
-                       movers[num_movers++] = h->group[this_bucket];
-                       h->group[this_bucket] = 0;
-               }
-       }
-
-       /* Put back the ones we erased. */
-       for (i = 0; i < num_movers; i++) {
-               force_into_group(h->group, movers[i] & TDB_OFF_HASH_GROUP_MASK,
-                                movers[i]);
-       }
-
-       /* Now we write back the hash group */
-       return tdb_write_convert(tdb, h->group_start,
-                                h->group, sizeof(h->group));
-}
-
-enum TDB_ERROR add_to_hash(struct tdb_context *tdb, struct hash_info *h,
-                          tdb_off_t new_off)
-{
-       enum TDB_ERROR ecode;
-
-       /* We hit an empty bucket during search?  That's where it goes. */
-       if (!h->group[h->found_bucket]) {
-               h->group[h->found_bucket] = encode_offset(new_off, h);
-               /* Write back the modified group. */
-               return tdb_write_convert(tdb, h->group_start,
-                                        h->group, sizeof(h->group));
-       }
-
-       if (h->hash_used > 64)
-               return add_to_chain(tdb, h->group_start, new_off);
-
-       /* We're full.  Expand. */
-       ecode = expand_group(tdb, h);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       if (is_subhash(h->group[h->home_bucket])) {
-               /* We were expanded! */
-               tdb_off_t hashtable;
-               unsigned int gnum;
-
-               /* Write back the modified group. */
-               ecode = tdb_write_convert(tdb, h->group_start, h->group,
-                                         sizeof(h->group));
-               if (ecode != TDB_SUCCESS) {
-                       return ecode;
-               }
-
-               /* Move hashinfo down a level. */
-               hashtable = (h->group[h->home_bucket] & TDB_OFF_MASK)
-                       + sizeof(struct tdb_used_record);
-               gnum = use_bits(h,TDB_SUBLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS);
-               h->home_bucket = use_bits(h, TDB_HASH_GROUP_BITS);
-               h->group_start = hashtable
-                       + gnum * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);
-               ecode = tdb_read_convert(tdb, h->group_start, &h->group,
-                                        sizeof(h->group));
-               if (ecode != TDB_SUCCESS) {
-                       return ecode;
-               }
-       }
-
-       /* Expanding the group must have made room if it didn't choose this
-        * bucket. */
-       if (put_into_group(h->group, h->home_bucket, encode_offset(new_off,h))){
-               return tdb_write_convert(tdb, h->group_start,
-                                        h->group, sizeof(h->group));
-       }
-
-       /* This can happen if all hashes in group (and us) dropped into same
-        * group in subhash. */
-       return add_to_hash(tdb, h, new_off);
-}
-
-/* Traverse support: returns offset of record, or 0 or -ve error. */
-static tdb_off_t iterate_hash(struct tdb_context *tdb,
-                             struct traverse_info *tinfo)
-{
-       tdb_off_t off, val, i;
-       struct traverse_level *tlevel;
-
-       tlevel = &tinfo->levels[tinfo->num_levels-1];
-
-again:
-       for (i = tdb_find_nonzero_off(tdb, tlevel->hashtable,
-                                     tlevel->entry, tlevel->total_buckets);
-            i != tlevel->total_buckets;
-            i = tdb_find_nonzero_off(tdb, tlevel->hashtable,
-                                     i+1, tlevel->total_buckets)) {
-               if (TDB_OFF_IS_ERR(i)) {
-                       return i;
-               }
-
-               val = tdb_read_off(tdb, tlevel->hashtable+sizeof(tdb_off_t)*i);
-               if (TDB_OFF_IS_ERR(val)) {
-                       return val;
-               }
-
-               off = val & TDB_OFF_MASK;
-
-               /* This makes the delete-all-in-traverse case work
-                * (and simplifies our logic a little). */
-               if (off == tinfo->prev)
-                       continue;
-
-               tlevel->entry = i;
-
-               if (!is_subhash(val)) {
-                       /* Found one. */
-                       tinfo->prev = off;
-                       return off;
-               }
-
-               /* When we come back, we want the next one */
-               tlevel->entry++;
-               tinfo->num_levels++;
-               tlevel++;
-               tlevel->hashtable = off + sizeof(struct tdb_used_record);
-               tlevel->entry = 0;
-               /* Next level is a chain? */
-               if (unlikely(tinfo->num_levels == TDB_MAX_LEVELS + 1))
-                       tlevel->total_buckets = (1 << TDB_HASH_GROUP_BITS);
-               else
-                       tlevel->total_buckets = (1 << TDB_SUBLEVEL_HASH_BITS);
-               goto again;
-       }
-
-       /* Nothing there? */
-       if (tinfo->num_levels == 1)
-               return 0;
-
-       /* Handle chained entries. */
-       if (unlikely(tinfo->num_levels == TDB_MAX_LEVELS + 1)) {
-               tlevel->hashtable = tdb_read_off(tdb, tlevel->hashtable
-                                                + offsetof(struct tdb_chain,
-                                                           next));
-               if (TDB_OFF_IS_ERR(tlevel->hashtable)) {
-                       return tlevel->hashtable;
-               }
-               if (tlevel->hashtable) {
-                       tlevel->hashtable += sizeof(struct tdb_used_record);
-                       tlevel->entry = 0;
-                       goto again;
-               }
-       }
-
-       /* Go back up and keep searching. */
-       tinfo->num_levels--;
-       tlevel--;
-       goto again;
-}
-
-/* Return success if we find something, TDB_ERR_NOEXIST if none. */
-enum TDB_ERROR next_in_hash(struct tdb_context *tdb,
-                           struct traverse_info *tinfo,
-                           TDB_DATA *kbuf, size_t *dlen)
-{
-       const unsigned group_bits = TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS;
-       tdb_off_t hl_start, hl_range, off;
-       enum TDB_ERROR ecode;
-
-       while (tinfo->toplevel_group < (1 << group_bits)) {
-               hl_start = (tdb_off_t)tinfo->toplevel_group
-                       << (64 - group_bits);
-               hl_range = 1ULL << group_bits;
-               ecode = tdb_lock_hashes(tdb, hl_start, hl_range, F_RDLCK,
-                                       TDB_LOCK_WAIT);
-               if (ecode != TDB_SUCCESS) {
-                       return ecode;
-               }
-
-               off = iterate_hash(tdb, tinfo);
-               if (off) {
-                       struct tdb_used_record rec;
-
-                       if (TDB_OFF_IS_ERR(off)) {
-                               ecode = TDB_OFF_TO_ERR(off);
-                               goto fail;
-                       }
-
-                       ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec));
-                       if (ecode != TDB_SUCCESS) {
-                               goto fail;
-                       }
-                       if (rec_magic(&rec) != TDB_USED_MAGIC) {
-                               ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                  TDB_LOG_ERROR,
-                                                  "next_in_hash:"
-                                                  " corrupt record at %llu",
-                                                  (long long)off);
-                               goto fail;
-                       }
-
-                       kbuf->dsize = rec_key_length(&rec);
-
-                       /* They want data as well? */
-                       if (dlen) {
-                               *dlen = rec_data_length(&rec);
-                               kbuf->dptr = tdb_alloc_read(tdb,
-                                                           off + sizeof(rec),
-                                                           kbuf->dsize
-                                                           + *dlen);
-                       } else {
-                               kbuf->dptr = tdb_alloc_read(tdb,
-                                                           off + sizeof(rec),
-                                                           kbuf->dsize);
-                       }
-                       tdb_unlock_hashes(tdb, hl_start, hl_range, F_RDLCK);
-                       if (TDB_PTR_IS_ERR(kbuf->dptr)) {
-                               return TDB_PTR_ERR(kbuf->dptr);
-                       }
-                       return TDB_SUCCESS;
-               }
-
-               tdb_unlock_hashes(tdb, hl_start, hl_range, F_RDLCK);
-
-               tinfo->toplevel_group++;
-               tinfo->levels[0].hashtable
-                       += (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);
-               tinfo->levels[0].entry = 0;
-       }
-       return TDB_ERR_NOEXIST;
-
-fail:
-       tdb_unlock_hashes(tdb, hl_start, hl_range, F_RDLCK);
-       return ecode;
-
-}
-
-enum TDB_ERROR first_in_hash(struct tdb_context *tdb,
-                            struct traverse_info *tinfo,
-                            TDB_DATA *kbuf, size_t *dlen)
-{
-       tinfo->prev = 0;
-       tinfo->toplevel_group = 0;
-       tinfo->num_levels = 1;
-       tinfo->levels[0].hashtable = offsetof(struct tdb_header, hashtable);
-       tinfo->levels[0].entry = 0;
-       tinfo->levels[0].total_buckets = (1 << TDB_HASH_GROUP_BITS);
-
-       return next_in_hash(tdb, tinfo, kbuf, dlen);
-}
-
-/* Even if the entry isn't in this hash bucket, you'd have to lock this
- * bucket to find it. */
-static enum TDB_ERROR chainlock(struct tdb_context *tdb, const TDB_DATA *key,
-                               int ltype, enum tdb_lock_flags waitflag,
-                               const char *func)
-{
-       enum TDB_ERROR ecode;
-       uint64_t h = tdb_hash(tdb, key->dptr, key->dsize);
-       tdb_off_t lockstart, locksize;
-       unsigned int group, gbits;
-
-       gbits = TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS;
-       group = bits_from(h, 64 - gbits, gbits);
-
-       lockstart = hlock_range(group, &locksize);
-
-       ecode = tdb_lock_hashes(tdb, lockstart, locksize, ltype, waitflag);
-       tdb_trace_1rec(tdb, func, *key);
-       return ecode;
-}
-
-/* lock/unlock one hash chain. This is meant to be used to reduce
-   contention - it cannot guarantee how many records will be locked */
-_PUBLIC_ enum TDB_ERROR tdb_chainlock(struct tdb_context *tdb, TDB_DATA key)
-{
-       return tdb->last_error = chainlock(tdb, &key, F_WRLCK, TDB_LOCK_WAIT,
-                                          "tdb_chainlock");
-}
-
-_PUBLIC_ void tdb_chainunlock(struct tdb_context *tdb, TDB_DATA key)
-{
-       uint64_t h = tdb_hash(tdb, key.dptr, key.dsize);
-       tdb_off_t lockstart, locksize;
-       unsigned int group, gbits;
-
-       gbits = TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS;
-       group = bits_from(h, 64 - gbits, gbits);
-
-       lockstart = hlock_range(group, &locksize);
-
-       tdb_trace_1rec(tdb, "tdb_chainunlock", key);
-       tdb_unlock_hashes(tdb, lockstart, locksize, F_WRLCK);
-}
-
-_PUBLIC_ enum TDB_ERROR tdb_chainlock_read(struct tdb_context *tdb, TDB_DATA key)
-{
-       return tdb->last_error = chainlock(tdb, &key, F_RDLCK, TDB_LOCK_WAIT,
-                                          "tdb_chainlock_read");
-}
-
-_PUBLIC_ void tdb_chainunlock_read(struct tdb_context *tdb, TDB_DATA key)
-{
-       uint64_t h = tdb_hash(tdb, key.dptr, key.dsize);
-       tdb_off_t lockstart, locksize;
-       unsigned int group, gbits;
-
-       gbits = TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS;
-       group = bits_from(h, 64 - gbits, gbits);
-
-       lockstart = hlock_range(group, &locksize);
-
-       tdb_trace_1rec(tdb, "tdb_chainunlock_read", key);
-       tdb_unlock_hashes(tdb, lockstart, locksize, F_RDLCK);
-}
diff --git a/lib/tdb2/io.c b/lib/tdb2/io.c
deleted file mode 100644 (file)
index ca044ae..0000000
+++ /dev/null
@@ -1,650 +0,0 @@
- /*
-   Unix SMB/CIFS implementation.
-
-   trivial database library
-
-   Copyright (C) Andrew Tridgell              1999-2005
-   Copyright (C) Paul `Rusty' Russell             2000
-   Copyright (C) Jeremy Allison                           2000-2003
-   Copyright (C) Rusty Russell                    2010
-
-     ** NOTE! The following LGPL license applies to the tdb
-     ** library. This does NOT imply that all of Samba is released
-     ** under the LGPL
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-#include "private.h"
-#include <assert.h>
-#include <ccan/likely/likely.h>
-
-void tdb_munmap(struct tdb_file *file)
-{
-       if (file->fd == -1)
-               return;
-
-       if (file->map_ptr) {
-               munmap(file->map_ptr, file->map_size);
-               file->map_ptr = NULL;
-       }
-}
-
-enum TDB_ERROR tdb_mmap(struct tdb_context *tdb)
-{
-       int mmap_flags;
-
-       if (tdb->flags & TDB_INTERNAL)
-               return TDB_SUCCESS;
-
-#ifndef HAVE_INCOHERENT_MMAP
-       if (tdb->flags & TDB_NOMMAP)
-               return TDB_SUCCESS;
-#endif
-
-       if ((tdb->open_flags & O_ACCMODE) == O_RDONLY)
-               mmap_flags = PROT_READ;
-       else
-               mmap_flags = PROT_READ | PROT_WRITE;
-
-       /* size_t can be smaller than off_t. */
-       if ((size_t)tdb->file->map_size == tdb->file->map_size) {
-               tdb->file->map_ptr = mmap(NULL, tdb->file->map_size,
-                                         mmap_flags,
-                                         MAP_SHARED, tdb->file->fd, 0);
-       } else
-               tdb->file->map_ptr = MAP_FAILED;
-
-       /*
-        * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
-        */
-       if (tdb->file->map_ptr == MAP_FAILED) {
-               tdb->file->map_ptr = NULL;
-#ifdef HAVE_INCOHERENT_MMAP
-               /* Incoherent mmap means everyone must mmap! */
-               return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                 "tdb_mmap failed for size %lld (%s)",
-                                 (long long)tdb->file->map_size,
-                                 strerror(errno));
-#else
-               tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
-                          "tdb_mmap failed for size %lld (%s)",
-                          (long long)tdb->file->map_size, strerror(errno));
-#endif
-       }
-       return TDB_SUCCESS;
-}
-
-/* check for an out of bounds access - if it is out of bounds then
-   see if the database has been expanded by someone else and expand
-   if necessary
-   note that "len" is the minimum length needed for the db.
-
-   If probe is true, len being too large isn't a failure.
-*/
-static enum TDB_ERROR tdb_oob(struct tdb_context *tdb,
-                             tdb_off_t off, tdb_len_t len, bool probe)
-{
-       struct stat st;
-       enum TDB_ERROR ecode;
-
-       /* We can't hold pointers during this: we could unmap! */
-       assert(!tdb->direct_access
-              || (tdb->flags & TDB_NOLOCK)
-              || tdb_has_expansion_lock(tdb));
-
-       if (len + off < len) {
-               if (probe)
-                       return TDB_SUCCESS;
-
-               return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                 "tdb_oob off %llu len %llu wrap\n",
-                                 (long long)off, (long long)len);
-       }
-
-       if (len + off <= tdb->file->map_size)
-               return TDB_SUCCESS;
-       if (tdb->flags & TDB_INTERNAL) {
-               if (probe)
-                       return TDB_SUCCESS;
-
-               tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                          "tdb_oob len %lld beyond internal"
-                          " malloc size %lld",
-                          (long long)(off + len),
-                          (long long)tdb->file->map_size);
-               return TDB_ERR_IO;
-       }
-
-       ecode = tdb_lock_expand(tdb, F_RDLCK);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       if (fstat(tdb->file->fd, &st) != 0) {
-               tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                          "Failed to fstat file: %s", strerror(errno));
-               tdb_unlock_expand(tdb, F_RDLCK);
-               return TDB_ERR_IO;
-       }
-
-       tdb_unlock_expand(tdb, F_RDLCK);
-
-       if (st.st_size < off + len) {
-               if (probe)
-                       return TDB_SUCCESS;
-
-               tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                          "tdb_oob len %llu beyond eof at %llu",
-                          (long long)(off + len), (long long)st.st_size);
-               return TDB_ERR_IO;
-       }
-
-       /* Unmap, update size, remap */
-       tdb_munmap(tdb->file);
-
-       tdb->file->map_size = st.st_size;
-       return tdb_mmap(tdb);
-}
-
-/* Endian conversion: we only ever deal with 8 byte quantities */
-void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
-{
-       assert(size % 8 == 0);
-       if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
-               uint64_t i, *p = (uint64_t *)buf;
-               for (i = 0; i < size / 8; i++)
-                       p[i] = bswap_64(p[i]);
-       }
-       return buf;
-}
-
-/* Return first non-zero offset in offset array, or end, or -ve error. */
-/* FIXME: Return the off? */
-uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
-                             tdb_off_t base, uint64_t start, uint64_t end)
-{
-       uint64_t i;
-       const uint64_t *val;
-
-       /* Zero vs non-zero is the same unconverted: minor optimization. */
-       val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
-                             (end - start) * sizeof(tdb_off_t), false);
-       if (TDB_PTR_IS_ERR(val)) {
-               return TDB_ERR_TO_OFF(TDB_PTR_ERR(val));
-       }
-
-       for (i = 0; i < (end - start); i++) {
-               if (val[i])
-                       break;
-       }
-       tdb_access_release(tdb, val);
-       return start + i;
-}
-
-/* Return first zero offset in num offset array, or num, or -ve error. */
-uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
-                          uint64_t num)
-{
-       uint64_t i;
-       const uint64_t *val;
-
-       /* Zero vs non-zero is the same unconverted: minor optimization. */
-       val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
-       if (TDB_PTR_IS_ERR(val)) {
-               return TDB_ERR_TO_OFF(TDB_PTR_ERR(val));
-       }
-
-       for (i = 0; i < num; i++) {
-               if (!val[i])
-                       break;
-       }
-       tdb_access_release(tdb, val);
-       return i;
-}
-
-enum TDB_ERROR zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
-{
-       char buf[8192] = { 0 };
-       void *p = tdb->io->direct(tdb, off, len, true);
-       enum TDB_ERROR ecode = TDB_SUCCESS;
-
-       assert(!(tdb->flags & TDB_RDONLY));
-       if (TDB_PTR_IS_ERR(p)) {
-               return TDB_PTR_ERR(p);
-       }
-       if (p) {
-               memset(p, 0, len);
-               return ecode;
-       }
-       while (len) {
-               unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
-               ecode = tdb->io->twrite(tdb, off, buf, todo);
-               if (ecode != TDB_SUCCESS) {
-                       break;
-               }
-               len -= todo;
-               off += todo;
-       }
-       return ecode;
-}
-
-tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
-{
-       tdb_off_t ret;
-       enum TDB_ERROR ecode;
-
-       if (likely(!(tdb->flags & TDB_CONVERT))) {
-               tdb_off_t *p = tdb->io->direct(tdb, off, sizeof(*p), false);
-               if (TDB_PTR_IS_ERR(p)) {
-                       return TDB_ERR_TO_OFF(TDB_PTR_ERR(p));
-               }
-               if (p)
-                       return *p;
-       }
-
-       ecode = tdb_read_convert(tdb, off, &ret, sizeof(ret));
-       if (ecode != TDB_SUCCESS) {
-               return TDB_ERR_TO_OFF(ecode);
-       }
-       return ret;
-}
-
-/* write a lump of data at a specified offset */
-static enum TDB_ERROR tdb_write(struct tdb_context *tdb, tdb_off_t off,
-                               const void *buf, tdb_len_t len)
-{
-       enum TDB_ERROR ecode;
-
-       if (tdb->flags & TDB_RDONLY) {
-               return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
-                                 "Write to read-only database");
-       }
-
-       ecode = tdb->io->oob(tdb, off, len, false);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       if (tdb->file->map_ptr) {
-               memcpy(off + (char *)tdb->file->map_ptr, buf, len);
-       } else {
-#ifdef HAVE_INCOHERENT_MMAP
-               return TDB_ERR_IO;
-#else
-               ssize_t ret;
-               ret = pwrite(tdb->file->fd, buf, len, off);
-               if (ret != len) {
-                       /* This shouldn't happen: we avoid sparse files. */
-                       if (ret >= 0)
-                               errno = ENOSPC;
-
-                       return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                         "tdb_write: %zi at %zu len=%zu (%s)",
-                                         ret, (size_t)off, (size_t)len,
-                                         strerror(errno));
-               }
-#endif
-       }
-       return TDB_SUCCESS;
-}
-
-/* read a lump of data at a specified offset */
-static enum TDB_ERROR tdb_read(struct tdb_context *tdb, tdb_off_t off,
-                              void *buf, tdb_len_t len)
-{
-       enum TDB_ERROR ecode;
-
-       ecode = tdb->io->oob(tdb, off, len, false);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       if (tdb->file->map_ptr) {
-               memcpy(buf, off + (char *)tdb->file->map_ptr, len);
-       } else {
-#ifdef HAVE_INCOHERENT_MMAP
-               return TDB_ERR_IO;
-#else
-               ssize_t r = pread(tdb->file->fd, buf, len, off);
-               if (r != len) {
-                       return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                         "tdb_read failed with %zi at %zu "
-                                         "len=%zu (%s) map_size=%zu",
-                                         r, (size_t)off, (size_t)len,
-                                         strerror(errno),
-                                         (size_t)tdb->file->map_size);
-               }
-#endif
-       }
-       return TDB_SUCCESS;
-}
-
-enum TDB_ERROR tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
-                                const void *rec, size_t len)
-{
-       enum TDB_ERROR ecode;
-
-       if (unlikely((tdb->flags & TDB_CONVERT))) {
-               void *conv = malloc(len);
-               if (!conv) {
-                       return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                         "tdb_write: no memory converting"
-                                         " %zu bytes", len);
-               }
-               memcpy(conv, rec, len);
-               ecode = tdb->io->twrite(tdb, off,
-                                       tdb_convert(tdb, conv, len), len);
-               free(conv);
-       } else {
-               ecode = tdb->io->twrite(tdb, off, rec, len);
-       }
-       return ecode;
-}
-
-enum TDB_ERROR tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
-                               void *rec, size_t len)
-{
-       enum TDB_ERROR ecode = tdb->io->tread(tdb, off, rec, len);
-       tdb_convert(tdb, rec, len);
-       return ecode;
-}
-
-enum TDB_ERROR tdb_write_off(struct tdb_context *tdb,
-                            tdb_off_t off, tdb_off_t val)
-{
-       if (tdb->flags & TDB_RDONLY) {
-               return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
-                                 "Write to read-only database");
-       }
-
-       if (likely(!(tdb->flags & TDB_CONVERT))) {
-               tdb_off_t *p = tdb->io->direct(tdb, off, sizeof(*p), true);
-               if (TDB_PTR_IS_ERR(p)) {
-                       return TDB_PTR_ERR(p);
-               }
-               if (p) {
-                       *p = val;
-                       return TDB_SUCCESS;
-               }
-       }
-       return tdb_write_convert(tdb, off, &val, sizeof(val));
-}
-
-static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
-                            tdb_len_t len, unsigned int prefix)
-{
-       unsigned char *buf;
-       enum TDB_ERROR ecode;
-
-       /* some systems don't like zero length malloc */
-       buf = malloc(prefix + len ? prefix + len : 1);
-       if (!buf) {
-               tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_USE_ERROR,
-                          "tdb_alloc_read malloc failed len=%zu",
-                          (size_t)(prefix + len));
-               return TDB_ERR_PTR(TDB_ERR_OOM);
-       } else {
-               ecode = tdb->io->tread(tdb, offset, buf+prefix, len);
-               if (unlikely(ecode != TDB_SUCCESS)) {
-                       free(buf);
-                       return TDB_ERR_PTR(ecode);
-               }
-       }
-       return buf;
-}
-
-/* read a lump of data, allocating the space for it */
-void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
-{
-       return _tdb_alloc_read(tdb, offset, len, 0);
-}
-
-static enum TDB_ERROR fill(struct tdb_context *tdb,
-                          const void *buf, size_t size,
-                          tdb_off_t off, tdb_len_t len)
-{
-       while (len) {
-               size_t n = len > size ? size : len;
-               ssize_t ret = pwrite(tdb->file->fd, buf, n, off);
-               if (ret != n) {
-                       if (ret >= 0)
-                               errno = ENOSPC;
-
-                       return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                         "fill failed:"
-                                         " %zi at %zu len=%zu (%s)",
-                                         ret, (size_t)off, (size_t)len,
-                                         strerror(errno));
-               }
-               len -= n;
-               off += n;
-       }
-       return TDB_SUCCESS;
-}
-
-/* expand a file.  we prefer to use ftruncate, as that is what posix
-  says to use for mmap expansion */
-static enum TDB_ERROR tdb_expand_file(struct tdb_context *tdb,
-                                     tdb_len_t addition)
-{
-       char buf[8192];
-       enum TDB_ERROR ecode;
-
-       if (tdb->flags & TDB_RDONLY) {
-               return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
-                                 "Expand on read-only database");
-       }
-
-       if (tdb->flags & TDB_INTERNAL) {
-               char *new = realloc(tdb->file->map_ptr,
-                                   tdb->file->map_size + addition);
-               if (!new) {
-                       return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                         "No memory to expand database");
-               }
-               tdb->file->map_ptr = new;
-               tdb->file->map_size += addition;
-               return TDB_SUCCESS;
-       } else {
-               /* Unmap before trying to write; old TDB claimed OpenBSD had
-                * problem with this otherwise. */
-               tdb_munmap(tdb->file);
-
-               /* If this fails, we try to fill anyway. */
-               if (ftruncate(tdb->file->fd, tdb->file->map_size + addition))
-                       ;
-
-               /* now fill the file with something. This ensures that the
-                  file isn't sparse, which would be very bad if we ran out of
-                  disk. This must be done with write, not via mmap */
-               memset(buf, 0x43, sizeof(buf));
-               ecode = fill(tdb, buf, sizeof(buf), tdb->file->map_size,
-                            addition);
-               if (ecode != TDB_SUCCESS)
-                       return ecode;
-               tdb->file->map_size += addition;
-               return tdb_mmap(tdb);
-       }
-}
-
-const void *tdb_access_read(struct tdb_context *tdb,
-                           tdb_off_t off, tdb_len_t len, bool convert)
-{
-       void *ret = NULL;
-
-       if (likely(!(tdb->flags & TDB_CONVERT))) {
-               ret = tdb->io->direct(tdb, off, len, false);
-
-               if (TDB_PTR_IS_ERR(ret)) {
-                       return ret;
-               }
-       }
-       if (!ret) {
-               struct tdb_access_hdr *hdr;
-               hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
-               if (TDB_PTR_IS_ERR(hdr)) {
-                       return hdr;
-               }
-               hdr->next = tdb->access;
-               tdb->access = hdr;
-               ret = hdr + 1;
-               if (convert) {
-                       tdb_convert(tdb, (void *)ret, len);
-               }
-       } else
-               tdb->direct_access++;
-
-       return ret;
-}
-
-void *tdb_access_write(struct tdb_context *tdb,
-                      tdb_off_t off, tdb_len_t len, bool convert)
-{
-       void *ret = NULL;
-
-       if (tdb->flags & TDB_RDONLY) {
-               tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
-                          "Write to read-only database");
-               return TDB_ERR_PTR(TDB_ERR_RDONLY);
-       }
-
-       if (likely(!(tdb->flags & TDB_CONVERT))) {
-               ret = tdb->io->direct(tdb, off, len, true);
-
-               if (TDB_PTR_IS_ERR(ret)) {
-                       return ret;
-               }
-       }
-
-       if (!ret) {
-               struct tdb_access_hdr *hdr;
-               hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
-               if (TDB_PTR_IS_ERR(hdr)) {
-                       return hdr;
-               }
-               hdr->next = tdb->access;
-               tdb->access = hdr;
-               hdr->off = off;
-               hdr->len = len;
-               hdr->convert = convert;
-               ret = hdr + 1;
-               if (convert)
-                       tdb_convert(tdb, (void *)ret, len);
-       } else
-               tdb->direct_access++;
-
-       return ret;
-}
-
-static struct tdb_access_hdr **find_hdr(struct tdb_context *tdb, const void *p)
-{
-       struct tdb_access_hdr **hp;
-
-       for (hp = &tdb->access; *hp; hp = &(*hp)->next) {
-               if (*hp + 1 == p)
-                       return hp;
-       }
-       return NULL;
-}
-
-void tdb_access_release(struct tdb_context *tdb, const void *p)
-{
-       struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
-
-       if (hp) {
-               hdr = *hp;
-               *hp = hdr->next;
-               free(hdr);
-       } else
-               tdb->direct_access--;
-}
-
-enum TDB_ERROR tdb_access_commit(struct tdb_context *tdb, void *p)
-{
-       struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
-       enum TDB_ERROR ecode;
-
-       if (hp) {
-               hdr = *hp;
-               if (hdr->convert)
-                       ecode = tdb_write_convert(tdb, hdr->off, p, hdr->len);
-               else
-                       ecode = tdb_write(tdb, hdr->off, p, hdr->len);
-               *hp = hdr->next;
-               free(hdr);
-       } else {
-               tdb->direct_access--;
-               ecode = TDB_SUCCESS;
-       }
-
-       return ecode;
-}
-
-static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len,
-                       bool write_mode)
-{
-       enum TDB_ERROR ecode;
-
-       if (unlikely(!tdb->file->map_ptr))
-               return NULL;
-
-       ecode = tdb_oob(tdb, off, len, false);
-       if (unlikely(ecode != TDB_SUCCESS))
-               return TDB_ERR_PTR(ecode);
-       return (char *)tdb->file->map_ptr + off;
-}
-
-void tdb_inc_seqnum(struct tdb_context *tdb)
-{
-       tdb_off_t seq;
-
-       if (likely(!(tdb->flags & TDB_CONVERT))) {
-               int64_t *direct;
-
-               direct = tdb->io->direct(tdb,
-                                        offsetof(struct tdb_header, seqnum),
-                                        sizeof(*direct), true);
-               if (likely(direct)) {
-                       /* Don't let it go negative, even briefly */
-                       if (unlikely((*direct) + 1) < 0)
-                               *direct = 0;
-                       (*direct)++;
-                       return;
-               }
-       }
-
-       seq = tdb_read_off(tdb, offsetof(struct tdb_header, seqnum));
-       if (!TDB_OFF_IS_ERR(seq)) {
-               seq++;
-               if (unlikely((int64_t)seq < 0))
-                       seq = 0;
-               tdb_write_off(tdb, offsetof(struct tdb_header, seqnum), seq);
-       }
-}
-
-static const struct tdb_methods io_methods = {
-       tdb_read,
-       tdb_write,
-       tdb_oob,
-       tdb_expand_file,
-       tdb_direct,
-};
-
-/*
-  initialise the default methods table
-*/
-void tdb_io_init(struct tdb_context *tdb)
-{
-       tdb->io = &io_methods;
-}
diff --git a/lib/tdb2/lock.c b/lib/tdb2/lock.c
deleted file mode 100644 (file)
index b058354..0000000
+++ /dev/null
@@ -1,883 +0,0 @@
- /*
-   Unix SMB/CIFS implementation.
-
-   trivial database library
-
-   Copyright (C) Andrew Tridgell              1999-2005
-   Copyright (C) Paul `Rusty' Russell             2000
-   Copyright (C) Jeremy Allison                           2000-2003
-
-     ** NOTE! The following LGPL license applies to the tdb
-     ** library. This does NOT imply that all of Samba is released
-     ** under the LGPL
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "private.h"
-#include <assert.h>
-#include <ccan/build_assert/build_assert.h>
-
-/* If we were threaded, we could wait for unlock, but we're not, so fail. */
-enum TDB_ERROR owner_conflict(struct tdb_context *tdb, const char *call)
-{
-       return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
-                         "%s: lock owned by another tdb in this process.",
-                         call);
-}
-
-/* If we fork, we no longer really own locks. */
-bool check_lock_pid(struct tdb_context *tdb, const char *call, bool log)
-{
-       /* No locks?  No problem! */
-       if (tdb->file->allrecord_lock.count == 0
-           && tdb->file->num_lockrecs == 0) {
-               return true;
-       }
-
-       /* No fork?  No problem! */
-       if (tdb->file->locker == getpid()) {
-               return true;
-       }
-
-       if (log) {
-               tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
-                          "%s: fork() detected after lock acquisition!"
-                          " (%u vs %u)", call, tdb->file->locker, getpid());
-       }
-       return false;
-}
-
-int tdb_fcntl_lock(int fd, int rw, off_t off, off_t len, bool waitflag,
-                  void *unused)
-{
-       struct flock fl;
-       int ret;
-
-       do {
-               fl.l_type = rw;
-               fl.l_whence = SEEK_SET;
-               fl.l_start = off;
-               fl.l_len = len;
-
-               if (waitflag)
-                       ret = fcntl(fd, F_SETLKW, &fl);
-               else
-                       ret = fcntl(fd, F_SETLK, &fl);
-       } while (ret != 0 && errno == EINTR);
-       return ret;
-}
-
-int tdb_fcntl_unlock(int fd, int rw, off_t off, off_t len, void *unused)
-{
-       struct flock fl;
-       int ret;
-
-       do {
-               fl.l_type = F_UNLCK;
-               fl.l_whence = SEEK_SET;
-               fl.l_start = off;
-               fl.l_len = len;
-
-               ret = fcntl(fd, F_SETLKW, &fl);
-       } while (ret != 0 && errno == EINTR);
-       return ret;
-}
-
-static int lock(struct tdb_context *tdb,
-                     int rw, off_t off, off_t len, bool waitflag)
-{
-       int ret;
-       if (tdb->file->allrecord_lock.count == 0
-           && tdb->file->num_lockrecs == 0) {
-               tdb->file->locker = getpid();
-       }
-
-       tdb->stats.lock_lowlevel++;
-       ret = tdb->lock_fn(tdb->file->fd, rw, off, len, waitflag,
-                          tdb->lock_data);
-       if (!waitflag) {
-               tdb->stats.lock_nonblock++;
-               if (ret != 0)
-                       tdb->stats.lock_nonblock_fail++;
-       }
-       return ret;
-}
-
-static int unlock(struct tdb_context *tdb, int rw, off_t off, off_t len)
-{
-#if 0 /* Check they matched up locks and unlocks correctly. */
-       char line[80];
-       FILE *locks;
-       bool found = false;
-
-       locks = fopen("/proc/locks", "r");
-
-       while (fgets(line, 80, locks)) {
-               char *p;
-               int type, start, l;
-
-               /* eg. 1: FLOCK  ADVISORY  WRITE 2440 08:01:2180826 0 EOF */
-               p = strchr(line, ':') + 1;
-               if (strncmp(p, " POSIX  ADVISORY  ", strlen(" POSIX  ADVISORY  ")))
-                       continue;
-               p += strlen(" FLOCK  ADVISORY  ");
-               if (strncmp(p, "READ  ", strlen("READ  ")) == 0)
-                       type = F_RDLCK;
-               else if (strncmp(p, "WRITE ", strlen("WRITE ")) == 0)
-                       type = F_WRLCK;
-               else
-                       abort();
-               p += 6;
-               if (atoi(p) != getpid())
-                       continue;
-               p = strchr(strchr(p, ' ') + 1, ' ') + 1;
-               start = atoi(p);
-               p = strchr(p, ' ') + 1;
-               if (strncmp(p, "EOF", 3) == 0)
-                       l = 0;
-               else
-                       l = atoi(p) - start + 1;
-
-               if (off == start) {
-                       if (len != l) {
-                               fprintf(stderr, "Len %u should be %u: %s",
-                                       (int)len, l, line);
-                               abort();
-                       }
-                       if (type != rw) {
-                               fprintf(stderr, "Type %s wrong: %s",
-                                       rw == F_RDLCK ? "READ" : "WRITE", line);
-                               abort();
-                       }
-                       found = true;
-                       break;
-               }
-       }
-
-       if (!found) {
-               fprintf(stderr, "Unlock on %u@%u not found!",
-                       (int)off, (int)len);
-               abort();
-       }
-
-       fclose(locks);
-#endif
-
-       return tdb->unlock_fn(tdb->file->fd, rw, off, len, tdb->lock_data);
-}
-
-/* a byte range locking function - return 0 on success
-   this functions locks len bytes at the specified offset.
-
-   note that a len of zero means lock to end of file
-*/
-static enum TDB_ERROR tdb_brlock(struct tdb_context *tdb,
-                                int rw_type, tdb_off_t offset, tdb_off_t len,
-                                enum tdb_lock_flags flags)
-{
-       int ret;
-
-       if (tdb->flags & TDB_NOLOCK) {
-               return TDB_SUCCESS;
-       }
-
-       if (rw_type == F_WRLCK && (tdb->flags & TDB_RDONLY)) {
-               return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
-                                 "Write lock attempted on read-only database");
-       }
-
-       /* A 32 bit system cannot open a 64-bit file, but it could have
-        * expanded since then: check here. */
-       if ((size_t)(offset + len) != offset + len) {
-               return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                 "tdb_brlock: lock on giant offset %llu",
-                                 (long long)(offset + len));
-       }
-
-       ret = lock(tdb, rw_type, offset, len, flags & TDB_LOCK_WAIT);
-       if (ret != 0) {
-               /* Generic lock error. errno set by fcntl.
-                * EAGAIN is an expected return from non-blocking
-                * locks. */
-               if (!(flags & TDB_LOCK_PROBE)
-                   && (errno != EAGAIN && errno != EINTR)) {
-                       tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                  "tdb_brlock failed (fd=%d) at"
-                                  " offset %zu rw_type=%d flags=%d len=%zu:"
-                                  " %s",
-                                  tdb->file->fd, (size_t)offset, rw_type,
-                                  flags, (size_t)len, strerror(errno));
-               }
-               return TDB_ERR_LOCK;
-       }
-       return TDB_SUCCESS;
-}
-
-static enum TDB_ERROR tdb_brunlock(struct tdb_context *tdb,
-                                  int rw_type, tdb_off_t offset, size_t len)
-{
-       if (tdb->flags & TDB_NOLOCK) {
-               return TDB_SUCCESS;
-       }
-
-       if (!check_lock_pid(tdb, "tdb_brunlock", true))
-               return TDB_ERR_LOCK;
-
-       if (unlock(tdb, rw_type, offset, len) == -1) {
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                 "tdb_brunlock failed (fd=%d) at offset %zu"
-                                 " rw_type=%d len=%zu: %s",
-                                 tdb->file->fd, (size_t)offset, rw_type,
-                                 (size_t)len, strerror(errno));
-       }
-       return TDB_SUCCESS;
-}
-
-/*
-  upgrade a read lock to a write lock. This needs to be handled in a
-  special way as some OSes (such as solaris) have too conservative
-  deadlock detection and claim a deadlock when progress can be
-  made. For those OSes we may loop for a while.
-*/
-enum TDB_ERROR tdb_allrecord_upgrade(struct tdb_context *tdb, off_t start)
-{
-       int count = 1000;
-
-       if (!check_lock_pid(tdb, "tdb_transaction_prepare_commit", true))
-               return TDB_ERR_LOCK;
-
-       if (tdb->file->allrecord_lock.count != 1) {
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                 "tdb_allrecord_upgrade failed:"
-                                 " count %u too high",
-                                 tdb->file->allrecord_lock.count);
-       }
-
-       if (tdb->file->allrecord_lock.off != 1) {
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                 "tdb_allrecord_upgrade failed:"
-                                 " already upgraded?");
-       }
-
-       if (tdb->file->allrecord_lock.owner != tdb) {
-               return owner_conflict(tdb, "tdb_allrecord_upgrade");
-       }
-
-       while (count--) {
-               struct timeval tv;
-               if (tdb_brlock(tdb, F_WRLCK, start, 0,
-                              TDB_LOCK_WAIT|TDB_LOCK_PROBE) == TDB_SUCCESS) {
-                       tdb->file->allrecord_lock.ltype = F_WRLCK;
-                       tdb->file->allrecord_lock.off = 0;
-                       return TDB_SUCCESS;
-               }
-               if (errno != EDEADLK) {
-                       break;
-               }
-               /* sleep for as short a time as we can - more portable than usleep() */
-               tv.tv_sec = 0;
-               tv.tv_usec = 1;
-               select(0, NULL, NULL, NULL, &tv);
-       }
-
-       if (errno != EAGAIN && errno != EINTR)
-               tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                          "tdb_allrecord_upgrade failed");
-       return TDB_ERR_LOCK;
-}
-
-static struct tdb_lock *find_nestlock(struct tdb_context *tdb, tdb_off_t offset,
-                                     const struct tdb_context *owner)
-{
-       unsigned int i;
-
-       for (i=0; i<tdb->file->num_lockrecs; i++) {
-               if (tdb->file->lockrecs[i].off == offset) {
-                       if (owner && tdb->file->lockrecs[i].owner != owner)
-                               return NULL;
-                       return &tdb->file->lockrecs[i];
-               }
-       }
-       return NULL;
-}
-
-enum TDB_ERROR tdb_lock_and_recover(struct tdb_context *tdb)
-{
-       enum TDB_ERROR ecode;
-
-       if (!check_lock_pid(tdb, "tdb_transaction_prepare_commit", true))
-               return TDB_ERR_LOCK;
-
-       ecode = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT|TDB_LOCK_NOCHECK,
-                                  false);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       ecode = tdb_lock_open(tdb, F_WRLCK, TDB_LOCK_WAIT|TDB_LOCK_NOCHECK);
-       if (ecode != TDB_SUCCESS) {
-               tdb_allrecord_unlock(tdb, F_WRLCK);
-               return ecode;
-       }
-       ecode = tdb_transaction_recover(tdb);
-       tdb_unlock_open(tdb, F_WRLCK);
-       tdb_allrecord_unlock(tdb, F_WRLCK);
-
-       return ecode;
-}
-
-/* lock an offset in the database. */
-static enum TDB_ERROR tdb_nest_lock(struct tdb_context *tdb,
-                                   tdb_off_t offset, int ltype,
-                                   enum tdb_lock_flags flags)
-{
-       struct tdb_lock *new_lck;
-       enum TDB_ERROR ecode;
-
-       if (offset > (TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE
-                     + tdb->file->map_size / 8)) {
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                 "tdb_nest_lock: invalid offset %zu ltype=%d",
-                                 (size_t)offset, ltype);
-       }
-
-       if (tdb->flags & TDB_NOLOCK)
-               return TDB_SUCCESS;
-
-       if (!check_lock_pid(tdb, "tdb_nest_lock", true)) {
-               return TDB_ERR_LOCK;
-       }
-
-       tdb->stats.locks++;
-
-       new_lck = find_nestlock(tdb, offset, NULL);
-       if (new_lck) {
-               if (new_lck->owner != tdb) {
-                       return owner_conflict(tdb, "tdb_nest_lock");
-               }
-
-               if (new_lck->ltype == F_RDLCK && ltype == F_WRLCK) {
-                       return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                         "tdb_nest_lock:"
-                                         " offset %zu has read lock",
-                                         (size_t)offset);
-               }
-               /* Just increment the struct, posix locks don't stack. */
-               new_lck->count++;
-               return TDB_SUCCESS;
-       }
-
-#if 0
-       if (tdb->file->num_lockrecs
-           && offset >= TDB_HASH_LOCK_START
-           && offset < TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE) {
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                 "tdb_nest_lock: already have a hash lock?");
-       }
-#endif
-
-       new_lck = (struct tdb_lock *)realloc(
-               tdb->file->lockrecs,
-               sizeof(*tdb->file->lockrecs) * (tdb->file->num_lockrecs+1));
-       if (new_lck == NULL) {
-               return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                 "tdb_nest_lock:"
-                                 " unable to allocate %zu lock struct",
-                                 tdb->file->num_lockrecs + 1);
-       }
-       tdb->file->lockrecs = new_lck;
-
-       /* Since fcntl locks don't nest, we do a lock for the first one,
-          and simply bump the count for future ones */
-       ecode = tdb_brlock(tdb, ltype, offset, 1, flags);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       /* First time we grab a lock, perhaps someone died in commit? */
-       if (!(flags & TDB_LOCK_NOCHECK)
-           && tdb->file->num_lockrecs == 0) {
-               tdb_bool_err berr = tdb_needs_recovery(tdb);
-               if (berr != false) {
-                       tdb_brunlock(tdb, ltype, offset, 1);
-
-                       if (berr < 0)
-                               return TDB_OFF_TO_ERR(berr);
-                       ecode = tdb_lock_and_recover(tdb);
-                       if (ecode == TDB_SUCCESS) {
-                               ecode = tdb_brlock(tdb, ltype, offset, 1,
-                                                  flags);
-                       }
-                       if (ecode != TDB_SUCCESS) {
-                               return ecode;
-                       }
-               }
-       }
-
-       tdb->file->lockrecs[tdb->file->num_lockrecs].owner = tdb;
-       tdb->file->lockrecs[tdb->file->num_lockrecs].off = offset;
-       tdb->file->lockrecs[tdb->file->num_lockrecs].count = 1;
-       tdb->file->lockrecs[tdb->file->num_lockrecs].ltype = ltype;
-       tdb->file->num_lockrecs++;
-
-       return TDB_SUCCESS;
-}
-
-static enum TDB_ERROR tdb_nest_unlock(struct tdb_context *tdb,
-                                     tdb_off_t off, int ltype)
-{
-       struct tdb_lock *lck;
-       enum TDB_ERROR ecode;
-
-       if (tdb->flags & TDB_NOLOCK)
-               return TDB_SUCCESS;
-
-       lck = find_nestlock(tdb, off, tdb);
-       if ((lck == NULL) || (lck->count == 0)) {
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                 "tdb_nest_unlock: no lock for %zu",
-                                 (size_t)off);
-       }
-
-       if (lck->count > 1) {
-               lck->count--;
-               return TDB_SUCCESS;
-       }
-
-       /*
-        * This lock has count==1 left, so we need to unlock it in the
-        * kernel. We don't bother with decrementing the in-memory array
-        * element, we're about to overwrite it with the last array element
-        * anyway.
-        */
-       ecode = tdb_brunlock(tdb, ltype, off, 1);
-
-       /*
-        * Shrink the array by overwriting the element just unlocked with the
-        * last array element.
-        */
-       *lck = tdb->file->lockrecs[--tdb->file->num_lockrecs];
-
-       return ecode;
-}
-
-/*
-  get the transaction lock
- */
-enum TDB_ERROR tdb_transaction_lock(struct tdb_context *tdb, int ltype)
-{
-       return tdb_nest_lock(tdb, TDB_TRANSACTION_LOCK, ltype, TDB_LOCK_WAIT);
-}
-
-/*
-  release the transaction lock
- */
-void tdb_transaction_unlock(struct tdb_context *tdb, int ltype)
-{
-       tdb_nest_unlock(tdb, TDB_TRANSACTION_LOCK, ltype);
-}
-
-/* We only need to lock individual bytes, but Linux merges consecutive locks
- * so we lock in contiguous ranges. */
-static enum TDB_ERROR tdb_lock_gradual(struct tdb_context *tdb,
-                                      int ltype, enum tdb_lock_flags flags,
-                                      tdb_off_t off, tdb_off_t len)
-{
-       enum TDB_ERROR ecode;
-       enum tdb_lock_flags nb_flags = (flags & ~TDB_LOCK_WAIT);
-
-       if (len <= 1) {
-               /* 0 would mean to end-of-file... */
-               assert(len != 0);
-               /* Single hash.  Just do blocking lock. */
-               return tdb_brlock(tdb, ltype, off, len, flags);
-       }
-
-       /* First we try non-blocking. */
-       ecode = tdb_brlock(tdb, ltype, off, len, nb_flags);
-       if (ecode != TDB_ERR_LOCK) {
-               return ecode;
-       }
-
-       /* Try locking first half, then second. */
-       ecode = tdb_lock_gradual(tdb, ltype, flags, off, len / 2);
-       if (ecode != TDB_SUCCESS)
-               return ecode;
-
-       ecode = tdb_lock_gradual(tdb, ltype, flags,
-                                off + len / 2, len - len / 2);
-       if (ecode != TDB_SUCCESS) {
-               tdb_brunlock(tdb, ltype, off, len / 2);
-       }
-       return ecode;
-}
-
-/* lock/unlock entire database.  It can only be upgradable if you have some
- * other way of guaranteeing exclusivity (ie. transaction write lock). */
-enum TDB_ERROR tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
-                                 enum tdb_lock_flags flags, bool upgradable)
-{
-       enum TDB_ERROR ecode;
-       tdb_bool_err berr;
-
-       if (tdb->flags & TDB_NOLOCK)
-               return TDB_SUCCESS;
-
-       if (!check_lock_pid(tdb, "tdb_allrecord_lock", true)) {
-               return TDB_ERR_LOCK;
-       }
-
-       if (tdb->file->allrecord_lock.count) {
-               if (tdb->file->allrecord_lock.owner != tdb) {
-                       return owner_conflict(tdb, "tdb_allrecord_lock");
-               }
-
-               if (ltype == F_RDLCK
-                   || tdb->file->allrecord_lock.ltype == F_WRLCK) {
-                       tdb->file->allrecord_lock.count++;
-                       return TDB_SUCCESS;
-               }
-
-               /* a global lock of a different type exists */
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
-                                 "tdb_allrecord_lock: already have %s lock",
-                                 tdb->file->allrecord_lock.ltype == F_RDLCK
-                                 ? "read" : "write");
-       }
-
-       if (tdb_has_hash_locks(tdb)) {
-               /* can't combine global and chain locks */
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
-                                 "tdb_allrecord_lock:"
-                                 " already have chain lock");
-       }
-
-       if (upgradable && ltype != F_RDLCK) {
-               /* tdb error: you can't upgrade a write lock! */
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                 "tdb_allrecord_lock:"
-                                 " can't upgrade a write lock");
-       }
-
-       tdb->stats.locks++;
-again:
-       /* Lock hashes, gradually. */
-       ecode = tdb_lock_gradual(tdb, ltype, flags, TDB_HASH_LOCK_START,
-                                TDB_HASH_LOCK_RANGE);
-       if (ecode != TDB_SUCCESS)
-               return ecode;
-
-       /* Lock free tables: there to end of file. */
-       ecode = tdb_brlock(tdb, ltype,
-                          TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE,
-                          0, flags);
-       if (ecode != TDB_SUCCESS) {
-               tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START,
-                            TDB_HASH_LOCK_RANGE);
-               return ecode;
-       }
-
-       tdb->file->allrecord_lock.owner = tdb;
-       tdb->file->allrecord_lock.count = 1;
-       /* If it's upgradable, it's actually exclusive so we can treat
-        * it as a write lock. */
-       tdb->file->allrecord_lock.ltype = upgradable ? F_WRLCK : ltype;
-       tdb->file->allrecord_lock.off = upgradable;
-
-       /* Now check for needing recovery. */
-       if (flags & TDB_LOCK_NOCHECK)
-               return TDB_SUCCESS;
-
-       berr = tdb_needs_recovery(tdb);
-       if (likely(berr == false))
-               return TDB_SUCCESS;
-
-       tdb_allrecord_unlock(tdb, ltype);
-       if (berr < 0)
-               return TDB_OFF_TO_ERR(berr);
-       ecode = tdb_lock_and_recover(tdb);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-       goto again;
-}
-
-enum TDB_ERROR tdb_lock_open(struct tdb_context *tdb,
-                            int ltype, enum tdb_lock_flags flags)
-{
-       return tdb_nest_lock(tdb, TDB_OPEN_LOCK, ltype, flags);
-}
-
-void tdb_unlock_open(struct tdb_context *tdb, int ltype)
-{
-       tdb_nest_unlock(tdb, TDB_OPEN_LOCK, ltype);
-}
-
-bool tdb_has_open_lock(struct tdb_context *tdb)
-{
-       return !(tdb->flags & TDB_NOLOCK)
-               && find_nestlock(tdb, TDB_OPEN_LOCK, tdb) != NULL;
-}
-
-enum TDB_ERROR tdb_lock_expand(struct tdb_context *tdb, int ltype)
-{
-       /* Lock doesn't protect data, so don't check (we recurse if we do!) */
-       return tdb_nest_lock(tdb, TDB_EXPANSION_LOCK, ltype,
-                            TDB_LOCK_WAIT | TDB_LOCK_NOCHECK);
-}
-
-void tdb_unlock_expand(struct tdb_context *tdb, int ltype)
-{
-       tdb_nest_unlock(tdb, TDB_EXPANSION_LOCK, ltype);
-}
-
-/* unlock entire db */
-void tdb_allrecord_unlock(struct tdb_context *tdb, int ltype)
-{
-       if (tdb->flags & TDB_NOLOCK)
-               return;
-
-       if (tdb->file->allrecord_lock.count == 0) {
-               tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
-                          "tdb_allrecord_unlock: not locked!");
-               return;
-       }
-
-       if (tdb->file->allrecord_lock.owner != tdb) {
-               tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
-                          "tdb_allrecord_unlock: not locked by us!");
-               return;
-       }
-
-       /* Upgradable locks are marked as write locks. */
-       if (tdb->file->allrecord_lock.ltype != ltype
-           && (!tdb->file->allrecord_lock.off || ltype != F_RDLCK)) {
-               tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                          "tdb_allrecord_unlock: have %s lock",
-                          tdb->file->allrecord_lock.ltype == F_RDLCK
-                          ? "read" : "write");
-               return;
-       }
-
-       if (tdb->file->allrecord_lock.count > 1) {
-               tdb->file->allrecord_lock.count--;
-               return;
-       }
-
-       tdb->file->allrecord_lock.count = 0;
-       tdb->file->allrecord_lock.ltype = 0;
-
-       tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START, 0);
-}
-
-bool tdb_has_expansion_lock(struct tdb_context *tdb)
-{
-       return find_nestlock(tdb, TDB_EXPANSION_LOCK, tdb) != NULL;
-}
-
-bool tdb_has_hash_locks(struct tdb_context *tdb)
-{
-       unsigned int i;
-
-       for (i=0; i<tdb->file->num_lockrecs; i++) {
-               if (tdb->file->lockrecs[i].off >= TDB_HASH_LOCK_START
-                   && tdb->file->lockrecs[i].off < (TDB_HASH_LOCK_START
-                                                    + TDB_HASH_LOCK_RANGE))
-                       return true;
-       }
-       return false;
-}
-
-static bool tdb_has_free_lock(struct tdb_context *tdb)
-{
-       unsigned int i;
-
-       if (tdb->flags & TDB_NOLOCK)
-               return false;
-
-       for (i=0; i<tdb->file->num_lockrecs; i++) {
-               if (tdb->file->lockrecs[i].off
-                   > TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE)
-                       return true;
-       }
-       return false;
-}
-
-enum TDB_ERROR tdb_lock_hashes(struct tdb_context *tdb,
-                              tdb_off_t hash_lock,
-                              tdb_len_t hash_range,
-                              int ltype, enum tdb_lock_flags waitflag)
-{
-       /* FIXME: Do this properly, using hlock_range */
-       unsigned l = TDB_HASH_LOCK_START
-               + (hash_lock >> (64 - TDB_HASH_LOCK_RANGE_BITS));
-
-       /* a allrecord lock allows us to avoid per chain locks */
-       if (tdb->file->allrecord_lock.count) {
-               if (!check_lock_pid(tdb, "tdb_lock_hashes", true))
-                       return TDB_ERR_LOCK;
-
-               if (tdb->file->allrecord_lock.owner != tdb)
-                       return owner_conflict(tdb, "tdb_lock_hashes");
-               if (ltype == tdb->file->allrecord_lock.ltype
-                   || ltype == F_RDLCK) {
-                       return TDB_SUCCESS;
-               }
-
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
-                                 "tdb_lock_hashes:"
-                                 " already have %s allrecordlock",
-                                 tdb->file->allrecord_lock.ltype == F_RDLCK
-                                 ? "read" : "write");
-       }
-
-       if (tdb_has_free_lock(tdb)) {
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                 "tdb_lock_hashes: already have free lock");
-       }
-
-       if (tdb_has_expansion_lock(tdb)) {
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                 "tdb_lock_hashes:"
-                                 " already have expansion lock");
-       }
-
-       return tdb_nest_lock(tdb, l, ltype, waitflag);
-}
-
-enum TDB_ERROR tdb_unlock_hashes(struct tdb_context *tdb,
-                                tdb_off_t hash_lock,
-                                tdb_len_t hash_range, int ltype)
-{
-       unsigned l = TDB_HASH_LOCK_START
-               + (hash_lock >> (64 - TDB_HASH_LOCK_RANGE_BITS));
-
-       if (tdb->flags & TDB_NOLOCK)
-               return 0;
-
-       /* a allrecord lock allows us to avoid per chain locks */
-       if (tdb->file->allrecord_lock.count) {
-               if (tdb->file->allrecord_lock.ltype == F_RDLCK
-                   && ltype == F_WRLCK) {
-                       return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                         "tdb_unlock_hashes RO allrecord!");
-               }
-               if (tdb->file->allrecord_lock.owner != tdb) {
-                       return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
-                                         "tdb_unlock_hashes:"
-                                         " not locked by us!");
-               }
-               return TDB_SUCCESS;
-       }
-
-       return tdb_nest_unlock(tdb, l, ltype);
-}
-
-/* Hash locks use TDB_HASH_LOCK_START + the next 30 bits.
- * Then we begin; bucket offsets are sizeof(tdb_len_t) apart, so we divide.
- * The result is that on 32 bit systems we don't use lock values > 2^31 on
- * files that are less than 4GB.
- */
-static tdb_off_t free_lock_off(tdb_off_t b_off)
-{
-       return TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE
-               + b_off / sizeof(tdb_off_t);
-}
-
-enum TDB_ERROR tdb_lock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off,
-                                   enum tdb_lock_flags waitflag)
-{
-       assert(b_off >= sizeof(struct tdb_header));
-
-       if (tdb->flags & TDB_NOLOCK)
-               return 0;
-
-       /* a allrecord lock allows us to avoid per chain locks */
-       if (tdb->file->allrecord_lock.count) {
-               if (!check_lock_pid(tdb, "tdb_lock_free_bucket", true))
-                       return TDB_ERR_LOCK;
-
-               if (tdb->file->allrecord_lock.owner != tdb) {
-                       return owner_conflict(tdb, "tdb_lock_free_bucket");
-               }
-
-               if (tdb->file->allrecord_lock.ltype == F_WRLCK)
-                       return 0;
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                 "tdb_lock_free_bucket with"
-                                 " read-only allrecordlock!");
-       }
-
-#if 0 /* FIXME */
-       if (tdb_has_expansion_lock(tdb)) {
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                 "tdb_lock_free_bucket:"
-                                 " already have expansion lock");
-       }
-#endif
-
-       return tdb_nest_lock(tdb, free_lock_off(b_off), F_WRLCK, waitflag);
-}
-
-void tdb_unlock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off)
-{
-       if (tdb->file->allrecord_lock.count)
-               return;
-
-       tdb_nest_unlock(tdb, free_lock_off(b_off), F_WRLCK);
-}
-
-_PUBLIC_ enum TDB_ERROR tdb_lockall(struct tdb_context *tdb)
-{
-       return tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false);
-}
-
-_PUBLIC_ void tdb_unlockall(struct tdb_context *tdb)
-{
-       tdb_allrecord_unlock(tdb, F_WRLCK);
-}
-
-_PUBLIC_ enum TDB_ERROR tdb_lockall_read(struct tdb_context *tdb)
-{
-       return tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false);
-}
-
-_PUBLIC_ void tdb_unlockall_read(struct tdb_context *tdb)
-{
-       tdb_allrecord_unlock(tdb, F_RDLCK);
-}
-
-void tdb_lock_cleanup(struct tdb_context *tdb)
-{
-       unsigned int i;
-
-       /* We don't want to warn: they're allowed to close tdb after fork. */
-       if (!check_lock_pid(tdb, "tdb_close", false))
-               return;
-
-       while (tdb->file->allrecord_lock.count
-              && tdb->file->allrecord_lock.owner == tdb) {
-               tdb_allrecord_unlock(tdb, tdb->file->allrecord_lock.ltype);
-       }
-
-       for (i=0; i<tdb->file->num_lockrecs; i++) {
-               if (tdb->file->lockrecs[i].owner == tdb) {
-                       tdb_nest_unlock(tdb,
-                                       tdb->file->lockrecs[i].off,
-                                       tdb->file->lockrecs[i].ltype);
-                       i--;
-               }
-       }
-}
diff --git a/lib/tdb2/open.c b/lib/tdb2/open.c
deleted file mode 100644 (file)
index fab855b..0000000
+++ /dev/null
@@ -1,768 +0,0 @@
- /*
-   Trivial Database 2: opening and closing TDBs
-   Copyright (C) Rusty Russell 2010
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-#include "private.h"
-#include <ccan/build_assert/build_assert.h>
-#include <assert.h>
-
-/* all tdbs, to detect double-opens (fcntl file don't nest!) */
-static struct tdb_context *tdbs = NULL;
-
-static struct tdb_file *find_file(dev_t device, ino_t ino)
-{
-       struct tdb_context *i;
-
-       for (i = tdbs; i; i = i->next) {
-               if (i->file->device == device && i->file->inode == ino) {
-                       i->file->refcnt++;
-                       return i->file;
-               }
-       }
-       return NULL;
-}
-
-static bool read_all(int fd, void *buf, size_t len)
-{
-       while (len) {
-               ssize_t ret;
-               ret = read(fd, buf, len);
-               if (ret < 0)
-                       return false;
-               if (ret == 0) {
-                       /* ETOOSHORT? */
-                       errno = EWOULDBLOCK;
-                       return false;
-               }
-               buf = (char *)buf + ret;
-               len -= ret;
-       }
-       return true;
-}
-
-static uint64_t random_number(struct tdb_context *tdb)
-{
-       int fd;
-       uint64_t ret = 0;
-       struct timeval now;
-
-       fd = open("/dev/urandom", O_RDONLY);
-       if (fd >= 0) {
-               if (read_all(fd, &ret, sizeof(ret))) {
-                       close(fd);
-                       return ret;
-               }
-               close(fd);
-       }
-       /* FIXME: Untested!  Based on Wikipedia protocol description! */
-       fd = open("/dev/egd-pool", O_RDWR);
-       if (fd >= 0) {
-               /* Command is 1, next byte is size we want to read. */
-               char cmd[2] = { 1, sizeof(uint64_t) };
-               if (write(fd, cmd, sizeof(cmd)) == sizeof(cmd)) {
-                       char reply[1 + sizeof(uint64_t)];
-                       int r = read(fd, reply, sizeof(reply));
-                       if (r > 1) {
-                               /* Copy at least some bytes. */
-                               memcpy(&ret, reply+1, r - 1);
-                               if (reply[0] == sizeof(uint64_t)
-                                   && r == sizeof(reply)) {
-                                       close(fd);
-                                       return ret;
-                               }
-                       }
-               }
-               close(fd);
-       }
-
-       /* Fallback: pid and time. */
-       gettimeofday(&now, NULL);
-       ret = getpid() * 100132289ULL + now.tv_sec * 1000000ULL + now.tv_usec;
-       tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
-                  "tdb_open: random from getpid and time");
-       return ret;
-}
-
-static void tdb2_context_init(struct tdb_context *tdb)
-{
-       /* Initialize the TDB2 fields here */
-       tdb_io_init(tdb);
-       tdb->direct_access = 0;
-       tdb->transaction = NULL;
-       tdb->access = NULL;
-}
-
-struct new_database {
-       struct tdb_header hdr;
-       struct tdb_freetable ftable;
-};
-
-/* initialise a new database */
-static enum TDB_ERROR tdb_new_database(struct tdb_context *tdb,
-                                      struct tdb_attribute_seed *seed,
-                                      struct tdb_header *hdr)
-{
-       /* We make it up in memory, then write it out if not internal */
-       struct new_database newdb;
-       unsigned int magic_len;
-       ssize_t rlen;
-       enum TDB_ERROR ecode;
-
-       /* Fill in the header */
-       newdb.hdr.version = TDB_VERSION;
-       if (seed)
-               newdb.hdr.hash_seed = seed->seed;
-       else
-               newdb.hdr.hash_seed = random_number(tdb);
-       newdb.hdr.hash_test = TDB_HASH_MAGIC;
-       newdb.hdr.hash_test = tdb->hash_fn(&newdb.hdr.hash_test,
-                                          sizeof(newdb.hdr.hash_test),
-                                          newdb.hdr.hash_seed,
-                                          tdb->hash_data);
-       newdb.hdr.recovery = 0;
-       newdb.hdr.features_used = newdb.hdr.features_offered = TDB_FEATURE_MASK;
-       newdb.hdr.seqnum = 0;
-       newdb.hdr.capabilities = 0;
-       memset(newdb.hdr.reserved, 0, sizeof(newdb.hdr.reserved));
-       /* Initial hashes are empty. */
-       memset(newdb.hdr.hashtable, 0, sizeof(newdb.hdr.hashtable));
-
-       /* Free is empty. */
-       newdb.hdr.free_table = offsetof(struct new_database, ftable);
-       memset(&newdb.ftable, 0, sizeof(newdb.ftable));
-       ecode = set_header(NULL, &newdb.ftable.hdr, TDB_FTABLE_MAGIC, 0,
-                          sizeof(newdb.ftable) - sizeof(newdb.ftable.hdr),
-                          sizeof(newdb.ftable) - sizeof(newdb.ftable.hdr),
-                          0);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       /* Magic food */
-       memset(newdb.hdr.magic_food, 0, sizeof(newdb.hdr.magic_food));
-       strcpy(newdb.hdr.magic_food, TDB_MAGIC_FOOD);
-
-       /* This creates an endian-converted database, as if read from disk */
-       magic_len = sizeof(newdb.hdr.magic_food);
-       tdb_convert(tdb,
-                   (char *)&newdb.hdr + magic_len, sizeof(newdb) - magic_len);
-
-       *hdr = newdb.hdr;
-
-       if (tdb->flags & TDB_INTERNAL) {
-               tdb->file->map_size = sizeof(newdb);
-               tdb->file->map_ptr = malloc(tdb->file->map_size);
-               if (!tdb->file->map_ptr) {
-                       return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                         "tdb_new_database:"
-                                         " failed to allocate");
-               }
-               memcpy(tdb->file->map_ptr, &newdb, tdb->file->map_size);
-               return TDB_SUCCESS;
-       }
-       if (lseek(tdb->file->fd, 0, SEEK_SET) == -1) {
-               return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                 "tdb_new_database:"
-                                 " failed to seek: %s", strerror(errno));
-       }
-
-       if (ftruncate(tdb->file->fd, 0) == -1) {
-               return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                 "tdb_new_database:"
-                                 " failed to truncate: %s", strerror(errno));
-       }
-
-       rlen = write(tdb->file->fd, &newdb, sizeof(newdb));
-       if (rlen != sizeof(newdb)) {
-               if (rlen >= 0)
-                       errno = ENOSPC;
-               return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                 "tdb_new_database: %zi writing header: %s",
-                                 rlen, strerror(errno));
-       }
-       return TDB_SUCCESS;
-}
-
-static enum TDB_ERROR tdb_new_file(struct tdb_context *tdb)
-{
-       tdb->file = malloc(sizeof(*tdb->file));
-       if (!tdb->file)
-               return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                 "tdb_open: cannot alloc tdb_file structure");
-       tdb->file->num_lockrecs = 0;
-       tdb->file->lockrecs = NULL;
-       tdb->file->allrecord_lock.count = 0;
-       tdb->file->refcnt = 1;
-       tdb->file->map_ptr = NULL;
-       return TDB_SUCCESS;
-}
-
-_PUBLIC_ enum TDB_ERROR tdb_set_attribute(struct tdb_context *tdb,
-                                const union tdb_attribute *attr)
-{
-       switch (attr->base.attr) {
-       case TDB_ATTRIBUTE_LOG:
-               tdb->log_fn = attr->log.fn;
-               tdb->log_data = attr->log.data;
-               break;
-       case TDB_ATTRIBUTE_HASH:
-       case TDB_ATTRIBUTE_SEED:
-       case TDB_ATTRIBUTE_OPENHOOK:
-               return tdb->last_error
-                       = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                    TDB_LOG_USE_ERROR,
-                                    "tdb_set_attribute:"
-                                    " cannot set %s after opening",
-                                    attr->base.attr == TDB_ATTRIBUTE_HASH
-                                    ? "TDB_ATTRIBUTE_HASH"
-                                    : attr->base.attr == TDB_ATTRIBUTE_SEED
-                                    ? "TDB_ATTRIBUTE_SEED"
-                                    : "TDB_ATTRIBUTE_OPENHOOK");
-       case TDB_ATTRIBUTE_STATS:
-               return tdb->last_error
-                       = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                    TDB_LOG_USE_ERROR,
-                                    "tdb_set_attribute:"
-                                    " cannot set TDB_ATTRIBUTE_STATS");
-       case TDB_ATTRIBUTE_FLOCK:
-               tdb->lock_fn = attr->flock.lock;
-               tdb->unlock_fn = attr->flock.unlock;
-               tdb->lock_data = attr->flock.data;
-               break;
-       default:
-               return tdb->last_error
-                       = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                    TDB_LOG_USE_ERROR,
-                                    "tdb_set_attribute:"
-                                    " unknown attribute type %u",
-                                    attr->base.attr);
-       }
-       return TDB_SUCCESS;
-}
-
-_PUBLIC_ enum TDB_ERROR tdb_get_attribute(struct tdb_context *tdb,
-                                union tdb_attribute *attr)
-{
-       switch (attr->base.attr) {
-       case TDB_ATTRIBUTE_LOG:
-               if (!tdb->log_fn)
-                       return tdb->last_error = TDB_ERR_NOEXIST;
-               attr->log.fn = tdb->log_fn;
-               attr->log.data = tdb->log_data;
-               break;
-       case TDB_ATTRIBUTE_HASH:
-               attr->hash.fn = tdb->hash_fn;
-               attr->hash.data = tdb->hash_data;
-               break;
-       case TDB_ATTRIBUTE_SEED:
-               attr->seed.seed = tdb->hash_seed;
-               break;
-       case TDB_ATTRIBUTE_OPENHOOK:
-               if (!tdb->openhook)
-                       return tdb->last_error = TDB_ERR_NOEXIST;
-               attr->openhook.fn = tdb->openhook;
-               attr->openhook.data = tdb->openhook_data;
-               break;
-       case TDB_ATTRIBUTE_STATS: {
-               size_t size = attr->stats.size;
-               if (size > tdb->stats.size)
-                       size = tdb->stats.size;
-               memcpy(&attr->stats, &tdb->stats, size);
-               break;
-       }
-       case TDB_ATTRIBUTE_FLOCK:
-               attr->flock.lock = tdb->lock_fn;
-               attr->flock.unlock = tdb->unlock_fn;
-               attr->flock.data = tdb->lock_data;
-               break;
-       default:
-               return tdb->last_error
-                       = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                    TDB_LOG_USE_ERROR,
-                                    "tdb_get_attribute:"
-                                    " unknown attribute type %u",
-                                    attr->base.attr);
-       }
-       attr->base.next = NULL;
-       return TDB_SUCCESS;
-}
-
-_PUBLIC_ void tdb_unset_attribute(struct tdb_context *tdb,
-                        enum tdb_attribute_type type)
-{
-       switch (type) {
-       case TDB_ATTRIBUTE_LOG:
-               tdb->log_fn = NULL;
-               break;
-       case TDB_ATTRIBUTE_OPENHOOK:
-               tdb->openhook = NULL;
-               break;
-       case TDB_ATTRIBUTE_HASH:
-       case TDB_ATTRIBUTE_SEED:
-               tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
-                          "tdb_unset_attribute: cannot unset %s after opening",
-                          type == TDB_ATTRIBUTE_HASH
-                          ? "TDB_ATTRIBUTE_HASH"
-                          : "TDB_ATTRIBUTE_SEED");
-               break;
-       case TDB_ATTRIBUTE_STATS:
-               tdb_logerr(tdb, TDB_ERR_EINVAL,
-                          TDB_LOG_USE_ERROR,
-                          "tdb_unset_attribute:"
-                          "cannot unset TDB_ATTRIBUTE_STATS");
-               break;
-       case TDB_ATTRIBUTE_FLOCK:
-               tdb->lock_fn = tdb_fcntl_lock;
-               tdb->unlock_fn = tdb_fcntl_unlock;
-               break;
-       default:
-               tdb_logerr(tdb, TDB_ERR_EINVAL,
-                          TDB_LOG_USE_ERROR,
-                          "tdb_unset_attribute: unknown attribute type %u",
-                          type);
-       }
-}
-
-/* The top three bits of the capability tell us whether it matters. */
-enum TDB_ERROR unknown_capability(struct tdb_context *tdb, const char *caller,
-                                 tdb_off_t type)
-{
-       if (type & TDB_CAP_NOOPEN) {
-               return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                 "%s: file has unknown capability %llu",
-                                 caller, type & TDB_CAP_NOOPEN);
-       }
-
-       if ((type & TDB_CAP_NOWRITE) && !(tdb->flags & TDB_RDONLY)) {
-               return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_ERROR,
-                                 "%s: file has unknown capability %llu"
-                                 " (cannot write to it)",
-                                 caller, type & TDB_CAP_NOOPEN);
-       }
-
-       if (type & TDB_CAP_NOCHECK) {
-               tdb->flags |= TDB_CANT_CHECK;
-       }
-       return TDB_SUCCESS;
-}
-
-static enum TDB_ERROR capabilities_ok(struct tdb_context *tdb,
-                                     tdb_off_t capabilities)
-{
-       tdb_off_t off, next;
-       enum TDB_ERROR ecode = TDB_SUCCESS;
-       const struct tdb_capability *cap;
-
-       /* Check capability list. */
-       for (off = capabilities; off && ecode == TDB_SUCCESS; off = next) {
-               cap = tdb_access_read(tdb, off, sizeof(*cap), true);
-               if (TDB_PTR_IS_ERR(cap)) {
-                       return TDB_PTR_ERR(cap);
-               }
-
-               switch (cap->type & TDB_CAP_TYPE_MASK) {
-               /* We don't understand any capabilities (yet). */
-               default:
-                       ecode = unknown_capability(tdb, "tdb_open", cap->type);
-               }
-               next = cap->next;
-               tdb_access_release(tdb, cap);
-       }
-       return ecode;
-}
-
-_PUBLIC_ struct tdb_context *tdb_open(const char *name, int tdb_flags,
-                            int open_flags, mode_t mode,
-                            union tdb_attribute *attr)
-{
-       struct tdb_context *tdb;
-       struct stat st;
-       int saved_errno = 0;
-       uint64_t hash_test;
-       unsigned v;
-       ssize_t rlen;
-       struct tdb_header hdr;
-       struct tdb_attribute_seed *seed = NULL;
-       tdb_bool_err berr;
-       enum TDB_ERROR ecode;
-       int openlock;
-
-       tdb = malloc(sizeof(*tdb) + (name ? strlen(name) + 1 : 0));
-       if (!tdb) {
-               /* Can't log this */
-               errno = ENOMEM;
-               return NULL;
-       }
-       /* Set name immediately for logging functions. */
-       if (name) {
-               tdb->name = strcpy((char *)(tdb + 1), name);
-       } else {
-               tdb->name = NULL;
-       }
-       tdb->flags = tdb_flags;
-       tdb->log_fn = NULL;
-       tdb->open_flags = open_flags;
-       tdb->last_error = TDB_SUCCESS;
-       tdb->file = NULL;
-       tdb->openhook = NULL;
-       tdb->lock_fn = tdb_fcntl_lock;
-       tdb->unlock_fn = tdb_fcntl_unlock;
-       tdb->hash_fn = tdb_jenkins_hash;
-       memset(&tdb->stats, 0, sizeof(tdb->stats));
-       tdb->stats.base.attr = TDB_ATTRIBUTE_STATS;
-       tdb->stats.size = sizeof(tdb->stats);
-
-       while (attr) {
-               switch (attr->base.attr) {
-               case TDB_ATTRIBUTE_HASH:
-                       tdb->hash_fn = attr->hash.fn;
-                       tdb->hash_data = attr->hash.data;
-                       break;
-               case TDB_ATTRIBUTE_SEED:
-                       seed = &attr->seed;
-                       break;
-               case TDB_ATTRIBUTE_OPENHOOK:
-                       tdb->openhook = attr->openhook.fn;
-                       tdb->openhook_data = attr->openhook.data;
-                       break;
-               default:
-                       /* These are set as normal. */
-                       ecode = tdb_set_attribute(tdb, attr);
-                       if (ecode != TDB_SUCCESS)
-                               goto fail;
-               }
-               attr = attr->base.next;
-       }
-
-       if (tdb_flags & ~(TDB_INTERNAL | TDB_NOLOCK | TDB_NOMMAP | TDB_CONVERT
-                         | TDB_NOSYNC | TDB_SEQNUM | TDB_ALLOW_NESTING
-                         | TDB_RDONLY)) {
-               ecode = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
-                                  "tdb_open: unknown flags %u", tdb_flags);
-               goto fail;
-       }
-
-       if (seed) {
-               if (!(tdb_flags & TDB_INTERNAL) && !(open_flags & O_CREAT)) {
-                       ecode = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                          TDB_LOG_USE_ERROR,
-                                          "tdb_open:"
-                                          " cannot set TDB_ATTRIBUTE_SEED"
-                                          " without O_CREAT.");
-                       goto fail;
-               }
-       }
-
-       if ((open_flags & O_ACCMODE) == O_WRONLY) {
-               ecode = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
-                                  "tdb_open: can't open tdb %s write-only",
-                                  name);
-               goto fail;
-       }
-
-       if ((open_flags & O_ACCMODE) == O_RDONLY) {
-               openlock = F_RDLCK;
-               tdb->flags |= TDB_RDONLY;
-       } else {
-               if (tdb_flags & TDB_RDONLY) {
-                       ecode = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                          TDB_LOG_USE_ERROR,
-                                          "tdb_open: can't use TDB_RDONLY"
-                                          " without O_RDONLY");
-                       goto fail;
-               }
-               openlock = F_WRLCK;
-       }
-
-       /* internal databases don't need any of the rest. */
-       if (tdb->flags & TDB_INTERNAL) {
-               tdb->flags |= (TDB_NOLOCK | TDB_NOMMAP);
-               ecode = tdb_new_file(tdb);
-               if (ecode != TDB_SUCCESS) {
-                       goto fail;
-               }
-               tdb->file->fd = -1;
-               ecode = tdb_new_database(tdb, seed, &hdr);
-               if (ecode == TDB_SUCCESS) {
-                       tdb_convert(tdb, &hdr.hash_seed,
-                                   sizeof(hdr.hash_seed));
-                       tdb->hash_seed = hdr.hash_seed;
-                       tdb2_context_init(tdb);
-                       tdb_ftable_init(tdb);
-               }
-               if (ecode != TDB_SUCCESS) {
-                       goto fail;
-               }
-               return tdb;
-       }
-
-       if (stat(name, &st) != -1)
-               tdb->file = find_file(st.st_dev, st.st_ino);
-
-       if (!tdb->file) {
-               int fd;
-
-               if ((fd = open(name, open_flags, mode)) == -1) {
-                       /* errno set by open(2) */
-                       saved_errno = errno;
-                       tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                  "tdb_open: could not open file %s: %s",
-                                  name, strerror(errno));
-                       goto fail_errno;
-               }
-
-               /* on exec, don't inherit the fd */
-               v = fcntl(fd, F_GETFD, 0);
-               fcntl(fd, F_SETFD, v | FD_CLOEXEC);
-
-               if (fstat(fd, &st) == -1) {
-                       saved_errno = errno;
-                       tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                  "tdb_open: could not stat open %s: %s",
-                                  name, strerror(errno));
-                       close(fd);
-                       goto fail_errno;
-               }
-
-               ecode = tdb_new_file(tdb);
-               if (ecode != TDB_SUCCESS) {
-                       close(fd);
-                       goto fail;
-               }
-
-               tdb->file->fd = fd;
-               tdb->file->device = st.st_dev;
-               tdb->file->inode = st.st_ino;
-               tdb->file->map_ptr = NULL;
-               tdb->file->map_size = 0;
-       }
-
-       /* ensure there is only one process initialising at once */
-       ecode = tdb_lock_open(tdb, openlock, TDB_LOCK_WAIT|TDB_LOCK_NOCHECK);
-       if (ecode != TDB_SUCCESS) {
-               saved_errno = errno;
-               goto fail_errno;
-       }
-
-       /* call their open hook if they gave us one. */
-       if (tdb->openhook) {
-               ecode = tdb->openhook(tdb->file->fd, tdb->openhook_data);
-               if (ecode != TDB_SUCCESS) {
-                       tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                                  "tdb_open: open hook failed");
-                       goto fail;
-               }
-               open_flags |= O_CREAT;
-       }
-
-       /* If they used O_TRUNC, read will return 0. */
-       rlen = pread(tdb->file->fd, &hdr, sizeof(hdr), 0);
-       if (rlen == 0 && (open_flags & O_CREAT)) {
-               ecode = tdb_new_database(tdb, seed, &hdr);
-               if (ecode != TDB_SUCCESS) {
-                       goto fail;
-               }
-       } else if (rlen < 0) {
-               ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                  "tdb_open: error %s reading %s",
-                                  strerror(errno), name);
-               goto fail;
-       } else if (rlen < sizeof(hdr)
-                  || strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0) {
-               ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                  "tdb_open: %s is not a tdb2 file", name);
-               goto fail;
-       }
-
-       if (hdr.version != TDB_VERSION) {
-               if (hdr.version == bswap_64(TDB_VERSION))
-                       tdb->flags |= TDB_CONVERT;
-               else {
-                       /* wrong version */
-                       ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                          "tdb_open:"
-                                          " %s is unknown version 0x%llx",
-                                          name, (long long)hdr.version);
-                       goto fail;
-               }
-       } else if (tdb->flags & TDB_CONVERT) {
-               ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                  "tdb_open:"
-                                  " %s does not need TDB_CONVERT",
-                                  name);
-               goto fail;
-       }
-
-       tdb2_context_init(tdb);
-
-       tdb_convert(tdb, &hdr, sizeof(hdr));
-       tdb->hash_seed = hdr.hash_seed;
-       hash_test = TDB_HASH_MAGIC;
-       hash_test = tdb_hash(tdb, &hash_test, sizeof(hash_test));
-       if (hdr.hash_test != hash_test) {
-               /* wrong hash variant */
-               ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                  "tdb_open:"
-                                  " %s uses a different hash function",
-                                  name);
-               goto fail;
-       }
-
-       ecode = capabilities_ok(tdb, hdr.capabilities);
-       if (ecode != TDB_SUCCESS) {
-               goto fail;
-       }
-
-       /* Clear any features we don't understand. */
-       if ((open_flags & O_ACCMODE) != O_RDONLY) {
-               hdr.features_used &= TDB_FEATURE_MASK;
-               ecode = tdb_write_convert(tdb, offsetof(struct tdb_header,
-                                                       features_used),
-                                         &hdr.features_used,
-                                         sizeof(hdr.features_used));
-               if (ecode != TDB_SUCCESS)
-                       goto fail;
-       }
-
-       tdb_unlock_open(tdb, openlock);
-
-       /* This makes sure we have current map_size and mmap. */
-       ecode = tdb->io->oob(tdb, tdb->file->map_size, 1, true);
-       if (unlikely(ecode != TDB_SUCCESS))
-               goto fail;
-
-       /* Now it's fully formed, recover if necessary. */
-       berr = tdb_needs_recovery(tdb);
-       if (unlikely(berr != false)) {
-               if (berr < 0) {
-                       ecode = TDB_OFF_TO_ERR(berr);
-                       goto fail;
-               }
-               ecode = tdb_lock_and_recover(tdb);
-               if (ecode != TDB_SUCCESS) {
-                       goto fail;
-               }
-       }
-
-       ecode = tdb_ftable_init(tdb);
-       if (ecode != TDB_SUCCESS) {
-               goto fail;
-       }
-
-       tdb->next = tdbs;
-       tdbs = tdb;
-       return tdb;
-
- fail:
-       /* Map ecode to some logical errno. */
-       switch (TDB_ERR_TO_OFF(ecode)) {
-       case TDB_ERR_TO_OFF(TDB_ERR_CORRUPT):
-       case TDB_ERR_TO_OFF(TDB_ERR_IO):
-               saved_errno = EIO;
-               break;
-       case TDB_ERR_TO_OFF(TDB_ERR_LOCK):
-               saved_errno = EWOULDBLOCK;
-               break;
-       case TDB_ERR_TO_OFF(TDB_ERR_OOM):
-               saved_errno = ENOMEM;
-               break;
-       case TDB_ERR_TO_OFF(TDB_ERR_EINVAL):
-               saved_errno = EINVAL;
-               break;
-       default:
-               saved_errno = EINVAL;
-               break;
-       }
-
-fail_errno:
-#ifdef TDB_TRACE
-       close(tdb->tracefd);
-#endif
-       if (tdb->file) {
-               tdb_lock_cleanup(tdb);
-               if (--tdb->file->refcnt == 0) {
-                       assert(tdb->file->num_lockrecs == 0);
-                       if (tdb->file->map_ptr) {
-                               if (tdb->flags & TDB_INTERNAL) {
-                                       free(tdb->file->map_ptr);
-                               } else
-                                       tdb_munmap(tdb->file);
-                       }
-                       if (close(tdb->file->fd) != 0)
-                               tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                          "tdb_open: failed to close tdb fd"
-                                          " on error: %s", strerror(errno));
-                       free(tdb->file->lockrecs);
-                       free(tdb->file);
-               }
-       }
-
-       free(tdb);
-       errno = saved_errno;
-       return NULL;
-}
-
-_PUBLIC_ int tdb_close(struct tdb_context *tdb)
-{
-       int ret = 0;
-       struct tdb_context **i;
-
-       tdb_trace(tdb, "tdb_close");
-
-       if (tdb->transaction) {
-               tdb_transaction_cancel(tdb);
-       }
-
-       if (tdb->file->map_ptr) {
-               if (tdb->flags & TDB_INTERNAL)
-                       free(tdb->file->map_ptr);
-               else
-                       tdb_munmap(tdb->file);
-       }
-       if (tdb->file) {
-               tdb_lock_cleanup(tdb);
-               if (--tdb->file->refcnt == 0) {
-                       ret = close(tdb->file->fd);
-                       free(tdb->file->lockrecs);
-                       free(tdb->file);
-               }
-       }
-
-       /* Remove from tdbs list */
-       for (i = &tdbs; *i; i = &(*i)->next) {
-               if (*i == tdb) {
-                       *i = tdb->next;
-                       break;
-               }
-       }
-
-#ifdef TDB_TRACE
-       close(tdb->tracefd);
-#endif
-       free(tdb);
-
-       return ret;
-}
-
-_PUBLIC_ void tdb_foreach_(int (*fn)(struct tdb_context *, void *), void *p)
-{
-       struct tdb_context *i;
-
-       for (i = tdbs; i; i = i->next) {
-               if (fn(i, p) != 0)
-                       break;
-       }
-}
diff --git a/lib/tdb2/private.h b/lib/tdb2/private.h
deleted file mode 100644 (file)
index 8c917a7..0000000
+++ /dev/null
@@ -1,657 +0,0 @@
-#ifndef TDB_PRIVATE_H
-#define TDB_PRIVATE_H
- /*
-   Trivial Database 2: private types and prototypes
-   Copyright (C) Rusty Russell 2010
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "config.h"
-#ifndef HAVE_CCAN
-#error You need ccan to build tdb2!
-#endif
-#include "tdb2.h"
-#include <ccan/compiler/compiler.h>
-#include <ccan/likely/likely.h>
-#include <ccan/endian/endian.h>
-
-#ifdef HAVE_LIBREPLACE
-#include "replace.h"
-#include "system/filesys.h"
-#include "system/time.h"
-#include "system/shmem.h"
-#include "system/select.h"
-#include "system/wait.h"
-#else
-#include <stdint.h>
-#include <stdbool.h>
-#include <stdlib.h>
-#include <stddef.h>
-#include <sys/time.h>
-#include <sys/mman.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <stdio.h>
-#include <utime.h>
-#include <unistd.h>
-#endif
-
-#ifndef TEST_IT
-#define TEST_IT(cond)
-#endif
-
-/* #define TDB_TRACE 1 */
-
-#ifndef __STRING
-#define __STRING(x)    #x
-#endif
-
-#ifndef __STRINGSTRING
-#define __STRINGSTRING(x) __STRING(x)
-#endif
-
-#ifndef __location__
-#define __location__ __FILE__ ":" __STRINGSTRING(__LINE__)
-#endif
-
-typedef uint64_t tdb_len_t;
-typedef uint64_t tdb_off_t;
-
-#define TDB_MAGIC_FOOD "TDB file\n"
-#define TDB_VERSION ((uint64_t)(0x26011967 + 7))
-#define TDB_USED_MAGIC ((uint64_t)0x1999)
-#define TDB_HTABLE_MAGIC ((uint64_t)0x1888)
-#define TDB_CHAIN_MAGIC ((uint64_t)0x1777)
-#define TDB_FTABLE_MAGIC ((uint64_t)0x1666)
-#define TDB_CAP_MAGIC ((uint64_t)0x1555)
-#define TDB_FREE_MAGIC ((uint64_t)0xFE)
-#define TDB_HASH_MAGIC (0xA1ABE11A01092008ULL)
-#define TDB_RECOVERY_MAGIC (0xf53bc0e7ad124589ULL)
-#define TDB_RECOVERY_INVALID_MAGIC (0x0ULL)
-
-/* Capability bits. */
-#define TDB_CAP_TYPE_MASK      0x1FFFFFFFFFFFFFFFULL
-#define TDB_CAP_NOCHECK                0x8000000000000000ULL
-#define TDB_CAP_NOWRITE                0x4000000000000000ULL
-#define TDB_CAP_NOOPEN         0x2000000000000000ULL
-
-#define TDB_OFF_IS_ERR(off) unlikely(off >= (tdb_off_t)(long)TDB_ERR_LAST)
-#define TDB_OFF_TO_ERR(off) ((enum TDB_ERROR)(long)(off))
-#define TDB_ERR_TO_OFF(ecode) ((tdb_off_t)(long)(ecode))
-
-/* Packing errors into pointers and v.v. */
-#define TDB_PTR_IS_ERR(ptr) \
-       unlikely((unsigned long)(ptr) >= (unsigned long)TDB_ERR_LAST)
-#define TDB_PTR_ERR(p) ((enum TDB_ERROR)(long)(p))
-#define TDB_ERR_PTR(err) ((void *)(long)(err))
-
-/* Common case of returning true, false or -ve error. */
-typedef int tdb_bool_err;
-
-/* Prevent others from opening the file. */
-#define TDB_OPEN_LOCK 0
-/* Expanding file. */
-#define TDB_EXPANSION_LOCK 2
-/* Doing a transaction. */
-#define TDB_TRANSACTION_LOCK 8
-/* Hash chain locks. */
-#define TDB_HASH_LOCK_START 64
-
-/* Range for hash locks. */
-#define TDB_HASH_LOCK_RANGE_BITS 30
-#define TDB_HASH_LOCK_RANGE (1 << TDB_HASH_LOCK_RANGE_BITS)
-
-/* We have 1024 entries in the top level. */
-#define TDB_TOPLEVEL_HASH_BITS 10
-/* And 64 entries in each sub-level: thus 64 bits exactly after 9 levels. */
-#define TDB_SUBLEVEL_HASH_BITS 6
-/* And 8 entries in each group, ie 8 groups per sublevel. */
-#define TDB_HASH_GROUP_BITS 3
-/* This is currently 10: beyond this we chain. */
-#define TDB_MAX_LEVELS (1+(64-TDB_TOPLEVEL_HASH_BITS) / TDB_SUBLEVEL_HASH_BITS)
-
-/* Extend file by least 100 times larger than needed. */
-#define TDB_EXTENSION_FACTOR 100
-
-/* We steal bits from the offsets to store hash info. */
-#define TDB_OFF_HASH_GROUP_MASK ((1ULL << TDB_HASH_GROUP_BITS) - 1)
-/* We steal this many upper bits, giving a maximum offset of 64 exabytes. */
-#define TDB_OFF_UPPER_STEAL 8
-#define   TDB_OFF_UPPER_STEAL_EXTRA 7
-/* The bit number where we store extra hash bits. */
-#define TDB_OFF_HASH_EXTRA_BIT 57
-#define TDB_OFF_UPPER_STEAL_SUBHASH_BIT 56
-
-/* Additional features we understand.  Currently: none. */
-#define TDB_FEATURE_MASK ((uint64_t)0)
-
-/* The bit number where we store the extra hash bits. */
-/* Convenience mask to get actual offset. */
-#define TDB_OFF_MASK \
-       (((1ULL << (64 - TDB_OFF_UPPER_STEAL)) - 1) - TDB_OFF_HASH_GROUP_MASK)
-
-/* How many buckets in a free list: see size_to_bucket(). */
-#define TDB_FREE_BUCKETS (64 - TDB_OFF_UPPER_STEAL)
-
-/* We have to be able to fit a free record here. */
-#define TDB_MIN_DATA_LEN       \
-       (sizeof(struct tdb_free_record) - sizeof(struct tdb_used_record))
-
-/* Indicates this entry is not on an flist (can happen during coalescing) */
-#define TDB_FTABLE_NONE ((1ULL << TDB_OFF_UPPER_STEAL) - 1)
-
-struct tdb_used_record {
-       /* For on-disk compatibility, we avoid bitfields:
-          magic: 16,        (highest)
-          key_len_bits: 5,
-          extra_padding: 32
-          hash_bits: 11
-       */
-        uint64_t magic_and_meta;
-       /* The bottom key_len_bits*2 are key length, rest is data length. */
-        uint64_t key_and_data_len;
-};
-
-static inline unsigned rec_key_bits(const struct tdb_used_record *r)
-{
-       return ((r->magic_and_meta >> 43) & ((1 << 5)-1)) * 2;
-}
-
-static inline uint64_t rec_key_length(const struct tdb_used_record *r)
-{
-       return r->key_and_data_len & ((1ULL << rec_key_bits(r)) - 1);
-}
-
-static inline uint64_t rec_data_length(const struct tdb_used_record *r)
-{
-       return r->key_and_data_len >> rec_key_bits(r);
-}
-
-static inline uint64_t rec_extra_padding(const struct tdb_used_record *r)
-{
-       return (r->magic_and_meta >> 11) & 0xFFFFFFFF;
-}
-
-static inline uint32_t rec_hash(const struct tdb_used_record *r)
-{
-       return r->magic_and_meta & ((1 << 11) - 1);
-}
-
-static inline uint16_t rec_magic(const struct tdb_used_record *r)
-{
-       return (r->magic_and_meta >> 48);
-}
-
-struct tdb_free_record {
-        uint64_t magic_and_prev; /* TDB_OFF_UPPER_STEAL bits magic, then prev */
-        uint64_t ftable_and_len; /* Len not counting these two fields. */
-       /* This is why the minimum record size is 8 bytes.  */
-       uint64_t next;
-};
-
-static inline uint64_t frec_prev(const struct tdb_free_record *f)
-{
-       return f->magic_and_prev & ((1ULL << (64 - TDB_OFF_UPPER_STEAL)) - 1);
-}
-
-static inline uint64_t frec_magic(const struct tdb_free_record *f)
-{
-       return f->magic_and_prev >> (64 - TDB_OFF_UPPER_STEAL);
-}
-
-static inline uint64_t frec_len(const struct tdb_free_record *f)
-{
-       return f->ftable_and_len & ((1ULL << (64 - TDB_OFF_UPPER_STEAL))-1);
-}
-
-static inline unsigned frec_ftable(const struct tdb_free_record *f)
-{
-       return f->ftable_and_len >> (64 - TDB_OFF_UPPER_STEAL);
-}
-
-struct tdb_recovery_record {
-       uint64_t magic;
-       /* Length of record (add this header to get total length). */
-       uint64_t max_len;
-       /* Length used. */
-       uint64_t len;
-       /* Old length of file before transaction. */
-       uint64_t eof;
-};
-
-/* If we bottom out of the subhashes, we chain. */
-struct tdb_chain {
-       tdb_off_t rec[1 << TDB_HASH_GROUP_BITS];
-       tdb_off_t next;
-};
-
-/* this is stored at the front of every database */
-struct tdb_header {
-       char magic_food[64]; /* for /etc/magic */
-       /* FIXME: Make me 32 bit? */
-       uint64_t version; /* version of the code */
-       uint64_t hash_test; /* result of hashing HASH_MAGIC. */
-       uint64_t hash_seed; /* "random" seed written at creation time. */
-       tdb_off_t free_table; /* (First) free table. */
-       tdb_off_t recovery; /* Transaction recovery area. */
-
-       uint64_t features_used; /* Features all writers understand */
-       uint64_t features_offered; /* Features offered */
-
-       uint64_t seqnum; /* Sequence number for TDB_SEQNUM */
-
-       tdb_off_t capabilities; /* Optional linked list of capabilities. */
-       tdb_off_t reserved[22];
-
-       /* Top level hash table. */
-       tdb_off_t hashtable[1ULL << TDB_TOPLEVEL_HASH_BITS];
-};
-
-struct tdb_freetable {
-       struct tdb_used_record hdr;
-       tdb_off_t next;
-       tdb_off_t buckets[TDB_FREE_BUCKETS];
-};
-
-struct tdb_capability {
-       struct tdb_used_record hdr;
-       tdb_off_t type;
-       tdb_off_t next;
-       /* ... */
-};
-
-/* Information about a particular (locked) hash entry. */
-struct hash_info {
-       /* Full hash value of entry. */
-       uint64_t h;
-       /* Start and length of lock acquired. */
-       tdb_off_t hlock_start;
-       tdb_len_t hlock_range;
-       /* Start of hash group. */
-       tdb_off_t group_start;
-       /* Bucket we belong in. */
-       unsigned int home_bucket;
-       /* Bucket we (or an empty space) were found in. */
-       unsigned int found_bucket;
-       /* How many bits of the hash are already used. */
-       unsigned int hash_used;
-       /* Current working group. */
-       tdb_off_t group[1 << TDB_HASH_GROUP_BITS];
-};
-
-struct traverse_info {
-       struct traverse_level {
-               tdb_off_t hashtable;
-               /* We ignore groups here, and treat it as a big array. */
-               unsigned entry;
-               unsigned int total_buckets;
-       } levels[TDB_MAX_LEVELS + 1];
-       unsigned int num_levels;
-       unsigned int toplevel_group;
-       /* This makes delete-everything-inside-traverse work as expected. */
-       tdb_off_t prev;
-};
-
-enum tdb_lock_flags {
-       /* WAIT == F_SETLKW, NOWAIT == F_SETLK */
-       TDB_LOCK_NOWAIT = 0,
-       TDB_LOCK_WAIT = 1,
-       /* If set, don't log an error on failure. */
-       TDB_LOCK_PROBE = 2,
-       /* If set, don't check for recovery (used by recovery code). */
-       TDB_LOCK_NOCHECK = 4,
-};
-
-struct tdb_lock {
-       struct tdb_context *owner;
-       off_t off;
-       uint32_t count;
-       uint32_t ltype;
-};
-
-/* This is only needed for tdb_access_commit, but used everywhere to
- * simplify. */
-struct tdb_access_hdr {
-       struct tdb_access_hdr *next;
-       tdb_off_t off;
-       tdb_len_t len;
-       bool convert;
-};
-
-struct tdb_file {
-       /* How many are sharing us? */
-       unsigned int refcnt;
-
-       /* Mmap (if any), or malloc (for TDB_INTERNAL). */
-       void *map_ptr;
-
-       /* How much space has been mapped (<= current file size) */
-       tdb_len_t map_size;
-
-       /* The file descriptor (-1 for TDB_INTERNAL). */
-       int fd;
-
-       /* Lock information */
-       pid_t locker;
-       struct tdb_lock allrecord_lock;
-       size_t num_lockrecs;
-       struct tdb_lock *lockrecs;
-
-       /* Identity of this file. */
-       dev_t device;
-       ino_t inode;
-};
-
-struct tdb_methods {
-       enum TDB_ERROR (*tread)(struct tdb_context *, tdb_off_t, void *,
-                               tdb_len_t);
-       enum TDB_ERROR (*twrite)(struct tdb_context *, tdb_off_t, const void *,
-                                tdb_len_t);
-       enum TDB_ERROR (*oob)(struct tdb_context *, tdb_off_t, tdb_len_t, bool);
-       enum TDB_ERROR (*expand_file)(struct tdb_context *, tdb_len_t);
-       void *(*direct)(struct tdb_context *, tdb_off_t, size_t, bool);
-};
-
-/*
-  internal prototypes
-*/
-/* hash.c: */
-uint64_t tdb_jenkins_hash(const void *key, size_t length, uint64_t seed,
-                         void *unused);
-
-enum TDB_ERROR first_in_hash(struct tdb_context *tdb,
-                            struct traverse_info *tinfo,
-                            TDB_DATA *kbuf, size_t *dlen);
-
-enum TDB_ERROR next_in_hash(struct tdb_context *tdb,
-                           struct traverse_info *tinfo,
-                           TDB_DATA *kbuf, size_t *dlen);
-
-/* Hash random memory. */
-uint64_t tdb_hash(struct tdb_context *tdb, const void *ptr, size_t len);
-
-/* Hash on disk. */
-uint64_t hash_record(struct tdb_context *tdb, tdb_off_t off);
-
-/* Find and lock a hash entry (or where it would be). */
-tdb_off_t find_and_lock(struct tdb_context *tdb,
-                       struct tdb_data key,
-                       int ltype,
-                       struct hash_info *h,
-                       struct tdb_used_record *rec,
-                       struct traverse_info *tinfo);
-
-enum TDB_ERROR replace_in_hash(struct tdb_context *tdb,
-                              struct hash_info *h,
-                              tdb_off_t new_off);
-
-enum TDB_ERROR add_to_hash(struct tdb_context *tdb, struct hash_info *h,
-                          tdb_off_t new_off);
-
-enum TDB_ERROR delete_from_hash(struct tdb_context *tdb, struct hash_info *h);
-
-/* For tdb_check */
-bool is_subhash(tdb_off_t val);
-enum TDB_ERROR unknown_capability(struct tdb_context *tdb, const char *caller,
-                                 tdb_off_t type);
-
-/* free.c: */
-enum TDB_ERROR tdb_ftable_init(struct tdb_context *tdb);
-
-/* check.c needs these to iterate through free lists. */
-tdb_off_t first_ftable(struct tdb_context *tdb);
-tdb_off_t next_ftable(struct tdb_context *tdb, tdb_off_t ftable);
-
-/* This returns space or -ve error number. */
-tdb_off_t alloc(struct tdb_context *tdb, size_t keylen, size_t datalen,
-               uint64_t hash, unsigned magic, bool growing);
-
-/* Put this record in a free list. */
-enum TDB_ERROR add_free_record(struct tdb_context *tdb,
-                              tdb_off_t off, tdb_len_t len_with_header,
-                              enum tdb_lock_flags waitflag,
-                              bool coalesce_ok);
-
-/* Set up header for a used/ftable/htable/chain/capability record. */
-enum TDB_ERROR set_header(struct tdb_context *tdb,
-                         struct tdb_used_record *rec,
-                         unsigned magic, uint64_t keylen, uint64_t datalen,
-                         uint64_t actuallen, unsigned hashlow);
-
-/* Used by tdb_check to verify. */
-unsigned int size_to_bucket(tdb_len_t data_len);
-tdb_off_t bucket_off(tdb_off_t ftable_off, unsigned bucket);
-
-/* Used by tdb_summary */
-tdb_off_t dead_space(struct tdb_context *tdb, tdb_off_t off);
-
-/* Adjust expansion, used by create_recovery_area */
-tdb_off_t tdb_expand_adjust(tdb_off_t map_size, tdb_off_t size);
-
-/* io.c: */
-/* Initialize tdb->methods. */
-void tdb_io_init(struct tdb_context *tdb);
-
-/* Convert endian of the buffer if required. */
-void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size);
-
-/* Unmap and try to map the tdb. */
-void tdb_munmap(struct tdb_file *file);
-enum TDB_ERROR tdb_mmap(struct tdb_context *tdb);
-
-/* Either alloc a copy, or give direct access.  Release frees or noop. */
-const void *tdb_access_read(struct tdb_context *tdb,
-                           tdb_off_t off, tdb_len_t len, bool convert);
-void *tdb_access_write(struct tdb_context *tdb,
-                      tdb_off_t off, tdb_len_t len, bool convert);
-
-/* Release result of tdb_access_read/write. */
-void tdb_access_release(struct tdb_context *tdb, const void *p);
-/* Commit result of tdb_acces_write. */
-enum TDB_ERROR tdb_access_commit(struct tdb_context *tdb, void *p);
-
-/* Convenience routine to get an offset. */
-tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off);
-
-/* Write an offset at an offset. */
-enum TDB_ERROR tdb_write_off(struct tdb_context *tdb, tdb_off_t off,
-                            tdb_off_t val);
-
-/* Clear an ondisk area. */
-enum TDB_ERROR zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len);
-
-/* Return a non-zero offset between >= start < end in this array (or end). */
-tdb_off_t tdb_find_nonzero_off(struct tdb_context *tdb,
-                              tdb_off_t base,
-                              uint64_t start,
-                              uint64_t end);
-
-/* Return a zero offset in this array, or num. */
-tdb_off_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
-                           uint64_t num);
-
-/* Allocate and make a copy of some offset. */
-void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len);
-
-/* Writes a converted copy of a record. */
-enum TDB_ERROR tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
-                                const void *rec, size_t len);
-
-/* Reads record and converts it */
-enum TDB_ERROR tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
-                               void *rec, size_t len);
-
-/* Bump the seqnum (caller checks for tdb->flags & TDB_SEQNUM) */
-void tdb_inc_seqnum(struct tdb_context *tdb);
-
-/* lock.c: */
-/* Print message because another tdb owns a lock we want. */
-enum TDB_ERROR owner_conflict(struct tdb_context *tdb, const char *call);
-
-/* If we fork, we no longer really own locks. */
-bool check_lock_pid(struct tdb_context *tdb, const char *call, bool log);
-
-/* Lock/unlock a range of hashes. */
-enum TDB_ERROR tdb_lock_hashes(struct tdb_context *tdb,
-                              tdb_off_t hash_lock, tdb_len_t hash_range,
-                              int ltype, enum tdb_lock_flags waitflag);
-enum TDB_ERROR tdb_unlock_hashes(struct tdb_context *tdb,
-                                tdb_off_t hash_lock,
-                                tdb_len_t hash_range, int ltype);
-
-/* For closing the file. */
-void tdb_lock_cleanup(struct tdb_context *tdb);
-
-/* Lock/unlock a particular free bucket. */
-enum TDB_ERROR tdb_lock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off,
-                                   enum tdb_lock_flags waitflag);
-void tdb_unlock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off);
-
-/* Serialize transaction start. */
-enum TDB_ERROR tdb_transaction_lock(struct tdb_context *tdb, int ltype);
-void tdb_transaction_unlock(struct tdb_context *tdb, int ltype);
-
-/* Do we have any hash locks (ie. via tdb_chainlock) ? */
-bool tdb_has_hash_locks(struct tdb_context *tdb);
-
-/* Lock entire database. */
-enum TDB_ERROR tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
-                                 enum tdb_lock_flags flags, bool upgradable);
-void tdb_allrecord_unlock(struct tdb_context *tdb, int ltype);
-enum TDB_ERROR tdb_allrecord_upgrade(struct tdb_context *tdb, off_t start);
-
-/* Serialize db open. */
-enum TDB_ERROR tdb_lock_open(struct tdb_context *tdb,
-                            int ltype, enum tdb_lock_flags flags);
-void tdb_unlock_open(struct tdb_context *tdb, int ltype);
-bool tdb_has_open_lock(struct tdb_context *tdb);
-
-/* Serialize db expand. */
-enum TDB_ERROR tdb_lock_expand(struct tdb_context *tdb, int ltype);
-void tdb_unlock_expand(struct tdb_context *tdb, int ltype);
-bool tdb_has_expansion_lock(struct tdb_context *tdb);
-
-/* If it needs recovery, grab all the locks and do it. */
-enum TDB_ERROR tdb_lock_and_recover(struct tdb_context *tdb);
-
-/* Default lock and unlock functions. */
-int tdb_fcntl_lock(int fd, int rw, off_t off, off_t len, bool waitflag, void *);
-int tdb_fcntl_unlock(int fd, int rw, off_t off, off_t len, void *);
-
-/* transaction.c: */
-enum TDB_ERROR tdb_transaction_recover(struct tdb_context *tdb);
-tdb_bool_err tdb_needs_recovery(struct tdb_context *tdb);
-
-struct tdb_context {
-       /* Single list of all TDBs, to detect multiple opens. */
-       struct tdb_context *next;
-
-       /* Filename of the database. */
-       const char *name;
-
-       /* Logging function */
-       void (*log_fn)(struct tdb_context *tdb,
-                      enum tdb_log_level level,
-                      enum TDB_ERROR ecode,
-                      const char *message,
-                      void *data);
-       void *log_data;
-
-       /* Open flags passed to tdb_open. */
-       int open_flags;
-
-       /* low level (fnctl) lock functions. */
-       int (*lock_fn)(int fd, int rw, off_t off, off_t len, bool w, void *);
-       int (*unlock_fn)(int fd, int rw, off_t off, off_t len, void *);
-       void *lock_data;
-
-       /* the tdb flags passed to tdb_open. */
-       uint32_t flags;
-
-       /* Our statistics. */
-       struct tdb_attribute_stats stats;
-
-       /* The actual file information */
-       struct tdb_file *file;
-
-       /* Hash function. */
-       uint64_t (*hash_fn)(const void *key, size_t len, uint64_t seed, void *);
-       void *hash_data;
-       uint64_t hash_seed;
-
-       /* Our open hook, if any. */
-       enum TDB_ERROR (*openhook)(int fd, void *data);
-       void *openhook_data;
-
-       /* Last error we returned. */
-       enum TDB_ERROR last_error;
-
-       /* Are we accessing directly? (debugging check). */
-       int direct_access;
-
-       /* Set if we are in a transaction. */
-       struct tdb_transaction *transaction;
-
-       /* What free table are we using? */
-       tdb_off_t ftable_off;
-       unsigned int ftable;
-
-       /* IO methods: changes for transactions. */
-       const struct tdb_methods *io;
-
-       /* Direct access information */
-       struct tdb_access_hdr *access;
-};
-
-/* tdb.c: */
-enum TDB_ERROR COLD PRINTF_FMT(4, 5)
-       tdb_logerr(struct tdb_context *tdb,
-                  enum TDB_ERROR ecode,
-                  enum tdb_log_level level,
-                  const char *fmt, ...);
-
-#ifdef TDB_TRACE
-void tdb_trace(struct tdb_context *tdb, const char *op);
-void tdb_trace_seqnum(struct tdb_context *tdb, uint32_t seqnum, const char *op);
-void tdb_trace_open(struct tdb_context *tdb, const char *op,
-                   unsigned hash_size, unsigned tdb_flags, unsigned open_flags);
-void tdb_trace_ret(struct tdb_context *tdb, const char *op, int ret);
-void tdb_trace_retrec(struct tdb_context *tdb, const char *op, TDB_DATA ret);
-void tdb_trace_1rec(struct tdb_context *tdb, const char *op,
-                   TDB_DATA rec);
-void tdb_trace_1rec_ret(struct tdb_context *tdb, const char *op,
-                       TDB_DATA rec, int ret);
-void tdb_trace_1rec_retrec(struct tdb_context *tdb, const char *op,
-                          TDB_DATA rec, TDB_DATA ret);
-void tdb_trace_2rec_flag_ret(struct tdb_context *tdb, const char *op,
-                            TDB_DATA rec1, TDB_DATA rec2, unsigned flag,
-                            int ret);
-void tdb_trace_2rec_retrec(struct tdb_context *tdb, const char *op,
-                          TDB_DATA rec1, TDB_DATA rec2, TDB_DATA ret);
-#else
-#define tdb_trace(tdb, op)
-#define tdb_trace_seqnum(tdb, seqnum, op)
-#define tdb_trace_open(tdb, op, hash_size, tdb_flags, open_flags)
-#define tdb_trace_ret(tdb, op, ret)
-#define tdb_trace_retrec(tdb, op, ret)
-#define tdb_trace_1rec(tdb, op, rec)
-#define tdb_trace_1rec_ret(tdb, op, rec, ret)
-#define tdb_trace_1rec_retrec(tdb, op, rec, ret)
-#define tdb_trace_2rec_flag_ret(tdb, op, rec1, rec2, flag, ret)
-#define tdb_trace_2rec_retrec(tdb, op, rec1, rec2, ret)
-#endif /* !TDB_TRACE */
-
-#endif
diff --git a/lib/tdb2/pytdb.c b/lib/tdb2/pytdb.c
deleted file mode 100644 (file)
index 1fa4e58..0000000
+++ /dev/null
@@ -1,591 +0,0 @@
-/*
-   Unix SMB/CIFS implementation.
-
-   Python interface to tdb2.  Simply modified from tdb1 version.
-
-   Copyright (C) 2004-2006 Tim Potter <tpot@samba.org>
-   Copyright (C) 2007-2008 Jelmer Vernooij <jelmer@samba.org>
-   Copyright (C) 2011 Rusty Russell <rusty@rustcorp.com.au>
-
-     ** NOTE! The following LGPL license applies to the tdb
-     ** library. This does NOT imply that all of Samba is released
-     ** under the LGPL
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <Python.h>
-#include "replace.h"
-#include "system/filesys.h"
-
-#ifndef Py_RETURN_NONE
-#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
-#endif
-
-/* Include tdb headers */
-#include <tdb2.h>
-
-typedef struct {
-       PyObject_HEAD
-       struct tdb_context *ctx;
-       bool closed;
-} PyTdbObject;
-
-staticforward PyTypeObject PyTdb;
-
-static void PyErr_SetTDBError(enum TDB_ERROR e)
-{
-       PyErr_SetObject(PyExc_RuntimeError,
-               Py_BuildValue("(i,s)", e, tdb_errorstr(e)));
-}
-
-static TDB_DATA PyString_AsTDB_DATA(PyObject *data)
-{
-       TDB_DATA ret;
-       ret.dptr = (unsigned char *)PyString_AsString(data);
-       ret.dsize = PyString_Size(data);
-       return ret;
-}
-
-static PyObject *PyString_FromTDB_DATA(TDB_DATA data)
-{
-       PyObject *ret = PyString_FromStringAndSize((const char *)data.dptr,
-                                                  data.dsize);
-       free(data.dptr);
-       return ret;
-}
-
-#define PyErr_TDB_ERROR_IS_ERR_RAISE(ret) \
-       if (ret != TDB_SUCCESS) { \
-               PyErr_SetTDBError(ret); \
-               return NULL; \
-       }
-
-static void stderr_log(struct tdb_context *tdb,
-                      enum tdb_log_level level,
-                      enum TDB_ERROR ecode,
-                      const char *message,
-                      void *data)
-{
-       fprintf(stderr, "%s:%s:%s\n",
-               tdb_name(tdb), tdb_errorstr(ecode), message);
-}
-
-static PyObject *py_tdb_open(PyTypeObject *type, PyObject *args, PyObject *kwargs)
-{
-       char *name = NULL;
-       int tdb_flags = TDB_DEFAULT, flags = O_RDWR, mode = 0600;
-       struct tdb_context *ctx;
-       PyTdbObject *ret;
-       union tdb_attribute logattr;
-       const char *kwnames[] = { "name", "tdb_flags", "flags", "mode", NULL };
-
-       if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|siii", cast_const2(char **, kwnames), &name, &tdb_flags, &flags, &mode))
-               return NULL;
-
-       if (name == NULL) {
-               tdb_flags |= TDB_INTERNAL;
-       }
-
-       logattr.log.base.attr = TDB_ATTRIBUTE_LOG;
-       logattr.log.base.next = NULL;
-       logattr.log.fn = stderr_log;
-       ctx = tdb_open(name, tdb_flags, flags, mode, &logattr);
-       if (ctx == NULL) {
-               PyErr_SetFromErrno(PyExc_IOError);
-               return NULL;
-       }
-
-       ret = PyObject_New(PyTdbObject, &PyTdb);
-       if (!ret) {
-               tdb_close(ctx);
-               return NULL;
-       }
-
-       ret->ctx = ctx;
-       ret->closed = false;
-       return (PyObject *)ret;
-}
-
-static PyObject *obj_transaction_cancel(PyTdbObject *self)
-{
-       tdb_transaction_cancel(self->ctx);
-       Py_RETURN_NONE;
-}
-
-static PyObject *obj_transaction_commit(PyTdbObject *self)
-{
-       enum TDB_ERROR ret = tdb_transaction_commit(self->ctx);
-       PyErr_TDB_ERROR_IS_ERR_RAISE(ret);
-       Py_RETURN_NONE;
-}
-
-static PyObject *obj_transaction_prepare_commit(PyTdbObject *self)
-{
-       enum TDB_ERROR ret = tdb_transaction_prepare_commit(self->ctx);
-       PyErr_TDB_ERROR_IS_ERR_RAISE(ret);
-       Py_RETURN_NONE;
-}
-
-static PyObject *obj_transaction_start(PyTdbObject *self)
-{
-       enum TDB_ERROR ret = tdb_transaction_start(self->ctx);
-       PyErr_TDB_ERROR_IS_ERR_RAISE(ret);
-       Py_RETURN_NONE;
-}
-
-static PyObject *obj_lockall(PyTdbObject *self)
-{
-       enum TDB_ERROR ret = tdb_lockall(self->ctx);
-       PyErr_TDB_ERROR_IS_ERR_RAISE(ret);
-       Py_RETURN_NONE;
-}
-
-static PyObject *obj_unlockall(PyTdbObject *self)
-{
-       tdb_unlockall(self->ctx);
-       Py_RETURN_NONE;
-}
-
-static PyObject *obj_lockall_read(PyTdbObject *self)
-{
-       enum TDB_ERROR ret = tdb_lockall_read(self->ctx);
-       PyErr_TDB_ERROR_IS_ERR_RAISE(ret);
-       Py_RETURN_NONE;
-}
-
-static PyObject *obj_unlockall_read(PyTdbObject *self)
-{
-       tdb_unlockall_read(self->ctx);
-       Py_RETURN_NONE;
-}
-
-static PyObject *obj_close(PyTdbObject *self)
-{
-       int ret;
-       if (self->closed)
-               Py_RETURN_NONE;
-       ret = tdb_close(self->ctx);
-       self->closed = true;
-       if (ret != 0) {
-               PyErr_SetTDBError(TDB_ERR_IO);
-               return NULL;
-       }
-       Py_RETURN_NONE;
-}
-
-static PyObject *obj_get(PyTdbObject *self, PyObject *args)
-{
-       TDB_DATA key, data;
-       PyObject *py_key;
-       enum TDB_ERROR ret;
-       if (!PyArg_ParseTuple(args, "O", &py_key))
-               return NULL;
-
-       key = PyString_AsTDB_DATA(py_key);
-       ret = tdb_fetch(self->ctx, key, &data);
-       if (ret == TDB_ERR_NOEXIST)
-               Py_RETURN_NONE;
-       PyErr_TDB_ERROR_IS_ERR_RAISE(ret);
-       return PyString_FromTDB_DATA(data);
-}
-
-static PyObject *obj_append(PyTdbObject *self, PyObject *args)
-{
-       TDB_DATA key, data;
-       PyObject *py_key, *py_data;
-       enum TDB_ERROR ret;
-       if (!PyArg_ParseTuple(args, "OO", &py_key, &py_data))
-               return NULL;
-
-       key = PyString_AsTDB_DATA(py_key);
-       data = PyString_AsTDB_DATA(py_data);
-
-       ret = tdb_append(self->ctx, key, data);
-       PyErr_TDB_ERROR_IS_ERR_RAISE(ret);
-       Py_RETURN_NONE;
-}
-
-static PyObject *obj_firstkey(PyTdbObject *self)
-{
-       enum TDB_ERROR ret;
-       TDB_DATA key;
-
-       ret = tdb_firstkey(self->ctx, &key);
-       if (ret == TDB_ERR_NOEXIST)
-               Py_RETURN_NONE;
-       PyErr_TDB_ERROR_IS_ERR_RAISE(ret);
-
-       return PyString_FromTDB_DATA(key);
-}
-
-static PyObject *obj_nextkey(PyTdbObject *self, PyObject *args)
-{
-       TDB_DATA key;
-       PyObject *py_key;
-       enum TDB_ERROR ret;
-       if (!PyArg_ParseTuple(args, "O", &py_key))
-               return NULL;
-
-       /* Malloc here, since tdb_nextkey frees. */
-       key.dsize = PyString_Size(py_key);
-       key.dptr = malloc(key.dsize);
-       memcpy(key.dptr, PyString_AsString(py_key), key.dsize);
-
-       ret = tdb_nextkey(self->ctx, &key);
-       if (ret == TDB_ERR_NOEXIST)
-               Py_RETURN_NONE;
-       PyErr_TDB_ERROR_IS_ERR_RAISE(ret);
-
-       return PyString_FromTDB_DATA(key);
-}
-
-static PyObject *obj_delete(PyTdbObject *self, PyObject *args)
-{
-       TDB_DATA key;
-       PyObject *py_key;
-       enum TDB_ERROR ret;
-       if (!PyArg_ParseTuple(args, "O", &py_key))
-               return NULL;
-
-       key = PyString_AsTDB_DATA(py_key);
-       ret = tdb_delete(self->ctx, key);
-       PyErr_TDB_ERROR_IS_ERR_RAISE(ret);
-       Py_RETURN_NONE;
-}
-
-static PyObject *obj_has_key(PyTdbObject *self, PyObject *args)
-{
-       TDB_DATA key;
-       PyObject *py_key;
-       if (!PyArg_ParseTuple(args, "O", &py_key))
-               return NULL;
-
-       key = PyString_AsTDB_DATA(py_key);
-       if (tdb_exists(self->ctx, key))
-               return Py_True;
-       if (tdb_error(self->ctx) != TDB_ERR_NOEXIST)
-               PyErr_TDB_ERROR_IS_ERR_RAISE(tdb_error(self->ctx));
-       return Py_False;
-}
-
-static PyObject *obj_store(PyTdbObject *self, PyObject *args)
-{
-       TDB_DATA key, value;
-       enum TDB_ERROR ret;
-       int flag = TDB_REPLACE;
-       PyObject *py_key, *py_value;
-
-       if (!PyArg_ParseTuple(args, "OO|i", &py_key, &py_value, &flag))
-               return NULL;
-
-       key = PyString_AsTDB_DATA(py_key);
-       value = PyString_AsTDB_DATA(py_value);
-
-       ret = tdb_store(self->ctx, key, value, flag);
-       PyErr_TDB_ERROR_IS_ERR_RAISE(ret);
-       Py_RETURN_NONE;
-}
-
-static PyObject *obj_add_flag(PyTdbObject *self, PyObject *args)
-{
-       unsigned flag;
-
-       if (!PyArg_ParseTuple(args, "I", &flag))
-               return NULL;
-
-       tdb_add_flag(self->ctx, flag);
-       Py_RETURN_NONE;
-}
-
-static PyObject *obj_remove_flag(PyTdbObject *self, PyObject *args)
-{
-       unsigned flag;
-
-       if (!PyArg_ParseTuple(args, "I", &flag))
-               return NULL;
-
-       tdb_remove_flag(self->ctx, flag);
-       Py_RETURN_NONE;
-}
-
-typedef struct {
-       PyObject_HEAD
-       TDB_DATA current;
-       bool end;
-       PyTdbObject *iteratee;
-} PyTdbIteratorObject;
-
-static PyObject *tdb_iter_next(PyTdbIteratorObject *self)
-{
-       enum TDB_ERROR e;
-       PyObject *ret;
-       if (self->end)
-               return NULL;
-       ret = PyString_FromStringAndSize((const char *)self->current.dptr,
-                                        self->current.dsize);
-       e = tdb_nextkey(self->iteratee->ctx, &self->current);
-       if (e == TDB_ERR_NOEXIST)
-               self->end = true;
-       else
-               PyErr_TDB_ERROR_IS_ERR_RAISE(e);
-       return ret;
-}
-
-static void tdb_iter_dealloc(PyTdbIteratorObject *self)
-{
-       Py_DECREF(self->iteratee);
-       PyObject_Del(self);
-}
-
-PyTypeObject PyTdbIterator = {
-       .tp_name = "Iterator",
-       .tp_basicsize = sizeof(PyTdbIteratorObject),
-       .tp_iternext = (iternextfunc)tdb_iter_next,
-       .tp_dealloc = (destructor)tdb_iter_dealloc,
-       .tp_flags = Py_TPFLAGS_DEFAULT,
-       .tp_iter = PyObject_SelfIter,
-};
-
-static PyObject *tdb_object_iter(PyTdbObject *self)
-{
-       PyTdbIteratorObject *ret;
-       enum TDB_ERROR e;
-
-       ret = PyObject_New(PyTdbIteratorObject, &PyTdbIterator);
-       if (!ret)
-               return NULL;
-       e = tdb_firstkey(self->ctx, &ret->current);
-       if (e == TDB_ERR_NOEXIST) {
-               ret->end = true;
-       } else {
-               PyErr_TDB_ERROR_IS_ERR_RAISE(e);
-               ret->end = false;
-       }
-       ret->iteratee = self;
-       Py_INCREF(self);
-       return (PyObject *)ret;
-}
-
-static PyObject *obj_clear(PyTdbObject *self)
-{
-       enum TDB_ERROR ret = tdb_wipe_all(self->ctx);
-       PyErr_TDB_ERROR_IS_ERR_RAISE(ret);
-       Py_RETURN_NONE;
-}
-
-static PyObject *obj_enable_seqnum(PyTdbObject *self)
-{
-       tdb_add_flag(self->ctx, TDB_SEQNUM);
-       Py_RETURN_NONE;
-}
-
-static PyMethodDef tdb_object_methods[] = {
-       { "transaction_cancel", (PyCFunction)obj_transaction_cancel, METH_NOARGS,
-               "S.transaction_cancel() -> None\n"
-               "Cancel the currently active transaction." },
-       { "transaction_commit", (PyCFunction)obj_transaction_commit, METH_NOARGS,
-               "S.transaction_commit() -> None\n"
-               "Commit the currently active transaction." },
-       { "transaction_prepare_commit", (PyCFunction)obj_transaction_prepare_commit, METH_NOARGS,
-               "S.transaction_prepare_commit() -> None\n"
-               "Prepare to commit the currently active transaction" },
-       { "transaction_start", (PyCFunction)obj_transaction_start, METH_NOARGS,
-               "S.transaction_start() -> None\n"
-               "Start a new transaction." },
-       { "lock_all", (PyCFunction)obj_lockall, METH_NOARGS, NULL },
-       { "unlock_all", (PyCFunction)obj_unlockall, METH_NOARGS, NULL },
-       { "read_lock_all", (PyCFunction)obj_lockall_read, METH_NOARGS, NULL },
-       { "read_unlock_all", (PyCFunction)obj_unlockall_read, METH_NOARGS, NULL },
-       { "close", (PyCFunction)obj_close, METH_NOARGS, NULL },
-       { "get", (PyCFunction)obj_get, METH_VARARGS, "S.get(key) -> value\n"
-               "Fetch a value." },
-       { "append", (PyCFunction)obj_append, METH_VARARGS, "S.append(key, value) -> None\n"
-               "Append data to an existing key." },
-       { "firstkey", (PyCFunction)obj_firstkey, METH_NOARGS, "S.firstkey() -> data\n"
-               "Return the first key in this database." },
-       { "nextkey", (PyCFunction)obj_nextkey, METH_NOARGS, "S.nextkey(key) -> data\n"
-               "Return the next key in this database." },
-       { "delete", (PyCFunction)obj_delete, METH_VARARGS, "S.delete(key) -> None\n"
-               "Delete an entry." },
-       { "has_key", (PyCFunction)obj_has_key, METH_VARARGS, "S.has_key(key) -> None\n"
-               "Check whether key exists in this database." },
-       { "store", (PyCFunction)obj_store, METH_VARARGS, "S.store(key, data, flag=REPLACE) -> None"
-               "Store data." },
-       { "add_flag", (PyCFunction)obj_add_flag, METH_VARARGS, "S.add_flag(flag) -> None" },
-       { "remove_flag", (PyCFunction)obj_remove_flag, METH_VARARGS, "S.remove_flag(flag) -> None" },
-       { "iterkeys", (PyCFunction)tdb_object_iter, METH_NOARGS, "S.iterkeys() -> iterator" },
-       { "clear", (PyCFunction)obj_clear, METH_NOARGS, "S.clear() -> None\n"
-               "Wipe the entire database." },
-       { "enable_seqnum", (PyCFunction)obj_enable_seqnum, METH_NOARGS,
-               "S.enable_seqnum() -> None" },
-       { NULL }
-};
-
-static PyObject *obj_get_flags(PyTdbObject *self, void *closure)
-{
-       return PyInt_FromLong(tdb_get_flags(self->ctx));
-}
-
-static PyObject *obj_get_filename(PyTdbObject *self, void *closure)
-{
-       return PyString_FromString(tdb_name(self->ctx));
-}
-
-static PyObject *obj_get_seqnum(PyTdbObject *self, void *closure)
-{
-       return PyInt_FromLong(tdb_get_seqnum(self->ctx));
-}
-
-
-static PyGetSetDef tdb_object_getsetters[] = {
-       { cast_const(char *, "flags"), (getter)obj_get_flags, NULL, NULL },
-       { cast_const(char *, "filename"), (getter)obj_get_filename, NULL,
-         cast_const(char *, "The filename of this TDB file.")},
-       { cast_const(char *, "seqnum"), (getter)obj_get_seqnum, NULL, NULL },
-       { NULL }
-};
-
-static PyObject *tdb_object_repr(PyTdbObject *self)
-{
-       if (tdb_get_flags(self->ctx) & TDB_INTERNAL) {
-               return PyString_FromString("Tdb(<internal>)");
-       } else {
-               return PyString_FromFormat("Tdb('%s')", tdb_name(self->ctx));
-       }
-}
-
-static void tdb_object_dealloc(PyTdbObject *self)
-{
-       if (!self->closed)
-               tdb_close(self->ctx);
-       self->ob_type->tp_free(self);
-}
-
-static PyObject *obj_getitem(PyTdbObject *self, PyObject *key)
-{
-       TDB_DATA tkey, val;
-       enum TDB_ERROR ret;
-
-       if (!PyString_Check(key)) {
-               PyErr_SetString(PyExc_TypeError, "Expected string as key");
-               return NULL;
-       }
-
-       tkey.dptr = (unsigned char *)PyString_AsString(key);
-       tkey.dsize = PyString_Size(key);
-
-       ret = tdb_fetch(self->ctx, tkey, &val);
-       if (ret == TDB_ERR_NOEXIST) {
-               PyErr_SetString(PyExc_KeyError, "No such TDB entry");
-               return NULL;
-       } else {
-               PyErr_TDB_ERROR_IS_ERR_RAISE(ret);
-               return PyString_FromTDB_DATA(val);
-       }
-}
-
-static int obj_setitem(PyTdbObject *self, PyObject *key, PyObject *value)
-{
-       TDB_DATA tkey, tval;
-       enum TDB_ERROR ret;
-       if (!PyString_Check(key)) {
-               PyErr_SetString(PyExc_TypeError, "Expected string as key");
-               return -1;
-       }
-
-       tkey = PyString_AsTDB_DATA(key);
-
-       if (value == NULL) {
-               ret = tdb_delete(self->ctx, tkey);
-       } else {
-               if (!PyString_Check(value)) {
-                       PyErr_SetString(PyExc_TypeError, "Expected string as value");
-                       return -1;
-               }
-
-               tval = PyString_AsTDB_DATA(value);
-
-               ret = tdb_store(self->ctx, tkey, tval, TDB_REPLACE);
-       }
-
-       if (ret != TDB_SUCCESS) {
-               PyErr_SetTDBError(ret);
-               return -1;
-       }
-
-       return ret;
-}
-
-static PyMappingMethods tdb_object_mapping = {
-       .mp_subscript = (binaryfunc)obj_getitem,
-       .mp_ass_subscript = (objobjargproc)obj_setitem,
-};
-static PyTypeObject PyTdb = {
-       .tp_name = "tdb.Tdb",
-       .tp_basicsize = sizeof(PyTdbObject),
-       .tp_methods = tdb_object_methods,
-       .tp_getset = tdb_object_getsetters,
-       .tp_new = py_tdb_open,
-       .tp_doc = "A TDB file",
-       .tp_repr = (reprfunc)tdb_object_repr,
-       .tp_dealloc = (destructor)tdb_object_dealloc,
-       .tp_as_mapping = &tdb_object_mapping,
-       .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_ITER,
-       .tp_iter = (getiterfunc)tdb_object_iter,
-};
-
-static PyMethodDef tdb_methods[] = {
-       { "open", (PyCFunction)py_tdb_open, METH_VARARGS|METH_KEYWORDS, "open(name, hash_size=0, tdb_flags=TDB_DEFAULT, flags=O_RDWR, mode=0600)\n"
-               "Open a TDB file." },
-       { NULL }
-};
-
-void inittdb(void);
-void inittdb(void)
-{
-       PyObject *m;
-
-       if (PyType_Ready(&PyTdb) < 0)
-               return;
-
-       if (PyType_Ready(&PyTdbIterator) < 0)
-               return;
-
-       m = Py_InitModule3("tdb", tdb_methods, "TDB is a simple key-value database similar to GDBM that supports multiple writers.");
-       if (m == NULL)
-               return;
-
-       PyModule_AddObject(m, "REPLACE", PyInt_FromLong(TDB_REPLACE));
-       PyModule_AddObject(m, "INSERT", PyInt_FromLong(TDB_INSERT));
-       PyModule_AddObject(m, "MODIFY", PyInt_FromLong(TDB_MODIFY));
-
-       PyModule_AddObject(m, "DEFAULT", PyInt_FromLong(TDB_DEFAULT));
-       PyModule_AddObject(m, "INTERNAL", PyInt_FromLong(TDB_INTERNAL));
-       PyModule_AddObject(m, "NOLOCK", PyInt_FromLong(TDB_NOLOCK));
-       PyModule_AddObject(m, "NOMMAP", PyInt_FromLong(TDB_NOMMAP));
-       PyModule_AddObject(m, "CONVERT", PyInt_FromLong(TDB_CONVERT));
-       PyModule_AddObject(m, "NOSYNC", PyInt_FromLong(TDB_NOSYNC));
-       PyModule_AddObject(m, "SEQNUM", PyInt_FromLong(TDB_SEQNUM));
-       PyModule_AddObject(m, "ALLOW_NESTING", PyInt_FromLong(TDB_ALLOW_NESTING));
-
-       PyModule_AddObject(m, "__docformat__", PyString_FromString("restructuredText"));
-
-       PyModule_AddObject(m, "__version__", PyString_FromString(PACKAGE_VERSION));
-
-       Py_INCREF(&PyTdb);
-       PyModule_AddObject(m, "Tdb", (PyObject *)&PyTdb);
-
-       Py_INCREF(&PyTdbIterator);
-}
diff --git a/lib/tdb2/tdb.c b/lib/tdb2/tdb.c
deleted file mode 100644 (file)
index 5257aa1..0000000
+++ /dev/null
@@ -1,605 +0,0 @@
- /*
-   Trivial Database 2: fetch, store and misc routines.
-   Copyright (C) Rusty Russell 2010
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-#include "private.h"
-#ifndef HAVE_LIBREPLACE
-#include <ccan/asprintf/asprintf.h>
-#include <stdarg.h>
-#endif
-
-static enum TDB_ERROR update_rec_hdr(struct tdb_context *tdb,
-                                    tdb_off_t off,
-                                    tdb_len_t keylen,
-                                    tdb_len_t datalen,
-                                    struct tdb_used_record *rec,
-                                    uint64_t h)
-{
-       uint64_t dataroom = rec_data_length(rec) + rec_extra_padding(rec);
-       enum TDB_ERROR ecode;
-
-       ecode = set_header(tdb, rec, TDB_USED_MAGIC, keylen, datalen,
-                          keylen + dataroom, h);
-       if (ecode == TDB_SUCCESS) {
-               ecode = tdb_write_convert(tdb, off, rec, sizeof(*rec));
-       }
-       return ecode;
-}
-
-static enum TDB_ERROR replace_data(struct tdb_context *tdb,
-                                  struct hash_info *h,
-                                  struct tdb_data key, struct tdb_data dbuf,
-                                  tdb_off_t old_off, tdb_len_t old_room,
-                                  bool growing)
-{
-       tdb_off_t new_off;
-       enum TDB_ERROR ecode;
-
-       /* Allocate a new record. */
-       new_off = alloc(tdb, key.dsize, dbuf.dsize, h->h, TDB_USED_MAGIC,
-                       growing);
-       if (TDB_OFF_IS_ERR(new_off)) {
-               return TDB_OFF_TO_ERR(new_off);
-       }
-
-       /* We didn't like the existing one: remove it. */
-       if (old_off) {
-               tdb->stats.frees++;
-               ecode = add_free_record(tdb, old_off,
-                                       sizeof(struct tdb_used_record)
-                                       + key.dsize + old_room,
-                                       TDB_LOCK_WAIT, true);
-               if (ecode == TDB_SUCCESS)
-                       ecode = replace_in_hash(tdb, h, new_off);
-       } else {
-               ecode = add_to_hash(tdb, h, new_off);
-       }
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       new_off += sizeof(struct tdb_used_record);
-       ecode = tdb->io->twrite(tdb, new_off, key.dptr, key.dsize);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       new_off += key.dsize;
-       ecode = tdb->io->twrite(tdb, new_off, dbuf.dptr, dbuf.dsize);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       if (tdb->flags & TDB_SEQNUM)
-               tdb_inc_seqnum(tdb);
-
-       return TDB_SUCCESS;
-}
-
-static enum TDB_ERROR update_data(struct tdb_context *tdb,
-                                 tdb_off_t off,
-                                 struct tdb_data dbuf,
-                                 tdb_len_t extra)
-{
-       enum TDB_ERROR ecode;
-
-       ecode = tdb->io->twrite(tdb, off, dbuf.dptr, dbuf.dsize);
-       if (ecode == TDB_SUCCESS && extra) {
-               /* Put a zero in; future versions may append other data. */
-               ecode = tdb->io->twrite(tdb, off + dbuf.dsize, "", 1);
-       }
-       if (tdb->flags & TDB_SEQNUM)
-               tdb_inc_seqnum(tdb);
-
-       return ecode;
-}
-
-_PUBLIC_ enum TDB_ERROR tdb_store(struct tdb_context *tdb,
-                        struct tdb_data key, struct tdb_data dbuf, int flag)
-{
-       struct hash_info h;
-       tdb_off_t off;
-       tdb_len_t old_room = 0;
-       struct tdb_used_record rec;
-       enum TDB_ERROR ecode;
-
-       off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL);
-       if (TDB_OFF_IS_ERR(off)) {
-               return tdb->last_error = TDB_OFF_TO_ERR(off);
-       }
-
-       /* Now we have lock on this hash bucket. */
-       if (flag == TDB_INSERT) {
-               if (off) {
-                       ecode = TDB_ERR_EXISTS;
-                       goto out;
-               }
-       } else {
-               if (off) {
-                       old_room = rec_data_length(&rec)
-                               + rec_extra_padding(&rec);
-                       if (old_room >= dbuf.dsize) {
-                               /* Can modify in-place.  Easy! */
-                               ecode = update_rec_hdr(tdb, off,
-                                                      key.dsize, dbuf.dsize,
-                                                      &rec, h.h);
-                               if (ecode != TDB_SUCCESS) {
-                                       goto out;
-                               }
-                               ecode = update_data(tdb,
-                                                   off + sizeof(rec)
-                                                   + key.dsize, dbuf,
-                                                   old_room - dbuf.dsize);
-                               if (ecode != TDB_SUCCESS) {
-                                       goto out;
-                               }
-                               tdb_unlock_hashes(tdb, h.hlock_start,
-                                                 h.hlock_range, F_WRLCK);
-                               return tdb->last_error = TDB_SUCCESS;
-                       }
-               } else {
-                       if (flag == TDB_MODIFY) {
-                               /* if the record doesn't exist and we
-                                  are in TDB_MODIFY mode then we should fail
-                                  the store */
-                               ecode = TDB_ERR_NOEXIST;
-                               goto out;
-                       }
-               }
-       }
-
-       /* If we didn't use the old record, this implies we're growing. */
-       ecode = replace_data(tdb, &h, key, dbuf, off, old_room, off);
-out:
-       tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
-       return tdb->last_error = ecode;
-}
-
-_PUBLIC_ enum TDB_ERROR tdb_append(struct tdb_context *tdb,
-                         struct tdb_data key, struct tdb_data dbuf)
-{
-       struct hash_info h;
-       tdb_off_t off;
-       struct tdb_used_record rec;
-       tdb_len_t old_room = 0, old_dlen;
-       unsigned char *newdata;
-       struct tdb_data new_dbuf;
-       enum TDB_ERROR ecode;
-
-       off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL);
-       if (TDB_OFF_IS_ERR(off)) {
-               return tdb->last_error = TDB_OFF_TO_ERR(off);
-       }
-
-       if (off) {
-               old_dlen = rec_data_length(&rec);
-               old_room = old_dlen + rec_extra_padding(&rec);
-
-               /* Fast path: can append in place. */
-               if (rec_extra_padding(&rec) >= dbuf.dsize) {
-                       ecode = update_rec_hdr(tdb, off, key.dsize,
-                                              old_dlen + dbuf.dsize, &rec,
-                                              h.h);
-                       if (ecode != TDB_SUCCESS) {
-                               goto out;
-                       }
-
-                       off += sizeof(rec) + key.dsize + old_dlen;
-                       ecode = update_data(tdb, off, dbuf,
-                                           rec_extra_padding(&rec));
-                       goto out;
-               }
-
-               /* Slow path. */
-               newdata = malloc(key.dsize + old_dlen + dbuf.dsize);
-               if (!newdata) {
-                       ecode = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                          "tdb_append:"
-                                          " failed to allocate %zu bytes",
-                                          (size_t)(key.dsize + old_dlen
-                                                   + dbuf.dsize));
-                       goto out;
-               }
-               ecode = tdb->io->tread(tdb, off + sizeof(rec) + key.dsize,
-                                      newdata, old_dlen);
-               if (ecode != TDB_SUCCESS) {
-                       goto out_free_newdata;
-               }
-               memcpy(newdata + old_dlen, dbuf.dptr, dbuf.dsize);
-               new_dbuf.dptr = newdata;
-               new_dbuf.dsize = old_dlen + dbuf.dsize;
-       } else {
-               newdata = NULL;
-               new_dbuf = dbuf;
-       }
-
-       /* If they're using tdb_append(), it implies they're growing record. */
-       ecode = replace_data(tdb, &h, key, new_dbuf, off, old_room, true);
-
-out_free_newdata:
-       free(newdata);
-out:
-       tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
-       return tdb->last_error = ecode;
-}
-
-_PUBLIC_ enum TDB_ERROR tdb_fetch(struct tdb_context *tdb, struct tdb_data key,
-                        struct tdb_data *data)
-{
-       tdb_off_t off;
-       struct tdb_used_record rec;
-       struct hash_info h;
-       enum TDB_ERROR ecode;
-
-       off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL);
-       if (TDB_OFF_IS_ERR(off)) {
-               return tdb->last_error = TDB_OFF_TO_ERR(off);
-       }
-
-       if (!off) {
-               ecode = TDB_ERR_NOEXIST;
-       } else {
-               data->dsize = rec_data_length(&rec);
-               data->dptr = tdb_alloc_read(tdb, off + sizeof(rec) + key.dsize,
-                                           data->dsize);
-               if (TDB_PTR_IS_ERR(data->dptr)) {
-                       ecode = TDB_PTR_ERR(data->dptr);
-               } else
-                       ecode = TDB_SUCCESS;
-       }
-
-       tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
-       return tdb->last_error = ecode;
-}
-
-_PUBLIC_ bool tdb_exists(struct tdb_context *tdb, TDB_DATA key)
-{
-       tdb_off_t off;
-       struct tdb_used_record rec;
-       struct hash_info h;
-
-       off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL);
-       if (TDB_OFF_IS_ERR(off)) {
-               tdb->last_error = TDB_OFF_TO_ERR(off);
-               return false;
-       }
-       tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
-
-       tdb->last_error = TDB_SUCCESS;
-       return off ? true : false;
-}
-
-_PUBLIC_ enum TDB_ERROR tdb_delete(struct tdb_context *tdb, struct tdb_data key)
-{
-       tdb_off_t off;
-       struct tdb_used_record rec;
-       struct hash_info h;
-       enum TDB_ERROR ecode;
-
-       off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL);
-       if (TDB_OFF_IS_ERR(off)) {
-               return tdb->last_error = TDB_OFF_TO_ERR(off);
-       }
-
-       if (!off) {
-               ecode = TDB_ERR_NOEXIST;
-               goto unlock;
-       }
-
-       ecode = delete_from_hash(tdb, &h);
-       if (ecode != TDB_SUCCESS) {
-               goto unlock;
-       }
-
-       /* Free the deleted entry. */
-       tdb->stats.frees++;
-       ecode = add_free_record(tdb, off,
-                               sizeof(struct tdb_used_record)
-                               + rec_key_length(&rec)
-                               + rec_data_length(&rec)
-                               + rec_extra_padding(&rec),
-                               TDB_LOCK_WAIT, true);
-
-       if (tdb->flags & TDB_SEQNUM)
-               tdb_inc_seqnum(tdb);
-
-unlock:
-       tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
-       return tdb->last_error = ecode;
-}
-
-_PUBLIC_ unsigned int tdb_get_flags(struct tdb_context *tdb)
-{
-       return tdb->flags;
-}
-
-static bool inside_transaction(const struct tdb_context *tdb)
-{
-       return tdb->transaction != NULL;
-}
-
-static bool readonly_changable(struct tdb_context *tdb, const char *caller)
-{
-       if (inside_transaction(tdb)) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                            TDB_LOG_USE_ERROR,
-                                            "%s: can't change"
-                                            " TDB_RDONLY inside transaction",
-                                            caller);
-               return false;
-       }
-       return true;
-}
-
-/*
- * Set one runtime flag on @tdb.
- *
- * Accepts TDB_NOLOCK, TDB_NOMMAP, TDB_NOSYNC, TDB_SEQNUM,
- * TDB_ALLOW_NESTING and TDB_RDONLY.  Anything else, or any flag on a
- * TDB_INTERNAL db, logs a usage error and sets tdb->last_error to
- * TDB_ERR_EINVAL.
- */
-_PUBLIC_ void tdb_add_flag(struct tdb_context *tdb, unsigned flag)
-{
-       if (tdb->flags & TDB_INTERNAL) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                            TDB_LOG_USE_ERROR,
-                                            "tdb_add_flag: internal db");
-               return;
-       }
-
-       switch (flag) {
-       case TDB_NOLOCK:
-       case TDB_NOSYNC:
-       case TDB_SEQNUM:
-       case TDB_ALLOW_NESTING:
-               /* Plain bits: @flag is exactly the bit to set. */
-               tdb->flags |= flag;
-               return;
-       case TDB_NOMMAP:
-               tdb->flags |= TDB_NOMMAP;
-#ifndef HAVE_INCOHERENT_MMAP
-               tdb_munmap(tdb->file);
-#endif
-               return;
-       case TDB_RDONLY:
-               /* Refused (and logged) inside a transaction. */
-               if (readonly_changable(tdb, "tdb_add_flag"))
-                       tdb->flags |= TDB_RDONLY;
-               return;
-       default:
-               break;
-       }
-
-       tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                    TDB_LOG_USE_ERROR,
-                                    "tdb_add_flag: Unknown flag %u",
-                                    flag);
-}
-
-/*
- * Clear one runtime flag on @tdb.
- *
- * Accepts the same flags as tdb_add_flag().  TDB_RDONLY cannot be
- * cleared on a tdb opened O_RDONLY, nor inside a transaction.  Errors
- * are logged and recorded in tdb->last_error as TDB_ERR_EINVAL.
- */
-_PUBLIC_ void tdb_remove_flag(struct tdb_context *tdb, unsigned flag)
-{
-       if (tdb->flags & TDB_INTERNAL) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                            TDB_LOG_USE_ERROR,
-                                            "tdb_remove_flag: internal db");
-               return;
-       }
-
-       switch (flag) {
-       case TDB_NOLOCK:
-               tdb->flags &= ~TDB_NOLOCK;
-               return;
-       case TDB_NOMMAP:
-               tdb->flags &= ~TDB_NOMMAP;
-#ifndef HAVE_INCOHERENT_MMAP
-               /* If mmap incoherent, we were mmaping anyway. */
-               tdb_mmap(tdb);
-#endif
-               return;
-       case TDB_NOSYNC:
-               tdb->flags &= ~TDB_NOSYNC;
-               return;
-       case TDB_SEQNUM:
-               tdb->flags &= ~TDB_SEQNUM;
-               return;
-       case TDB_ALLOW_NESTING:
-               tdb->flags &= ~TDB_ALLOW_NESTING;
-               return;
-       case TDB_RDONLY:
-               if ((tdb->open_flags & O_ACCMODE) != O_RDONLY) {
-                       /* Writable fd: allowed unless mid-transaction. */
-                       if (readonly_changable(tdb, "tdb_remove_flag"))
-                               tdb->flags &= ~TDB_RDONLY;
-                       return;
-               }
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                            TDB_LOG_USE_ERROR,
-                                            "tdb_remove_flag: can't"
-                                            " remove TDB_RDONLY on tdb"
-                                            " opened with O_RDONLY");
-               return;
-       default:
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                            TDB_LOG_USE_ERROR,
-                                            "tdb_remove_flag: Unknown flag %u",
-                                            flag);
-               return;
-       }
-}
-
-/*
- * Map @ecode to a constant, human-readable string.  Codes not listed
- * below yield "Invalid error code".
- */
-_PUBLIC_ const char *tdb_errorstr(enum TDB_ERROR ecode)
-{
-       const char *msg = "Invalid error code";
-
-       /* Gcc warns if you miss a case in the switch, so use that. */
-       switch (TDB_ERR_TO_OFF(ecode)) {
-       case TDB_ERR_TO_OFF(TDB_SUCCESS):
-               msg = "Success";
-               break;
-       case TDB_ERR_TO_OFF(TDB_ERR_CORRUPT):
-               msg = "Corrupt database";
-               break;
-       case TDB_ERR_TO_OFF(TDB_ERR_IO):
-               msg = "IO Error";
-               break;
-       case TDB_ERR_TO_OFF(TDB_ERR_LOCK):
-               msg = "Locking error";
-               break;
-       case TDB_ERR_TO_OFF(TDB_ERR_OOM):
-               msg = "Out of memory";
-               break;
-       case TDB_ERR_TO_OFF(TDB_ERR_EXISTS):
-               msg = "Record exists";
-               break;
-       case TDB_ERR_TO_OFF(TDB_ERR_EINVAL):
-               msg = "Invalid parameter";
-               break;
-       case TDB_ERR_TO_OFF(TDB_ERR_NOEXIST):
-               msg = "Record does not exist";
-               break;
-       case TDB_ERR_TO_OFF(TDB_ERR_RDONLY):
-               msg = "write not permitted";
-               break;
-       }
-       return msg;
-}
-
-/* Return the error code most recently stored in tdb->last_error. */
-_PUBLIC_ enum TDB_ERROR tdb_error(struct tdb_context *tdb)
-{
-       enum TDB_ERROR last = tdb->last_error;
-
-       return last;
-}
-
-/*
- * Format a printf-style message and pass it to the tdb's log function.
- *
- * @tdb: the tdb context (its log_fn/log_data are used)
- * @ecode: the error being reported
- * @level: severity handed to the log function
- * @fmt: printf-style format, followed by its arguments
- *
- * Does nothing if no log function is set.  If the message cannot be
- * formatted, logs an out-of-memory notice followed by the raw @fmt.
- * Always returns @ecode, so callers can write "return tdb_logerr(...)".
- * errno is preserved across the call.
- */
-enum TDB_ERROR COLD tdb_logerr(struct tdb_context *tdb,
-                              enum TDB_ERROR ecode,
-                              enum tdb_log_level level,
-                              const char *fmt, ...)
-{
-       char *message;
-       va_list ap;
-       /* Must be signed: vasprintf() reports failure as -1, and with an
-        * unsigned type the check below could never fire, leaving
-        * 'message' undefined when it is logged and freed. */
-       int len;
-       /* tdb_open paths care about errno, so save it. */
-       int saved_errno = errno;
-
-       if (!tdb->log_fn)
-               return ecode;
-
-       va_start(ap, fmt);
-       len = vasprintf(&message, fmt, ap);
-       va_end(ap);
-
-       if (len < 0) {
-               tdb->log_fn(tdb, TDB_LOG_ERROR, TDB_ERR_OOM,
-                           "out of memory formatting message:", tdb->log_data);
-               tdb->log_fn(tdb, level, ecode, fmt, tdb->log_data);
-       } else {
-               tdb->log_fn(tdb, level, ecode, message, tdb->log_data);
-               free(message);
-       }
-       errno = saved_errno;
-       return ecode;
-}
-
-/*
- * Look up @key under a read lock and call @parse on its data in place.
- *
- * Returns TDB_ERR_NOEXIST if the key is absent, an access/lookup error,
- * or whatever @parse returned; the result is also stored in
- * tdb->last_error.
- */
-_PUBLIC_ enum TDB_ERROR tdb_parse_record_(struct tdb_context *tdb,
-                                TDB_DATA key,
-                                enum TDB_ERROR (*parse)(TDB_DATA k,
-                                                        TDB_DATA d,
-                                                        void *data),
-                                void *data)
-{
-       struct hash_info h;
-       struct tdb_used_record rec;
-       enum TDB_ERROR ecode;
-       const void *dptr;
-       tdb_off_t off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL);
-
-       if (TDB_OFF_IS_ERR(off))
-               return tdb->last_error = TDB_OFF_TO_ERR(off);
-
-       if (!off) {
-               ecode = TDB_ERR_NOEXIST;
-               goto unlock;
-       }
-
-       /* The data sits right after the record header and the key. */
-       dptr = tdb_access_read(tdb, off + sizeof(rec) + key.dsize,
-                              rec_data_length(&rec), false);
-       if (TDB_PTR_IS_ERR(dptr)) {
-               ecode = TDB_PTR_ERR(dptr);
-               goto unlock;
-       }
-
-       ecode = parse(key, tdb_mkdata(dptr, rec_data_length(&rec)), data);
-       tdb_access_release(tdb, dptr);
-
-unlock:
-       tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
-       return tdb->last_error = ecode;
-}
-
-/* Return the name string stored in @tdb. */
-_PUBLIC_ const char *tdb_name(const struct tdb_context *tdb)
-{
-       const char *name = tdb->name;
-
-       return name;
-}
-
-/*
- * Read the sequence number field of the header.  The raw value read
- * is returned either way (an error encodes as a negative number);
- * tdb->last_error is set to that error, or to TDB_SUCCESS.
- */
-_PUBLIC_ int64_t tdb_get_seqnum(struct tdb_context *tdb)
-{
-       tdb_off_t seq = tdb_read_off(tdb, offsetof(struct tdb_header, seqnum));
-
-       tdb->last_error = TDB_OFF_IS_ERR(seq) ? TDB_OFF_TO_ERR(seq)
-                                             : TDB_SUCCESS;
-       return seq;
-}
-
-
-/* Return the file descriptor recorded in @tdb's file structure. */
-_PUBLIC_ int tdb_fd(const struct tdb_context *tdb)
-{
-       int fd = tdb->file->fd;
-
-       return fd;
-}
-
-struct traverse_state {        /* private state shared by tdb_repack() and repack_traverse() */
-       enum TDB_ERROR error;   /* result of the latest tdb_store() done by repack_traverse() */
-       struct tdb_context *dest_db;    /* database repack_traverse() copies each record into */
-};
-
-/*
-  Traverse callback for repacking: insert the record into
-  state->dest_db.  The store result is kept in state->error; a failure
-  returns -1, which stops the traversal.  @tdb (the db being walked)
-  is unused.
- */
-static int repack_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data,
-                          struct traverse_state *state)
-{
-       state->error = tdb_store(state->dest_db, key, data, TDB_INSERT);
-       return state->error == TDB_SUCCESS ? 0 : -1;
-}
-
-/*
- * Repack @tdb inside a transaction: copy every record into a temporary
- * TDB_INTERNAL database, wipe @tdb, then copy everything back.
- *
- * Returns TDB_SUCCESS (the commit result) or the first error hit.  On
- * any failure after the transaction starts, it is cancelled, so @tdb
- * is left unchanged.
- */
-_PUBLIC_ enum TDB_ERROR tdb_repack(struct tdb_context *tdb)
-{
-       struct tdb_context *tmp_db;
-       struct traverse_state state;
-       int64_t count;
-
-       state.error = tdb_transaction_start(tdb);
-       if (state.error != TDB_SUCCESS) {
-               return state.error;
-       }
-
-       tmp_db = tdb_open("tmpdb", TDB_INTERNAL, O_RDWR|O_CREAT, 0, NULL);
-       if (tmp_db == NULL) {
-               state.error = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                        __location__
-                                        " Failed to create tmp_db");
-               tdb_transaction_cancel(tdb);
-               return tdb->last_error = state.error;
-       }
-
-       /*
-        * tdb_traverse() only returns a negative value for its own
-        * errors; a callback that stops the walk is not reported that
-        * way.  So check both the return value and state.error, or a
-        * failed tdb_store() would let us wipe @tdb with only a
-        * partial copy in tmp_db.  state.error is TDB_SUCCESS on entry
-        * to each traverse.
-        */
-       state.dest_db = tmp_db;
-       count = tdb_traverse(tdb, repack_traverse, &state);
-       if (count < 0) {
-               state.error = (enum TDB_ERROR)count;
-               goto fail;
-       }
-       if (state.error != TDB_SUCCESS) {
-               goto fail;
-       }
-
-       state.error = tdb_wipe_all(tdb);
-       if (state.error != TDB_SUCCESS) {
-               goto fail;
-       }
-
-       state.dest_db = tdb;
-       count = tdb_traverse(tmp_db, repack_traverse, &state);
-       if (count < 0) {
-               state.error = (enum TDB_ERROR)count;
-               goto fail;
-       }
-       if (state.error != TDB_SUCCESS) {
-               goto fail;
-       }
-
-       tdb_close(tmp_db);
-       return tdb_transaction_commit(tdb);
-
-fail:
-       tdb_transaction_cancel(tdb);
-       tdb_close(tmp_db);
-       /* Record the failure for tdb_error(), as the tmp_db path does. */
-       return tdb->last_error = state.error;
-}
diff --git a/lib/tdb2/tdb2.h b/lib/tdb2/tdb2.h
deleted file mode 100644 (file)
index f7aa0cc..0000000
+++ /dev/null
@@ -1,897 +0,0 @@
-#ifndef CCAN_TDB2_H
-#define CCAN_TDB2_H
-
-/*
-   TDB version 2: trivial database library
-
-   Copyright (C) Andrew Tridgell 1999-2004
-   Copyright (C) Rusty Russell 2010-2011
-
-     ** NOTE! The following LGPL license applies to the tdb
-     ** library. This does NOT imply that all of Samba is released
-     ** under the LGPL
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-
-#ifdef  __cplusplus
-extern "C" {
-#endif
-
-#ifdef HAVE_LIBREPLACE
-#include <replace.h>
-#else
-#if HAVE_FILE_OFFSET_BITS
-#define _FILE_OFFSET_BITS 64
-#endif
-/* For mode_t */
-#include <sys/types.h>
-/* For O_* flags. */
-#include <sys/stat.h>
-/* For sig_atomic_t. */
-#include <signal.h>
-/* For uint64_t */
-#include <stdint.h>
-/* For bool */
-#include <stdbool.h>
-/* For memcmp */
-#include <string.h>
-#endif
-
-#if HAVE_CCAN
-#include <ccan/compiler/compiler.h>
-#include <ccan/typesafe_cb/typesafe_cb.h>
-#include <ccan/cast/cast.h>
-#else
-#ifndef typesafe_cb_preargs
-/* Failing to have CCAN just means less typesafe protection, etc. */
-#define typesafe_cb_preargs(rtype, atype, fn, arg, ...)        \
-       ((rtype (*)(__VA_ARGS__, atype))(fn))
-#endif
-#ifndef cast_const
-/* cast_const(type, expr): cast @expr to @type, dropping a const
- * qualifier (used by tdb_mkdata()). */
-#if defined(__intptr_t_defined) || defined(HAVE_INTPTR_T)
-/* Going through intptr_t presumably keeps compilers quiet about the
- * discarded qualifier (eg. gcc with -Wcast-qual). */
-#define cast_const(type, expr) ((type)((intptr_t)(expr)))
-#else
-/* @type is already the complete target type, so cast to it directly;
- * "(type *)" would turn cast_const(void *, p) into a void **. */
-#define cast_const(type, expr) ((type)(expr))
-#endif
-#endif
-#endif /* !HAVE_CCAN */
-
-union tdb_attribute;
-struct tdb_context;
-
-/**
- * tdb_open - open a database file
- * @name: the file name (can be NULL if flags contains TDB_INTERNAL)
- * @tdb_flags: options for this database
- * @open_flags: flags argument for tdb's open() call.
- * @mode: mode argument for tdb's open() call.
- * @attributes: linked list of extra attributes for this tdb.
- *
- * This call opens (and potentially creates) a database file.
- * Multiple processes can have the TDB file open at once.
- *
- * On failure it will return NULL, and set errno: it may also call
- * any log attribute found in @attributes.
- *
- * See also:
- *     union tdb_attribute
- */
-struct tdb_context *tdb_open(const char *name, int tdb_flags,
-                            int open_flags, mode_t mode,
-                            union tdb_attribute *attributes);
-
-
-/* flags for tdb_open() */
-#define TDB_DEFAULT 0 /* just a readability place holder */
-#define TDB_INTERNAL 2 /* don't store on disk */
-#define TDB_NOLOCK   4 /* don't do any locking */
-#define TDB_NOMMAP   8 /* don't use mmap */
-#define TDB_CONVERT 16 /* convert endian */
-#define TDB_NOSYNC   64 /* don't use synchronous transactions */
-#define TDB_SEQNUM   128 /* maintain a sequence number */
-#define TDB_ALLOW_NESTING   256 /* fake nested transactions */
-#define TDB_RDONLY   512 /* implied by O_RDONLY */
-#define TDB_CANT_CHECK  2048 /* has a feature which we don't understand */
-
-/**
- * tdb_close - close and free a tdb.
- * @tdb: the tdb context returned from tdb_open()
- *
- * This always succeeds, in that @tdb is unusable after this call.  But if
- * some unexpected error occurred while closing, it will return non-zero
- * (the only clue as to cause will be via the log attribute).
- */
-int tdb_close(struct tdb_context *tdb);
-
-/**
- * struct tdb_data - representation of keys or values.
- * @dptr: the data pointer
- * @dsize: the size of the data pointed to by dptr.
- *
- * This is the "blob" representation of keys and data used by TDB.
- */
-typedef struct tdb_data {
-       unsigned char *dptr;
-       size_t dsize;
-} TDB_DATA;
-
-/**
- * enum TDB_ERROR - error returns for TDB
- *
- * See Also:
- *     tdb_errorstr()
- */
-enum TDB_ERROR {
-       TDB_SUCCESS     = 0,    /* No error. */
-       TDB_ERR_CORRUPT = -1,   /* We read the db, and it was bogus. */
-       TDB_ERR_IO      = -2,   /* We couldn't read/write the db. */
-       TDB_ERR_LOCK    = -3,   /* Locking failed. */
-       TDB_ERR_OOM     = -4,   /* Out of Memory. */
-       TDB_ERR_EXISTS  = -5,   /* The key already exists. */
-       TDB_ERR_NOEXIST = -6,   /* The key does not exist. */
-       TDB_ERR_EINVAL  = -7,   /* You're using it wrong. */
-       TDB_ERR_RDONLY  = -8,   /* The database is read-only. */
-       TDB_ERR_LAST = TDB_ERR_RDONLY
-};
-
-/**
- * tdb_store - store a key/value pair in a tdb.
- * @tdb: the tdb context returned from tdb_open()
- * @key: the key
- * @dbuf: the data to associate with the key.
- * @flag: TDB_REPLACE, TDB_INSERT or TDB_MODIFY.
- *
- * This inserts (or overwrites) a key/value pair in the TDB.  If flag
- * is TDB_REPLACE, it doesn't matter whether the key exists or not;
- * TDB_INSERT means it must not exist (returns TDB_ERR_EXISTS otherwise),
- * and TDB_MODIFY means it must exist (returns TDB_ERR_NOEXIST otherwise).
- *
- * On success, this returns TDB_SUCCESS.
- *
- * See also:
- *     tdb_fetch, tdb_transaction_start, tdb_append, tdb_delete.
- */
-enum TDB_ERROR tdb_store(struct tdb_context *tdb,
-                        struct tdb_data key,
-                        struct tdb_data dbuf,
-                        int flag);
-
-/* flags to tdb_store() */
-#define TDB_REPLACE 1          /* A readability place holder */
-#define TDB_INSERT 2           /* Don't overwrite an existing entry */
-#define TDB_MODIFY 3           /* Don't create a new entry          */
-
-/**
- * tdb_fetch - fetch a value from a tdb.
- * @tdb: the tdb context returned from tdb_open()
- * @key: the key
- * @data: pointer to data.
- *
- * This looks up a key in the database and sets it in @data.
- *
- * If it returns TDB_SUCCESS, the key was found: it is your
- * responsibility to call free() on @data->dptr.
- *
- * Otherwise, it returns an error (usually, TDB_ERR_NOEXIST) and @data is
- * undefined.
- */
-enum TDB_ERROR tdb_fetch(struct tdb_context *tdb, struct tdb_data key,
-                        struct tdb_data *data);
-
-/**
- * tdb_errorstr - map the tdb error onto a constant readable string
- * @ecode: the enum TDB_ERROR to map.
- *
- * This is useful for displaying errors to users.
- */
-const char *tdb_errorstr(enum TDB_ERROR ecode);
-
-/**
- * tdb_append - append a value to a key/value pair in a tdb.
- * @tdb: the tdb context returned from tdb_open()
- * @key: the key
- * @dbuf: the data to append.
- *
- * This is equivalent to fetching a record, reallocating .dptr to add the
- * data, and writing it back, only it's much more efficient.  If the key
- * doesn't exist, it's equivalent to tdb_store (with an additional hint that
- * you expect to expand the record in future).
- *
- * See Also:
- *     tdb_fetch(), tdb_store()
- */
-enum TDB_ERROR tdb_append(struct tdb_context *tdb,
-                         struct tdb_data key, struct tdb_data dbuf);
-
-/**
- * tdb_delete - delete a key from a tdb.
- * @tdb: the tdb context returned from tdb_open()
- * @key: the key to delete.
- *
- * Returns TDB_SUCCESS on success, or an error (usually TDB_ERR_NOEXIST).
- *
- * See Also:
- *     tdb_fetch(), tdb_store()
- */
-enum TDB_ERROR tdb_delete(struct tdb_context *tdb, struct tdb_data key);
-
-/**
- * tdb_exists - does a key exist in the database?
- * @tdb: the tdb context returned from tdb_open()
- * @key: the key to search for.
- *
- * Returns true if it exists, or false if it doesn't or any other error.
- */
-bool tdb_exists(struct tdb_context *tdb, TDB_DATA key);
-
-/**
- * tdb_deq - are struct tdb_data equal?
- * @a: one struct tdb_data
- * @b: another struct tdb_data
- */
-static inline bool tdb_deq(struct tdb_data a, struct tdb_data b)
-{
-       if (a.dsize != b.dsize)
-               return false;
-       /* Two empty blobs are equal; don't hand memcmp() their dptr,
-        * which may be NULL when dsize is 0. */
-       return a.dsize == 0 || memcmp(a.dptr, b.dptr, a.dsize) == 0;
-}
-
-/**
- * tdb_mkdata - make a struct tdb_data from const data
- * @p: the constant pointer
- * @len: the length
- *
- * As the dptr member of struct tdb_data is not constant, you need to
- * cast it.  This function keeps those casts in one place, as well as
- * suppressing the warning some compilers give when casting away a
- * qualifier (eg. gcc with -Wcast-qual)
- */
-static inline struct tdb_data tdb_mkdata(const void *p, size_t len)
-{
-       struct tdb_data blob;
-
-       blob.dsize = len;
-       /* The one place where the const qualifier is cast away. */
-       blob.dptr = cast_const(void *, p);
-       return blob;
-}
-
-/**
- * tdb_transaction_start - start a transaction
- * @tdb: the tdb context returned from tdb_open()
- *
- * This begins a series of atomic operations.  Other processes will be able
- * to read the tdb, but not alter it (they will block), nor will they see
- * any changes until tdb_transaction_commit() is called.
- *
- * Note that if the TDB_ALLOW_NESTING flag is set, a tdb_transaction_start()
- * within a transaction will succeed, but it's not a real transaction:
- * (1) An inner transaction which is committed is not actually committed until
- *     the outer transaction is; if the outer transaction is cancelled, the
- *     inner ones are discarded.
- * (2) tdb_transaction_cancel() marks the outer transaction as having an error,
- *     so the final tdb_transaction_commit() will fail.
- * (3) the outer transaction will see the results of the inner transaction.
- *
- * See Also:
- *     tdb_transaction_cancel, tdb_transaction_commit.
- */
-enum TDB_ERROR tdb_transaction_start(struct tdb_context *tdb);
-
-/**
- * tdb_transaction_cancel - abandon a transaction
- * @tdb: the tdb context returned from tdb_open()
- *
- * This aborts a transaction, discarding any changes which were made.
- * tdb_close() does this implicitly.
- */
-void tdb_transaction_cancel(struct tdb_context *tdb);
-
-/**
- * tdb_transaction_commit - commit a transaction
- * @tdb: the tdb context returned from tdb_open()
- *
- * This completes a transaction, writing any changes which were made.
- *
- * fsync() is used to commit the transaction (unless TDB_NOSYNC is set),
- * making it robust against machine crashes, but very slow compared to
- * other TDB operations.
- *
- * A failure can only be caused by unexpected errors (eg. I/O or
- * memory); there is no point looping on transaction failure.
- *
- * See Also:
- *     tdb_transaction_prepare_commit()
- */
-enum TDB_ERROR tdb_transaction_commit(struct tdb_context *tdb);
-
-/**
- * tdb_transaction_prepare_commit - prepare to commit a transaction
- * @tdb: the tdb context returned from tdb_open()
- *
- * This ensures we have the resources to commit a transaction (using
- * tdb_transaction_commit): if this succeeds then a transaction will only
- * fail if the write() or fsync() calls fail.
- *
- * If this fails you must still call tdb_transaction_cancel() to cancel
- * the transaction.
- *
- * See Also:
- *     tdb_transaction_commit()
- */
-enum TDB_ERROR tdb_transaction_prepare_commit(struct tdb_context *tdb);
-
-/**
- * tdb_traverse - traverse a TDB
- * @tdb: the tdb context returned from tdb_open()
- * @fn: the function to call for every key/value pair (or NULL)
- * @p: the pointer to hand to @fn
- *
- * This walks the TDB until all the keys have been traversed, or @fn
- * returns non-zero.  If the traverse function or other processes are
- * changing data or adding or deleting keys, the traverse may be
- * unreliable: keys may be skipped or (rarely) visited twice.
- *
- * There is one specific exception: the special case of deleting the
- * current key does not undermine the reliability of the traversal.
- *
- * On success, returns the number of keys iterated.  On error returns
- * a negative enum TDB_ERROR value.
- */
-#define tdb_traverse(tdb, fn, p)                                       \
-       tdb_traverse_(tdb, typesafe_cb_preargs(int, void *, (fn), (p),  \
-                                              struct tdb_context *,    \
-                                              TDB_DATA, TDB_DATA), (p))
-
-int64_t tdb_traverse_(struct tdb_context *tdb,
-                     int (*fn)(struct tdb_context *,
-                               TDB_DATA, TDB_DATA, void *), void *p);
-
-/**
- * tdb_parse_record - operate directly on data in the database.
- * @tdb: the tdb context returned from tdb_open()
- * @key: the key whose record we should hand to @parse
- * @parse: the function to call for the data
- * @data: the private pointer to hand to @parse (types must match).
- *
- * This avoids a copy for many cases, by handing you a pointer into
- * the memory-mapped database.  It also locks the record to prevent
- * other accesses at the same time.
- *
- * Do not alter the data handed to parse()!
- */
-#define tdb_parse_record(tdb, key, parse, data)                                \
-       tdb_parse_record_((tdb), (key),                                 \
-                         typesafe_cb_preargs(enum TDB_ERROR, void *,   \
-                                             (parse), (data),          \
-                                             TDB_DATA, TDB_DATA), (data))
-
-enum TDB_ERROR tdb_parse_record_(struct tdb_context *tdb,
-                                TDB_DATA key,
-                                enum TDB_ERROR (*parse)(TDB_DATA k,
-                                                        TDB_DATA d,
-                                                        void *data),
-                                void *data);
-
-/**
- * tdb_get_seqnum - get a database sequence number
- * @tdb: the tdb context returned from tdb_open()
- *
- * This returns a sequence number: any change to the database from a
- * tdb context opened with the TDB_SEQNUM flag will cause that number
- * to increment.  Note that the incrementing is unreliable (it is done
- * without locking), so this is only useful as an optimization.
- *
- * For example, you may have a regular database backup routine which
- * does not operate if the sequence number is unchanged.  In the
- * unlikely event of a failed increment, it will be backed up next
- * time anyway.
- *
- * Returns an enum TDB_ERROR (ie. negative) on error.
- */
-int64_t tdb_get_seqnum(struct tdb_context *tdb);
-
-/**
- * tdb_firstkey - get the "first" key in a TDB
- * @tdb: the tdb context returned from tdb_open()
- * @key: pointer to key.
- *
- * This returns an arbitrary key in the database; with tdb_nextkey() it allows
- * open-coded traversal of the database, though it is slightly less efficient
- * than tdb_traverse.
- *
- * It is your responsibility to free @key->dptr on success.
- *
- * Returns TDB_ERR_NOEXIST if the database is empty.
- */
-enum TDB_ERROR tdb_firstkey(struct tdb_context *tdb, struct tdb_data *key);
-
-/**
- * tdb_nextkey - get the "next" key in a TDB
- * @tdb: the tdb context returned from tdb_open()
- * @key: a key returned by tdb_firstkey() or tdb_nextkey().
- *
- * This returns another key in the database; it will free @key.dptr for
- * your convenience.
- *
- * Returns TDB_ERR_NOEXIST if there are no more keys.
- */
-enum TDB_ERROR tdb_nextkey(struct tdb_context *tdb, struct tdb_data *key);
-
-/**
- * tdb_chainlock - lock a record in the TDB
- * @tdb: the tdb context returned from tdb_open()
- * @key: the key to lock.
- *
- * This prevents any access occurring to a group of keys including @key,
- * even if @key does not exist.  This allows primitive atomic updates of
- * records without using transactions.
- *
- * You cannot begin a transaction while holding a tdb_chainlock(), nor can
- * you do any operations on any other keys in the database.  This also means
- * that you cannot hold more than one tdb_chainlock() at a time.
- *
- * See Also:
- *     tdb_chainunlock()
- */
-enum TDB_ERROR tdb_chainlock(struct tdb_context *tdb, TDB_DATA key);
-
-/**
- * tdb_chainunlock - unlock a record in the TDB
- * @tdb: the tdb context returned from tdb_open()
- * @key: the key to unlock.
- *
- * The key must have previously been locked by tdb_chainlock().
- */
-void tdb_chainunlock(struct tdb_context *tdb, TDB_DATA key);
-
-/**
- * tdb_chainlock_read - lock a record in the TDB, for reading
- * @tdb: the tdb context returned from tdb_open()
- * @key: the key to lock.
- *
- * This prevents any changes from occurring to a group of keys including @key,
- * even if @key does not exist.  This allows primitive atomic updates of
- * records without using transactions.
- *
- * You cannot begin a transaction while holding a tdb_chainlock_read(), nor can
- * you do any operations on any other keys in the database.  This also means
- * that you cannot hold more than one tdb_chainlock()/read() at a time.
- *
- * See Also:
- *     tdb_chainlock()
- */
-enum TDB_ERROR tdb_chainlock_read(struct tdb_context *tdb, TDB_DATA key);
-
-/**
- * tdb_chainunlock_read - unlock a record in the TDB for reading
- * @tdb: the tdb context returned from tdb_open()
- * @key: the key to unlock.
- *
- * The key must have previously been locked by tdb_chainlock_read().
- */
-void tdb_chainunlock_read(struct tdb_context *tdb, TDB_DATA key);
-
-/**
- * tdb_lockall - lock the entire TDB
- * @tdb: the tdb context returned from tdb_open()
- *
- * You cannot hold a tdb_chainlock while calling this.  It nests, so you
- * must call tdb_unlockall as many times as you call tdb_lockall.
- */
-enum TDB_ERROR tdb_lockall(struct tdb_context *tdb);
-
-/**
- * tdb_unlockall - unlock the entire TDB
- * @tdb: the tdb context returned from tdb_open()
- */
-void tdb_unlockall(struct tdb_context *tdb);
-
-/**
- * tdb_lockall_read - lock the entire TDB for reading
- * @tdb: the tdb context returned from tdb_open()
- *
- * This prevents others writing to the database, eg. tdb_delete, tdb_store,
- * tdb_append, but not tdb_fetch.
- *
- * You cannot hold a tdb_chainlock while calling this.  It nests, so you
- * must call tdb_unlockall_read as many times as you call tdb_lockall_read.
- */
-enum TDB_ERROR tdb_lockall_read(struct tdb_context *tdb);
-
-/**
- * tdb_unlockall_read - unlock the entire TDB for reading
- * @tdb: the tdb context returned from tdb_open()
- */
-void tdb_unlockall_read(struct tdb_context *tdb);
-
-/**
- * tdb_wipe_all - wipe the database clean
- * @tdb: the tdb context returned from tdb_open()
- *
- * Completely erase the database.  This is faster than iterating through
- * each key and doing tdb_delete.
- */
-enum TDB_ERROR tdb_wipe_all(struct tdb_context *tdb);
-
-/**
- * tdb_repack - repack the database
- * @tdb: the tdb context returned from tdb_open()
- *
- * This repacks the database; if it is suffering from a great deal of
- * fragmentation this might help.  However, it can take twice the
- * memory of the existing TDB.
- */
-enum TDB_ERROR tdb_repack(struct tdb_context *tdb);
-
-/**
- * tdb_check - check a TDB for consistency
- * @tdb: the tdb context returned from tdb_open()
- * @check: function to check each key/data pair (or NULL)
- * @data: argument for @check, must match type.
- *
- * This performs a consistency check of the open database, optionally calling
- * a check() function on each record so you can do your own data consistency
- * checks as well.  If check() returns an error, that is returned from
- * tdb_check().
- *
- * Note that if the TDB uses a feature which we don't understand, and
- * which indicates we can't run tdb_check(), this will log a warning to
- * that effect and return TDB_SUCCESS.  You can detect this condition by
- * looking for TDB_CANT_CHECK in tdb_get_flags().
- *
- * Returns TDB_SUCCESS or an error.
- */
-#define tdb_check(tdb, check, data)                                    \
-       tdb_check_((tdb), typesafe_cb_preargs(enum TDB_ERROR, void *,   \
-                                             (check), (data),          \
-                                             struct tdb_data,          \
-                                             struct tdb_data),         \
-                  (data))
-
-enum TDB_ERROR tdb_check_(struct tdb_context *tdb,
-                         enum TDB_ERROR (*check)(struct tdb_data k,
-                                                 struct tdb_data d,
-                                                 void *data),
-                         void *data);
-
-/**
- * tdb_error - get the last error (not threadsafe)
- * @tdb: the tdb context returned from tdb_open()
- *
- * Returns the last error returned by a TDB function.
- *
- * This makes porting from TDB1 easier, but note that the last error is not
- * reliable in threaded programs.
- */
-enum TDB_ERROR tdb_error(struct tdb_context *tdb);
-
-/**
- * enum tdb_summary_flags - flags for tdb_summary.
- */
-enum tdb_summary_flags {
-       TDB_SUMMARY_HISTOGRAMS = 1 /* Draw graphs in the summary. */
-};
-
-/**
- * tdb_summary - return a string describing the TDB state
- * @tdb: the tdb context returned from tdb_open()
- * @flags: flags to control the summary output.
- * @summary: pointer to string to allocate.
- *
- * This returns a developer-readable string describing the overall
- * state of the tdb, such as the percentage used and sizes of records.
- * It is designed to provide information about the tdb at a glance
- * without displaying any keys or data in the database.
- *
- * On success, sets @summary to point to a malloc()'ed nul-terminated
- * multi-line string.  It is your responsibility to free() it.
- */
-enum TDB_ERROR tdb_summary(struct tdb_context *tdb,
-                          enum tdb_summary_flags flags,
-                          char **summary);
-
-
-/**
- * tdb_get_flags - return the flags for a tdb
- * @tdb: the tdb context returned from tdb_open()
- *
- * This returns the flags on the current tdb.  Some of these are caused by
- * the flags argument to tdb_open(), others (such as TDB_CONVERT) are
- * intuited.
- */
-unsigned int tdb_get_flags(struct tdb_context *tdb);
-
-/**
- * tdb_add_flag - set a flag for a tdb
- * @tdb: the tdb context returned from tdb_open()
- * @flag: one of TDB_NOLOCK, TDB_NOMMAP, TDB_NOSYNC, TDB_SEQNUM,
- *     TDB_ALLOW_NESTING or TDB_RDONLY.
- *
- * You can use this to set a flag on the TDB.  You cannot set these flags
- * on a TDB_INTERNAL tdb.
- */
-void tdb_add_flag(struct tdb_context *tdb, unsigned flag);
-
-/**
- * tdb_remove_flag - unset a flag for a tdb
- * @tdb: the tdb context returned from tdb_open()
- * @flag: one of TDB_NOLOCK, TDB_NOMMAP, TDB_NOSYNC, TDB_SEQNUM,
- *     TDB_ALLOW_NESTING or TDB_RDONLY.
- *
- * You can use this to clear a flag on the TDB.  You cannot clear flags
- * on a TDB_INTERNAL tdb.
- */
-void tdb_remove_flag(struct tdb_context *tdb, unsigned flag);
-
-/**
- * enum tdb_attribute_type - discriminator for union tdb_attribute.
- */
-enum tdb_attribute_type {
-       TDB_ATTRIBUTE_LOG = 0,
-       TDB_ATTRIBUTE_HASH = 1,
-       TDB_ATTRIBUTE_SEED = 2,
-       TDB_ATTRIBUTE_STATS = 3,
-       TDB_ATTRIBUTE_OPENHOOK = 4,
-       TDB_ATTRIBUTE_FLOCK = 5,
-};
-
-/**
- * tdb_get_attribute - get an attribute for an existing tdb
- * @tdb: the tdb context returned from tdb_open()
- * @attr: the union tdb_attribute to fill in.
- *
- * This gets an attribute from a TDB which has previously been set (or
- * may return the default values).  Set @attr.base.attr to the
- * attribute type you want to get.
- */
-enum TDB_ERROR tdb_get_attribute(struct tdb_context *tdb,
-                                union tdb_attribute *attr);
-
-/**
- * tdb_set_attribute - set an attribute for an existing tdb
- * @tdb: the tdb context returned from tdb_open()
- * @attr: the union tdb_attribute to set.
- *
- * This sets an attribute on a TDB, overriding any previous attribute
- * of the same type.  It returns TDB_ERR_EINVAL if the attribute is
- * unknown or invalid.
- *
- * Note that TDB_ATTRIBUTE_HASH, TDB_ATTRIBUTE_SEED, and
- * TDB_ATTRIBUTE_OPENHOOK cannot currently be set after tdb_open.
- */
-enum TDB_ERROR tdb_set_attribute(struct tdb_context *tdb,
-                                const union tdb_attribute *attr);
-
-/**
- * tdb_unset_attribute - reset an attribute for an existing tdb
- * @tdb: the tdb context returned from tdb_open()
- * @type: the attribute type to unset.
- *
- * This unsets an attribute on a TDB, returning it to the defaults
- * (where applicable).
- *
- * Note that it only makes sense for TDB_ATTRIBUTE_LOG and TDB_ATTRIBUTE_FLOCK
- * to be unset.
- */
-void tdb_unset_attribute(struct tdb_context *tdb,
-                        enum tdb_attribute_type type);
-
-/**
- * tdb_name - get the name of a tdb
- * @tdb: the tdb context returned from tdb_open()
- *
- * This returns a copy of the name string, made at tdb_open() time.  If that
- * argument was NULL (possible for a TDB_INTERNAL db) this will return NULL.
- *
- * This is mostly useful for logging.
- */
-const char *tdb_name(const struct tdb_context *tdb);
-
-/**
- * tdb_fd - get the file descriptor of a tdb
- * @tdb: the tdb context returned from tdb_open()
- *
- * This returns the file descriptor for the underlying database file, or -1
- * for TDB_INTERNAL.
- */
-int tdb_fd(const struct tdb_context *tdb);
-
-/**
- * tdb_foreach - iterate through every open TDB.
- * @fn: the function to call for every TDB
- * @p: the pointer to hand to @fn
- *
- * TDB internally keeps track of all open TDBs; this function allows you to
- * iterate through them.  If @fn returns non-zero, traversal stops.
- */
-#define tdb_foreach(fn, p)                                             \
-       tdb_foreach_(typesafe_cb_preargs(int, void *, (fn), (p),        \
-                                        struct tdb_context *), (p))
-
-void tdb_foreach_(int (*fn)(struct tdb_context *, void *), void *p);
-
-/**
- * struct tdb_attribute_base - common fields for all tdb attributes.
- * @attr: which kind of attribute this is; selects the member of
- *       union tdb_attribute that is in use.
- * @next: link to a further attribute, so that several attributes can be
- *       handed over as one chain.
- */
-struct tdb_attribute_base {
-       enum tdb_attribute_type attr;
-       union tdb_attribute *next;
-};
-
-/**
- * enum tdb_log_level - log levels for tdb_attribute_log
- * @TDB_LOG_ERROR: used to log unrecoverable errors such as I/O errors
- *                or internal consistency failures.
- * @TDB_LOG_USE_ERROR: used to log usage errors such as invalid parameters
- *                or writing to a read-only database.
- * @TDB_LOG_WARNING: used for informational messages on issues which
- *                  are unusual but handled by TDB internally, such
- *                  as a failure to mmap or failure to open /dev/urandom.
- *
- * The level is handed to the struct tdb_attribute_log callback as its
- * second argument.
- */
-enum tdb_log_level {
-       TDB_LOG_ERROR,
-       TDB_LOG_USE_ERROR,
-       TDB_LOG_WARNING
-};
-
-/**
- * struct tdb_attribute_log - log function attribute
- *
- * This attribute provides a hook for you to log errors.
- *
- * @fn is called with the tdb concerned, the severity of the message,
- * the error code it relates to and a human-readable message string.
- */
-struct tdb_attribute_log {
-       struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_LOG */
-       void (*fn)(struct tdb_context *tdb,
-                  enum tdb_log_level level,
-                  enum TDB_ERROR ecode,
-                  const char *message,
-                  void *data);
-       /* NOTE(review): presumably handed back to fn as its last argument
-        * -- confirm against the implementation. */
-       void *data;
-};
-
-/**
- * struct tdb_attribute_hash - hash function attribute
- *
- * This attribute allows you to provide an alternative hash function.
- * This hash function will be handed keys from the database; it will also
- * be handed the 8-byte TDB_HASH_MAGIC value for checking the header (the
- * tdb_open() will fail if the hash value doesn't match the header).
- *
- * @fn receives a pointer to the bytes to hash, their length, a seed and
- * an opaque pointer, and returns the 64-bit hash.
- *
- * Note that if your hash function gives different results on
- * different machine endians, your tdb will no longer work across
- * different architectures!
- */
-struct tdb_attribute_hash {
-       struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_HASH */
-       uint64_t (*fn)(const void *key, size_t len, uint64_t seed,
-                      void *data);
-       /* NOTE(review): presumably the opaque pointer passed to fn as its
-        * final argument -- confirm against the implementation. */
-       void *data;
-};
-
-/**
- * struct tdb_attribute_seed - hash function seed attribute
- *
- * The hash function seed is normally taken from /dev/urandom (or equivalent)
- * but can be set manually here.  This is mainly for testing purposes.
- */
-struct tdb_attribute_seed {
-       struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_SEED */
-       /* NOTE(review): presumably the value the hash function receives as
-        * its seed argument -- confirm against the implementation. */
-       uint64_t seed;
-};
-
-/**
- * struct tdb_attribute_stats - tdb operational statistics
- *
- * This attribute records statistics of various low-level TDB operations.
- * This can be used to assist performance evaluation.  This is only
- * useful for tdb_get_attribute().
- *
- * New fields will be added at the end, hence the "size" argument which
- * indicates how large your structure is: it must be filled in before
- * calling tdb_get_attribute(), which will overwrite it with the size
- * tdb knows about.
- *
- * NOTE(review): the extra indentation of some field names below looks
- * like it marks them as breakdowns of the less-indented counter above
- * (e.g. alloc_subhash under allocs) -- confirm where they are counted.
- */
-struct tdb_attribute_stats {
-       struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_STATS */
-       size_t size; /* = sizeof(struct tdb_attribute_stats) */
-       uint64_t allocs;
-       uint64_t   alloc_subhash;
-       uint64_t   alloc_chain;
-       uint64_t   alloc_bucket_exact;
-       uint64_t   alloc_bucket_max;
-       uint64_t   alloc_leftover;
-       uint64_t   alloc_coalesce_tried;
-       uint64_t     alloc_coalesce_iterate_clash;
-       uint64_t     alloc_coalesce_lockfail;
-       uint64_t     alloc_coalesce_race;
-       uint64_t     alloc_coalesce_succeeded;
-       uint64_t       alloc_coalesce_num_merged;
-       uint64_t compares;
-       uint64_t   compare_wrong_bucket;
-       uint64_t   compare_wrong_offsetbits;
-       uint64_t   compare_wrong_keylen;
-       uint64_t   compare_wrong_rechash;
-       uint64_t   compare_wrong_keycmp;
-       uint64_t transactions;
-       uint64_t   transaction_cancel;
-       uint64_t   transaction_nest;
-       uint64_t   transaction_expand_file;
-       uint64_t   transaction_read_direct;
-       uint64_t      transaction_read_direct_fail;
-       uint64_t   transaction_write_direct;
-       uint64_t      transaction_write_direct_fail;
-       uint64_t expands;
-       uint64_t frees;
-       uint64_t locks;
-       uint64_t   lock_lowlevel;
-       uint64_t   lock_nonblock;
-       uint64_t     lock_nonblock_fail;
-};
-
-/**
- * struct tdb_attribute_openhook - tdb special effects hook for open
- *
- * This attribute contains a function to call once we have the OPEN_LOCK
- * for the tdb, but before we've examined its contents.  If this succeeds,
- * the tdb will be populated if it's then zero-length.
- *
- * @fn is given a file descriptor and an opaque pointer, and reports its
- * outcome as an enum TDB_ERROR.
- *
- * This is a hack to allow support for TDB1-style TDB_CLEAR_IF_FIRST
- * behaviour.
- */
-struct tdb_attribute_openhook {
-       struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_OPENHOOK */
-       enum TDB_ERROR (*fn)(int fd, void *data);
-       /* NOTE(review): presumably passed to fn as its second argument
-        * -- confirm against the implementation. */
-       void *data;
-};
-
-/**
- * struct tdb_attribute_flock - tdb special effects hook for file locking
- *
- * This attribute contains functions to call to place locks on a file; it
- * can be used to support non-blocking operations or lock proxying.
- *
- * They should return 0 on success, -1 on failure and set errno.
- *
- * A failure is logged unless errno is EAGAIN or EINTR (normally a
- * function would only return EAGAIN if waitflag is false, and would
- * loop internally on EINTR).
- */
-struct tdb_attribute_flock {
-       struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_FLOCK */
-       int (*lock)(int fd,int rw, off_t off, off_t len, bool waitflag, void *);
-       int (*unlock)(int fd, int rw, off_t off, off_t len, void *);
-       /* NOTE(review): presumably supplied as the trailing void * of both
-        * callbacks -- confirm against the implementation. */
-       void *data;
-};
-
-/**
- * union tdb_attribute - tdb attributes.
- *
- * This represents all the known attributes.  Every member starts with a
- * struct tdb_attribute_base, so base.attr can always be read to find out
- * which member is the one in use.
- *
- * See also:
- *     struct tdb_attribute_log, struct tdb_attribute_hash,
- *     struct tdb_attribute_seed, struct tdb_attribute_stats,
- *     struct tdb_attribute_openhook, struct tdb_attribute_flock.
- */
-union tdb_attribute {
-       struct tdb_attribute_base base;
-       struct tdb_attribute_log log;
-       struct tdb_attribute_hash hash;
-       struct tdb_attribute_seed seed;
-       struct tdb_attribute_stats stats;
-       struct tdb_attribute_openhook openhook;
-       struct tdb_attribute_flock flock;
-};
-
-#ifdef  __cplusplus
-}
-#endif
-
-#endif /* tdb2.h */
diff --git a/lib/tdb2/test/api-13-delete.c b/lib/tdb2/test/api-13-delete.c
deleted file mode 100644 (file)
index 279b386..0000000
+++ /dev/null
@@ -1,205 +0,0 @@
-#include "private.h" // For TDB_TOPLEVEL_HASH_BITS
-#include <ccan/hash/hash.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include "tdb2.h"
-#include "tap-interface.h"
-#include "logging.h"
-
-/* Hash rigged so that adjacent-numbered records always collide. */
-static uint64_t clash(const void *key, size_t len, uint64_t seed, void *priv)
-{
-       const unsigned int *word = key;
-       uint64_t widened = *word;
-
-       /* Only the leading unsigned int of the key takes part. */
-       return widened << (64 - TDB_TOPLEVEL_HASH_BITS - 1);
-}
-
-/* Hash with a fixed seed (reached through p): one we saw a failure on. */
-static uint64_t fixedhash(const void *key, size_t len, uint64_t seed, void *p)
-{
-       const uint64_t *fixed_seed = p;
-
-       /* The seed argument is deliberately ignored in favour of *p. */
-       return hash64_stable((const unsigned char *)key, len, *fixed_seed);
-}
-
-/*
- * Store 1000 records whose key and data are both the int i, reading each
- * one back straight after it has been written.
- *
- * Returns true if every store and read-back worked, false on the first
- * failure.
- */
-static bool store_records(struct tdb_context *tdb)
-{
-       int i;
-       struct tdb_data key = { (unsigned char *)&i, sizeof(i) };
-       struct tdb_data d, data = { (unsigned char *)&i, sizeof(i) };
-
-       for (i = 0; i < 1000; i++) {
-               bool same;
-
-               if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
-                       return false;
-               /* d starts out uninitialised: only use it once the fetch
-                * has succeeded. */
-               if (tdb_fetch(tdb, key, &d) != TDB_SUCCESS)
-                       return false;
-               same = tdb_deq(d, data);
-               /* Free before any return so a mismatch does not leak. */
-               free(d.dptr);
-               if (!same)
-                       return false;
-       }
-       return true;
-}
-
-/*
- * Exercise store/fetch/delete on the three neighbouring keys val, val + 1
- * and val + 2, running tdb_check() after most changes.
- *
- * key and data both point at the local v, so assigning to v selects which
- * record the next call operates on.  The function runs 39 ok1() checks
- * and leaves the database without any of the three keys.
- */
-static void test_val(struct tdb_context *tdb, uint64_t val)
-{
-       uint64_t v;
-       struct tdb_data key = { (unsigned char *)&v, sizeof(v) };
-       struct tdb_data d, data = { (unsigned char *)&v, sizeof(v) };
-
-       /* Insert an entry, then delete it. */
-       v = val;
-       /* Delete should fail. */
-       ok1(tdb_delete(tdb, key) == TDB_ERR_NOEXIST);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-       /* Insert should succeed. */
-       ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-       /* Delete should succeed. */
-       ok1(tdb_delete(tdb, key) == 0);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-       /* Re-add it, then add collision. */
-       ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-       v = val + 1;
-       ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-       /* Can find both? */
-       ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
-       ok1(d.dsize == data.dsize);
-       free(d.dptr);
-       v = val;
-       ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
-       ok1(d.dsize == data.dsize);
-       free(d.dptr);
-
-       /* Delete second one. */
-       v = val + 1;
-       ok1(tdb_delete(tdb, key) == 0);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-       /* Re-add */
-       ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-       /* Now, try deleting first one. */
-       v = val;
-       ok1(tdb_delete(tdb, key) == 0);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-       /* Can still find second? */
-       v = val + 1;
-       ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
-       ok1(d.dsize == data.dsize);
-       free(d.dptr);
-
-       /* Now, this will be ideally placed. */
-       v = val + 2;
-       ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-       /* This will collide with both. */
-       v = val;
-       ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-
-       /* We can still find them all, right? */
-       ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
-       ok1(d.dsize == data.dsize);
-       free(d.dptr);
-       v = val + 1;
-       ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
-       ok1(d.dsize == data.dsize);
-       free(d.dptr);
-       v = val + 2;
-       ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
-       ok1(d.dsize == data.dsize);
-       free(d.dptr);
-
-       /* And if we delete val + 1, that val + 2 should not move! */
-       v = val + 1;
-       ok1(tdb_delete(tdb, key) == 0);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-       v = val;
-       ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
-       ok1(d.dsize == data.dsize);
-       free(d.dptr);
-       v = val + 2;
-       ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
-       ok1(d.dsize == data.dsize);
-       free(d.dptr);
-
-       /* Delete those two, so we are empty.  (v is still val + 2 here.) */
-       ok1(tdb_delete(tdb, key) == 0);
-       v = val;
-       ok1(tdb_delete(tdb, key) == 0);
-
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-}
-
-/*
- * For each flag combination, run two passes over a freshly truncated
- * database: test_val() at three positions using the colliding hash, then
- * a bulk store followed by deleting two specific keys using the
- * fixed-seed hash.
- */
-int main(int argc, char *argv[])
-{
-       unsigned int i, j;
-       struct tdb_context *tdb;
-       uint64_t seed = 16014841315512641303ULL;
-       union tdb_attribute clash_hattr
-               = { .hash = { .base = { TDB_ATTRIBUTE_HASH },
-                             .fn = clash } };
-       union tdb_attribute fixed_hattr
-               = { .hash = { .base = { TDB_ATTRIBUTE_HASH },
-                             .fn = fixedhash,
-                             .data = &seed } };
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT };
-       /* These two values gave trouble before. */
-       int vals[] = { 755, 837 };
-
-       /* Chain the logging attribute behind each hash attribute. */
-       clash_hattr.base.next = &tap_log_attr;
-       fixed_hattr.base.next = &tap_log_attr;
-
-       /*
-        * Per flag: 39 checks for each of the 3 test_val() calls, 5 checks
-        * made directly in the loop (two opens, the lock-state check,
-        * store_records and the tdb_check after it) and 2 per entry of
-        * vals.  The trailing + 1 is the final tap_log_messages check.
-        */
-       plan_tests(sizeof(flags) / sizeof(flags[0])
-                  * (39 * 3 + 5 + sizeof(vals)/sizeof(vals[0])*2) + 1);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-13-delete.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &clash_hattr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               /* Check start of hash table. */
-               test_val(tdb, 0);
-
-               /* Check end of hash table. */
-               test_val(tdb, -1ULL);
-
-               /* Check mixed bitpattern. */
-               test_val(tdb, 0x123456789ABCDEF0ULL);
-
-               /* No locks may be left held (skipped when there is no
-                * tdb->file). */
-               ok1(!tdb->file || (tdb->file->allrecord_lock.count == 0
-                                  && tdb->file->num_lockrecs == 0));
-               tdb_close(tdb);
-
-               /* Deleting these entries in the db gave problems. */
-               tdb = tdb_open("run-13-delete.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &fixed_hattr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               ok1(store_records(tdb));
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               for (j = 0; j < sizeof(vals)/sizeof(vals[0]); j++) {
-                       struct tdb_data key;
-
-                       /* Same int-sized key layout as store_records(). */
-                       key.dptr = (unsigned char *)&vals[j];
-                       key.dsize = sizeof(vals[j]);
-                       ok1(tdb_delete(tdb, key) == 0);
-                       ok1(tdb_check(tdb, NULL, NULL) == 0);
-               }
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/lib/tdb2/test/api-14-exists.c b/lib/tdb2/test/api-14-exists.c
deleted file mode 100644 (file)
index 801c295..0000000
+++ /dev/null
@@ -1,54 +0,0 @@
-#include "config.h"
-#include "tdb2.h"
-#include "tap-interface.h"
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include "logging.h"
-
-/*
- * Walk keys 0..999 twice: first storing each one, then deleting each one,
- * checking that tdb_exists() changes its answer around every operation.
- * Returns false as soon as anything is not as expected.
- */
-static bool test_records(struct tdb_context *tdb)
-{
-       int n;
-       struct tdb_data key = { (unsigned char *)&n, sizeof(n) };
-       struct tdb_data data = { (unsigned char *)&n, sizeof(n) };
-
-       /* Absent before the store, present after it. */
-       for (n = 0; n < 1000; n++) {
-               if (tdb_exists(tdb, key)
-                   || tdb_store(tdb, key, data, TDB_REPLACE) != 0
-                   || !tdb_exists(tdb, key))
-                       return false;
-       }
-
-       /* Present before the delete, absent after it. */
-       for (n = 0; n < 1000; n++) {
-               if (!tdb_exists(tdb, key)
-                   || tdb_delete(tdb, key) != 0
-                   || tdb_exists(tdb, key))
-                       return false;
-       }
-       return true;
-}
-
-/*
- * Run test_records() against a freshly truncated database for each flag
- * combination, then check that nothing was logged.
- */
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT };
-
-       /* Two checks per flag combination, plus the final log check. */
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 2 + 1);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-14-exists.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               if (ok1(tdb)) {
-                       ok1(test_records(tdb));
-                       /* Only close what was actually opened: a failed
-                        * open leaves tdb NULL. */
-                       tdb_close(tdb);
-               }
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/lib/tdb2/test/api-16-wipe_all.c b/lib/tdb2/test/api-16-wipe_all.c
deleted file mode 100644 (file)
index 3dfcc7a..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-#include "config.h"
-#include "tdb2.h"
-#include "tap-interface.h"
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include "logging.h"
-
-/*
- * Fill the database with 1000 records whose key and data are both the
- * loop counter.  Returns false on the first store that fails.
- */
-static bool add_records(struct tdb_context *tdb)
-{
-       int n = 0;
-       struct tdb_data key = { (unsigned char *)&n, sizeof(n) };
-       struct tdb_data data = { (unsigned char *)&n, sizeof(n) };
-
-       while (n < 1000) {
-               if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
-                       return false;
-               n++;
-       }
-       return true;
-}
-
-
-/*
- * For each flag combination: fill a fresh database, wipe it, and check
- * that no first key can be found afterwards.
- */
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT };
-       const size_t nflags = sizeof(flags) / sizeof(flags[0]);
-
-       /* Four checks per flag combination, plus the final log check. */
-       plan_tests(nflags * 4 + 1);
-       for (i = 0; i < nflags; i++) {
-               struct tdb_data key;
-
-               tdb = tdb_open("run-16-wipe_all.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               if (!ok1(tdb))
-                       continue;
-
-               ok1(add_records(tdb));
-               ok1(tdb_wipe_all(tdb) == TDB_SUCCESS);
-               ok1(tdb_firstkey(tdb, &key) == TDB_ERR_NOEXIST);
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/lib/tdb2/test/api-21-parse_record.c b/lib/tdb2/test/api-21-parse_record.c
deleted file mode 100644 (file)
index 150e1c9..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-#include "config.h"
-#include "tdb2.h"
-#include "tap-interface.h"
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include "logging.h"
-
-/* Parse callback: succeed only if the record's data equals *expected. */
-static enum TDB_ERROR parse(TDB_DATA key, TDB_DATA data, TDB_DATA *expected)
-{
-       return tdb_deq(data, *expected) ? TDB_SUCCESS : TDB_ERR_EINVAL;
-}
-
-/*
- * Parse callback that always fails.  It returns the arbitrary value 100
- * so that test_records() can check the callback's own return value is
- * what tdb_parse_record() hands back.
- */
-static enum TDB_ERROR parse_err(TDB_DATA key, TDB_DATA data, void *unused)
-{
-       return 100;
-}
-
-/*
- * Store keys 0..999 (data == key), then check tdb_parse_record() on each
- * of them, on a key that was never stored, and with a callback that
- * reports an error.  Returns true only if every step behaves.
- */
-static bool test_records(struct tdb_context *tdb)
-{
-       int n;
-       struct tdb_data key = { (unsigned char *)&n, sizeof(n) };
-       struct tdb_data data = { (unsigned char *)&n, sizeof(n) };
-
-       n = 0;
-       while (n < 1000) {
-               if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
-                       return false;
-               n++;
-       }
-
-       /* Every stored record must parse and match its own key value. */
-       for (n = 0; n != 1000; n++) {
-               enum TDB_ERROR err = tdb_parse_record(tdb, key, parse, &data);
-
-               if (err != TDB_SUCCESS)
-                       return false;
-       }
-
-       /* n is now 1000, a key that was never stored. */
-       if (tdb_parse_record(tdb, key, parse, &data) != TDB_ERR_NOEXIST)
-               return false;
-
-       /* Test error return from parse function. */
-       n = 0;
-       return tdb_parse_record(tdb, key, parse_err, NULL) == 100;
-}
-
-/*
- * Run test_records() against a freshly truncated database for each flag
- * combination, then check that nothing was logged.
- */
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT };
-
-       /* Two checks per flag combination, plus the final log check. */
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 2 + 1);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("api-21-parse_record.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               if (ok1(tdb)) {
-                       ok1(test_records(tdb));
-                       /* Only close what was actually opened: a failed
-                        * open leaves tdb NULL. */
-                       tdb_close(tdb);
-               }
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/lib/tdb2/test/api-55-transaction.c b/lib/tdb2/test/api-55-transaction.c
deleted file mode 100644 (file)
index c474c6a..0000000
+++ /dev/null
@@ -1,73 +0,0 @@
-#include "private.h" // struct tdb_context
-#include "tdb2.h"
-#include "tap-interface.h"
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <stdlib.h>
-#include "logging.h"
-
-/*
- * For each flag combination, store a 1000-byte record inside a
- * transaction twice: once cancelling (the record must then be absent)
- * and once committing (the record must then be present and intact).
- * After each transaction ends no locks may remain held.
- */
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       unsigned char *buffer;
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
-       struct tdb_data key = tdb_mkdata("key", 3);
-       struct tdb_data data;
-
-       buffer = malloc(1000);
-       /* Nothing can be tested without the pattern buffer; a non-zero
-        * exit makes the run count as failed. */
-       if (!buffer)
-               return 1;
-       /* Fill with a repeating byte pattern (i wraps at 256). */
-       for (i = 0; i < 1000; i++)
-               buffer[i] = i;
-
-       /* Twenty checks per flag combination, plus the final log check. */
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 20 + 1);
-
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-55-transaction.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               ok1(tdb_transaction_start(tdb) == 0);
-               data.dptr = buffer;
-               data.dsize = 1000;
-               ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-               /* The fetch overwrites data with a copy that we free. */
-               ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS);
-               ok1(data.dsize == 1000);
-               ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
-               free(data.dptr);
-
-               /* Cancelling a transaction means no store */
-               tdb_transaction_cancel(tdb);
-               ok1(tdb->file->allrecord_lock.count == 0
-                   && tdb->file->num_lockrecs == 0);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               ok1(tdb_fetch(tdb, key, &data) == TDB_ERR_NOEXIST);
-
-               /* Commit the transaction. */
-               ok1(tdb_transaction_start(tdb) == 0);
-               data.dptr = buffer;
-               data.dsize = 1000;
-               ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-               ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS);
-               ok1(data.dsize == 1000);
-               ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
-               free(data.dptr);
-               ok1(tdb_transaction_commit(tdb) == 0);
-               ok1(tdb->file->allrecord_lock.count == 0
-                   && tdb->file->num_lockrecs == 0);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS);
-               ok1(data.dsize == 1000);
-               ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
-               free(data.dptr);
-
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       free(buffer);
-       return exit_status();
-}
diff --git a/lib/tdb2/test/api-81-seqnum.c b/lib/tdb2/test/api-81-seqnum.c
deleted file mode 100644 (file)
index 8bf261d..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-#include "config.h"
-#include "tdb2.h"
-#include "tap-interface.h"
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <stdlib.h>
-#include "logging.h"
-
-/*
- * Check that, with TDB_SEQNUM set, tdb_get_seqnum() goes up by one for
- * every store, append, delete and wipe, and is left alone by a fetch.
- * seq tracks the expected value; each ++seq sits in the check that
- * directly follows the modifying call.
- */
-int main(int argc, char *argv[])
-{
-       unsigned int i, seq;
-       struct tdb_context *tdb;
-       struct tdb_data d = { NULL, 0 }; /* Bogus GCC warning */
-       struct tdb_data key = tdb_mkdata("key", 3);
-       struct tdb_data data = tdb_mkdata("data", 4);
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT };
-
-       /*
-        * 15 checks for every flag combination, plus 13 transaction checks
-        * for each of the 4 combinations without TDB_INTERNAL.
-        */
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 15 + 4 * 13);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("api-81-seqnum.tdb", flags[i]|TDB_SEQNUM,
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               if (!ok1(tdb))
-                       continue;
-
-               seq = 0;
-               ok1(tdb_get_seqnum(tdb) == seq);
-               ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-               ok1(tdb_get_seqnum(tdb) == ++seq);
-               /* Fetch doesn't change seqnum */
-               if (ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS))
-                       free(d.dptr);
-               ok1(tdb_get_seqnum(tdb) == seq);
-               ok1(tdb_append(tdb, key, data) == TDB_SUCCESS);
-               ok1(tdb_get_seqnum(tdb) == ++seq);
-
-               ok1(tdb_delete(tdb, key) == TDB_SUCCESS);
-               ok1(tdb_get_seqnum(tdb) == ++seq);
-               /* Empty append works */
-               ok1(tdb_append(tdb, key, data) == TDB_SUCCESS);
-               ok1(tdb_get_seqnum(tdb) == ++seq);
-
-               ok1(tdb_wipe_all(tdb) == TDB_SUCCESS);
-               ok1(tdb_get_seqnum(tdb) == ++seq);
-
-               /* The transaction checks are skipped for TDB_INTERNAL. */
-               if (!(flags[i] & TDB_INTERNAL)) {
-                       ok1(tdb_transaction_start(tdb) == TDB_SUCCESS);
-                       ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-                       ok1(tdb_get_seqnum(tdb) == ++seq);
-                       ok1(tdb_append(tdb, key, data) == TDB_SUCCESS);
-                       ok1(tdb_get_seqnum(tdb) == ++seq);
-                       ok1(tdb_delete(tdb, key) == TDB_SUCCESS);
-                       ok1(tdb_get_seqnum(tdb) == ++seq);
-                       /* The commit itself must not bump the seqnum. */
-                       ok1(tdb_transaction_commit(tdb) == TDB_SUCCESS);
-                       ok1(tdb_get_seqnum(tdb) == seq);
-
-                       /* A cancelled transaction must roll the seqnum
-                        * back, so seq is not incremented here. */
-                       ok1(tdb_transaction_start(tdb) == TDB_SUCCESS);
-                       ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-                       ok1(tdb_get_seqnum(tdb) == seq + 1);
-                       tdb_transaction_cancel(tdb);
-                       ok1(tdb_get_seqnum(tdb) == seq);
-               }
-               tdb_close(tdb);
-               ok1(tap_log_messages == 0);
-       }
-       return exit_status();
-}
diff --git a/lib/tdb2/test/api-92-get-set-readonly.c b/lib/tdb2/test/api-92-get-set-readonly.c
deleted file mode 100644 (file)
index 46aea7a..0000000
+++ /dev/null
@@ -1,105 +0,0 @@
-#include "config.h"
-#include "tdb2.h"
-#include "tap-interface.h"
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include "logging.h"
-
-/*
- * Check the TDB_RDONLY flag in both directions: setting and clearing it
- * on a database opened read-write, and trying to clear it on a database
- * opened O_RDONLY.  While read-only, every modifying or write-locking
- * call must return TDB_ERR_RDONLY and log exactly one message, which the
- * running tap_log_messages checks below depend on.
- *
- * NOTE(review): unlike sibling tests, tdb is used straight after ok1(tdb)
- * without a NULL check, so a failed open would not be skipped cleanly.
- */
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       struct tdb_data key = tdb_mkdata("key", 3);
-       struct tdb_data data = tdb_mkdata("data", 4);
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
-
-       /* 48 checks per flag combination. */
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 48);
-
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               /* RW -> RO */
-               tdb = tdb_open("run-92-get-set-readonly.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               ok1(!(tdb_get_flags(tdb) & TDB_RDONLY));
-
-               ok1(tdb_store(tdb, key, data, TDB_INSERT) == TDB_SUCCESS);
-
-               tdb_add_flag(tdb, TDB_RDONLY);
-               ok1(tdb_get_flags(tdb) & TDB_RDONLY);
-
-               /* Can't store, append, delete. */
-               ok1(tdb_store(tdb, key, data, TDB_MODIFY) == TDB_ERR_RDONLY);
-               ok1(tap_log_messages == 1);
-               ok1(tdb_append(tdb, key, data) == TDB_ERR_RDONLY);
-               ok1(tap_log_messages == 2);
-               ok1(tdb_delete(tdb, key) == TDB_ERR_RDONLY);
-               ok1(tap_log_messages == 3);
-
-               /* Can't start a transaction, or any write lock. */
-               ok1(tdb_transaction_start(tdb) == TDB_ERR_RDONLY);
-               ok1(tap_log_messages == 4);
-               ok1(tdb_chainlock(tdb, key) == TDB_ERR_RDONLY);
-               ok1(tap_log_messages == 5);
-               ok1(tdb_lockall(tdb) == TDB_ERR_RDONLY);
-               ok1(tap_log_messages == 6);
-               ok1(tdb_wipe_all(tdb) == TDB_ERR_RDONLY);
-               ok1(tap_log_messages == 7);
-
-               /* Back to RW. */
-               tdb_remove_flag(tdb, TDB_RDONLY);
-               ok1(!(tdb_get_flags(tdb) & TDB_RDONLY));
-
-               ok1(tdb_store(tdb, key, data, TDB_MODIFY) == TDB_SUCCESS);
-               ok1(tdb_append(tdb, key, data) == TDB_SUCCESS);
-               ok1(tdb_delete(tdb, key) == TDB_SUCCESS);
-
-               ok1(tdb_transaction_start(tdb) == TDB_SUCCESS);
-               ok1(tdb_store(tdb, key, data, TDB_INSERT) == TDB_SUCCESS);
-               ok1(tdb_transaction_commit(tdb) == TDB_SUCCESS);
-
-               ok1(tdb_chainlock(tdb, key) == TDB_SUCCESS);
-               tdb_chainunlock(tdb, key);
-               ok1(tdb_lockall(tdb) == TDB_SUCCESS);
-               tdb_unlockall(tdb);
-               ok1(tdb_wipe_all(tdb) == TDB_SUCCESS);
-               /* None of the read-write operations may have logged. */
-               ok1(tap_log_messages == 7);
-
-               tdb_close(tdb);
-
-               /* RO -> RW */
-               tdb = tdb_open("run-92-get-set-readonly.tdb", flags[i],
-                              O_RDONLY, 0600, &tap_log_attr);
-               ok1(tdb);
-               ok1(tdb_get_flags(tdb) & TDB_RDONLY);
-
-               /* Can't store, append, delete. */
-               ok1(tdb_store(tdb, key, data, TDB_INSERT) == TDB_ERR_RDONLY);
-               ok1(tap_log_messages == 8);
-               ok1(tdb_append(tdb, key, data) == TDB_ERR_RDONLY);
-               ok1(tap_log_messages == 9);
-               ok1(tdb_delete(tdb, key) == TDB_ERR_RDONLY);
-               ok1(tap_log_messages == 10);
-
-               /* Can't start a transaction, or any write lock. */
-               ok1(tdb_transaction_start(tdb) == TDB_ERR_RDONLY);
-               ok1(tap_log_messages == 11);
-               ok1(tdb_chainlock(tdb, key) == TDB_ERR_RDONLY);
-               ok1(tap_log_messages == 12);
-               ok1(tdb_lockall(tdb) == TDB_ERR_RDONLY);
-               ok1(tap_log_messages == 13);
-               ok1(tdb_wipe_all(tdb) == TDB_ERR_RDONLY);
-               ok1(tap_log_messages == 14);
-
-               /* Can't remove TDB_RDONLY since we opened with O_RDONLY */
-               tdb_remove_flag(tdb, TDB_RDONLY);
-               ok1(tap_log_messages == 15);
-               ok1(tdb_get_flags(tdb) & TDB_RDONLY);
-               tdb_close(tdb);
-
-               /* Closing must not log; then reset the counter so the
-                * next flag combination starts from zero. */
-               ok1(tap_log_messages == 15);
-               tap_log_messages = 0;
-       }
-       return exit_status();
-}
diff --git a/lib/tdb2/test/api-add-remove-flags.c b/lib/tdb2/test/api-add-remove-flags.c
deleted file mode 100644 (file)
index a72b609..0000000
+++ /dev/null
@@ -1,89 +0,0 @@
-#include "private.h" // for tdb_context
-#include "tdb2.h"
-#include "tap-interface.h"
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT };
-
-       plan_tests(87);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-add-remove-flags.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               ok1(tdb_get_flags(tdb) == tdb->flags);
-               tap_log_messages = 0;
-               tdb_add_flag(tdb, TDB_NOLOCK);
-               if (flags[i] & TDB_INTERNAL)
-                       ok1(tap_log_messages == 1);
-               else {
-                       ok1(tap_log_messages == 0);
-                       ok1(tdb_get_flags(tdb) & TDB_NOLOCK);
-               }
-
-               tap_log_messages = 0;
-               tdb_add_flag(tdb, TDB_NOMMAP);
-               if (flags[i] & TDB_INTERNAL)
-                       ok1(tap_log_messages == 1);
-               else {
-                       ok1(tap_log_messages == 0);
-                       ok1(tdb_get_flags(tdb) & TDB_NOMMAP);
-                       ok1(tdb->file->map_ptr == NULL);
-               }
-
-               tap_log_messages = 0;
-               tdb_add_flag(tdb, TDB_NOSYNC);
-               if (flags[i] & TDB_INTERNAL)
-                       ok1(tap_log_messages == 1);
-               else {
-                       ok1(tap_log_messages == 0);
-                       ok1(tdb_get_flags(tdb) & TDB_NOSYNC);
-               }
-
-               ok1(tdb_get_flags(tdb) == tdb->flags);
-
-               tap_log_messages = 0;
-               tdb_remove_flag(tdb, TDB_NOLOCK);
-               if (flags[i] & TDB_INTERNAL)
-                       ok1(tap_log_messages == 1);
-               else {
-                       ok1(tap_log_messages == 0);
-                       ok1(!(tdb_get_flags(tdb) & TDB_NOLOCK));
-               }
-
-               tap_log_messages = 0;
-               tdb_remove_flag(tdb, TDB_NOMMAP);
-               if (flags[i] & TDB_INTERNAL)
-                       ok1(tap_log_messages == 1);
-               else {
-                       ok1(tap_log_messages == 0);
-                       ok1(!(tdb_get_flags(tdb) & TDB_NOMMAP));
-                       ok1(tdb->file->map_ptr != NULL);
-               }
-
-               tap_log_messages = 0;
-               tdb_remove_flag(tdb, TDB_NOSYNC);
-               if (flags[i] & TDB_INTERNAL)
-                       ok1(tap_log_messages == 1);
-               else {
-                       ok1(tap_log_messages == 0);
-                       ok1(!(tdb_get_flags(tdb) & TDB_NOSYNC));
-               }
-
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/lib/tdb2/test/api-open-multiple-times.c b/lib/tdb2/test/api-open-multiple-times.c
deleted file mode 100644 (file)
index 38aea13..0000000
+++ /dev/null
@@ -1,83 +0,0 @@
-#include "config.h"
-#include "tdb2.h"
-#include "tap-interface.h"
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <stdlib.h>
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb, *tdb2;
-       struct tdb_data key = { (unsigned char *)&i, sizeof(i) };
-       struct tdb_data data = { (unsigned char *)&i, sizeof(i) };
-       struct tdb_data d = { NULL, 0 }; /* Bogus GCC warning */
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 28);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-open-multiple-times.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               tdb2 = tdb_open("run-open-multiple-times.tdb", flags[i],
-                               O_RDWR|O_CREAT, 0600, &tap_log_attr);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               ok1(tdb_check(tdb2, NULL, NULL) == 0);
-
-               /* Store in one, fetch in the other. */
-               ok1(tdb_store(tdb, key, data, TDB_REPLACE) == 0);
-               ok1(tdb_fetch(tdb2, key, &d) == TDB_SUCCESS);
-               ok1(tdb_deq(d, data));
-               free(d.dptr);
-
-               /* Vice versa, with delete. */
-               ok1(tdb_delete(tdb2, key) == 0);
-               ok1(tdb_fetch(tdb, key, &d) == TDB_ERR_NOEXIST);
-
-               /* OK, now close first one, check second still good. */
-               ok1(tdb_close(tdb) == 0);
-
-               ok1(tdb_store(tdb2, key, data, TDB_REPLACE) == 0);
-               ok1(tdb_fetch(tdb2, key, &d) == TDB_SUCCESS);
-               ok1(tdb_deq(d, data));
-               free(d.dptr);
-
-               /* Reopen */
-               tdb = tdb_open("run-open-multiple-times.tdb", flags[i],
-                              O_RDWR|O_CREAT, 0600, &tap_log_attr);
-               ok1(tdb);
-
-               ok1(tdb_transaction_start(tdb2) == 0);
-
-               /* Anything in the other one should fail. */
-               ok1(tdb_fetch(tdb, key, &d) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == 1);
-               ok1(tdb_store(tdb, key, data, TDB_REPLACE) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == 2);
-               ok1(tdb_transaction_start(tdb) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == 3);
-               ok1(tdb_chainlock(tdb, key) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == 4);
-
-               /* Transaciton should work as normal. */
-               ok1(tdb_store(tdb2, key, data, TDB_REPLACE) == TDB_SUCCESS);
-
-               /* Now... try closing with locks held. */
-               ok1(tdb_close(tdb2) == 0);
-
-               ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
-               ok1(tdb_deq(d, data));
-               free(d.dptr);
-               ok1(tdb_close(tdb) == 0);
-               ok1(tap_log_messages == 4);
-               tap_log_messages = 0;
-       }
-
-       return exit_status();
-}
diff --git a/lib/tdb2/test/api-simple-delete.c b/lib/tdb2/test/api-simple-delete.c
deleted file mode 100644 (file)
index 48b077a..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-#include "config.h"
-#include "tdb2.h"
-#include "tap-interface.h"
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT };
-       struct tdb_data key = tdb_mkdata("key", 3);
-       struct tdb_data data = tdb_mkdata("data", 4);
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 7 + 1);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-simple-delete.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (tdb) {
-                       /* Delete should fail. */
-                       ok1(tdb_delete(tdb, key) == TDB_ERR_NOEXIST);
-                       ok1(tdb_check(tdb, NULL, NULL) == 0);
-                       /* Insert should succeed. */
-                       ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-                       ok1(tdb_check(tdb, NULL, NULL) == 0);
-                       /* Delete should now work. */
-                       ok1(tdb_delete(tdb, key) == 0);
-                       ok1(tdb_check(tdb, NULL, NULL) == 0);
-                       tdb_close(tdb);
-               }
-       }
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/lib/tdb2/test/helpapi-external-agent.c b/lib/tdb2/test/helpapi-external-agent.c
deleted file mode 100644 (file)
index 59e1c6c..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-#include "external-agent.h"
-
-/* This isn't possible with via the tdb2 API, but this makes it link. */
-enum agent_return external_agent_needs_rec(struct tdb_context *tdb)
-{
-       return FAILED;
-}
diff --git a/lib/tdb2/test/helprun-external-agent.c b/lib/tdb2/test/helprun-external-agent.c
deleted file mode 100644 (file)
index 9f24382..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-#include "external-agent.h"
-#include "private.h"
-
-enum agent_return external_agent_needs_rec(struct tdb_context *tdb)
-{
-       return tdb_needs_recovery(tdb) ? SUCCESS : FAILED;
-}
diff --git a/lib/tdb2/test/helprun-layout.c b/lib/tdb2/test/helprun-layout.c
deleted file mode 100644 (file)
index b9cd4a6..0000000
+++ /dev/null
@@ -1,402 +0,0 @@
-/* TDB tools to create various canned database layouts. */
-#include "layout.h"
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-#include <ccan/err/err.h>
-#include "logging.h"
-
-struct tdb_layout *new_tdb_layout(void)
-{
-       struct tdb_layout *layout = malloc(sizeof(*layout));
-       layout->num_elems = 0;
-       layout->elem = NULL;
-       return layout;
-}
-
-static void add(struct tdb_layout *layout, union tdb_layout_elem elem)
-{
-       layout->elem = realloc(layout->elem,
-                              sizeof(layout->elem[0])
-                              * (layout->num_elems+1));
-       layout->elem[layout->num_elems++] = elem;
-}
-
-void tdb_layout_add_freetable(struct tdb_layout *layout)
-{
-       union tdb_layout_elem elem;
-       elem.base.type = FREETABLE;
-       add(layout, elem);
-}
-
-void tdb_layout_add_free(struct tdb_layout *layout, tdb_len_t len,
-                        unsigned ftable)
-{
-       union tdb_layout_elem elem;
-       elem.base.type = FREE;
-       elem.free.len = len;
-       elem.free.ftable_num = ftable;
-       add(layout, elem);
-}
-
-void tdb_layout_add_capability(struct tdb_layout *layout,
-                              uint64_t type,
-                              bool write_breaks,
-                              bool check_breaks,
-                              bool open_breaks,
-                              tdb_len_t extra)
-{
-       union tdb_layout_elem elem;
-       elem.base.type = CAPABILITY;
-       elem.capability.type = type;
-       if (write_breaks)
-               elem.capability.type |= TDB_CAP_NOWRITE;
-       if (open_breaks)
-               elem.capability.type |= TDB_CAP_NOOPEN;
-       if (check_breaks)
-               elem.capability.type |= TDB_CAP_NOCHECK;
-       elem.capability.extra = extra;
-       add(layout, elem);
-}
-
-static struct tdb_data dup_key(struct tdb_data key)
-{
-       struct tdb_data ret;
-       ret.dsize = key.dsize;
-       ret.dptr = malloc(ret.dsize);
-       memcpy(ret.dptr, key.dptr, ret.dsize);
-       return ret;
-}
-
-void tdb_layout_add_used(struct tdb_layout *layout,
-                        TDB_DATA key, TDB_DATA data,
-                        tdb_len_t extra)
-{
-       union tdb_layout_elem elem;
-       elem.base.type = DATA;
-       elem.used.key = dup_key(key);
-       elem.used.data = dup_key(data);
-       elem.used.extra = extra;
-       add(layout, elem);
-}
-
-static tdb_len_t free_record_len(tdb_len_t len)
-{
-       return sizeof(struct tdb_used_record) + len;
-}
-
-static tdb_len_t data_record_len(struct tle_used *used)
-{
-       tdb_len_t len;
-       len = sizeof(struct tdb_used_record)
-               + used->key.dsize + used->data.dsize + used->extra;
-       assert(len >= sizeof(struct tdb_free_record));
-       return len;
-}
-
-static tdb_len_t hashtable_len(struct tle_hashtable *htable)
-{
-       return sizeof(struct tdb_used_record)
-               + (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS)
-               + htable->extra;
-}
-
-static tdb_len_t capability_len(struct tle_capability *cap)
-{
-       return sizeof(struct tdb_capability) + cap->extra;
-}
-
-static tdb_len_t freetable_len(struct tle_freetable *ftable)
-{
-       return sizeof(struct tdb_freetable);
-}
-
-static void set_free_record(void *mem, tdb_len_t len)
-{
-       /* We do all the work in add_to_freetable */
-}
-
-static void add_zero_pad(struct tdb_used_record *u, size_t len, size_t extra)
-{
-       if (extra)
-               ((char *)(u + 1))[len] = '\0';
-}
-
-static void set_data_record(void *mem, struct tdb_context *tdb,
-                           struct tle_used *used)
-{
-       struct tdb_used_record *u = mem;
-
-       set_header(tdb, u, TDB_USED_MAGIC, used->key.dsize, used->data.dsize,
-                  used->key.dsize + used->data.dsize + used->extra,
-                  tdb_hash(tdb, used->key.dptr, used->key.dsize));
-       memcpy(u + 1, used->key.dptr, used->key.dsize);
-       memcpy((char *)(u + 1) + used->key.dsize,
-              used->data.dptr, used->data.dsize);
-       add_zero_pad(u, used->key.dsize + used->data.dsize, used->extra);
-}
-
-static void set_hashtable(void *mem, struct tdb_context *tdb,
-                         struct tle_hashtable *htable)
-{
-       struct tdb_used_record *u = mem;
-       tdb_len_t len = sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS;
-
-       set_header(tdb, u, TDB_HTABLE_MAGIC, 0, len, len + htable->extra, 0);
-       memset(u + 1, 0, len);
-       add_zero_pad(u, len, htable->extra);
-}
-
-static void set_capability(void *mem, struct tdb_context *tdb,
-                          struct tle_capability *cap, struct tdb_header *hdr,
-                          tdb_off_t last_cap)
-{
-       struct tdb_capability *c = mem;
-       tdb_len_t len = sizeof(*c) - sizeof(struct tdb_used_record) + cap->extra;
-
-       c->type = cap->type;
-       c->next = 0;
-       set_header(tdb, &c->hdr, TDB_CAP_MAGIC, 0, len, len, 0);
-
-       /* Append to capability list. */
-       if (!last_cap) {
-               hdr->capabilities = cap->base.off;
-       } else {
-               c = (struct tdb_capability *)((char *)hdr + last_cap);
-               c->next = cap->base.off;
-       }
-}
-
-static void set_freetable(void *mem, struct tdb_context *tdb,
-                        struct tle_freetable *freetable, struct tdb_header *hdr,
-                        tdb_off_t last_ftable)
-{
-       struct tdb_freetable *ftable = mem;
-       memset(ftable, 0, sizeof(*ftable));
-       set_header(tdb, &ftable->hdr, TDB_FTABLE_MAGIC, 0,
-                       sizeof(*ftable) - sizeof(ftable->hdr),
-                       sizeof(*ftable) - sizeof(ftable->hdr), 0);
-
-       if (last_ftable) {
-               ftable = (struct tdb_freetable *)((char *)hdr + last_ftable);
-               ftable->next = freetable->base.off;
-       } else {
-               hdr->free_table = freetable->base.off;
-       }
-}
-
-static void add_to_freetable(struct tdb_context *tdb,
-                            tdb_off_t eoff,
-                            tdb_off_t elen,
-                            unsigned ftable,
-                            struct tle_freetable *freetable)
-{
-       tdb->ftable_off = freetable->base.off;
-       tdb->ftable = ftable;
-       add_free_record(tdb, eoff, sizeof(struct tdb_used_record) + elen,
-                       TDB_LOCK_WAIT, false);
-}
-
-static tdb_off_t hbucket_off(tdb_off_t group_start, unsigned ingroup)
-{
-       return group_start
-               + (ingroup % (1 << TDB_HASH_GROUP_BITS)) * sizeof(tdb_off_t);
-}
-
-/* Get bits from a value. */
-static uint32_t bits(uint64_t val, unsigned start, unsigned num)
-{
-       assert(num <= 32);
-       return (val >> start) & ((1U << num) - 1);
-}
-
-/* We take bits from the top: that way we can lock whole sections of the hash
- * by using lock ranges. */
-static uint32_t use_bits(uint64_t h, unsigned num, unsigned *used)
-{
-       *used += num;
-       return bits(h, 64 - *used, num);
-}
-
-static tdb_off_t encode_offset(tdb_off_t new_off, unsigned bucket,
-                              uint64_t h)
-{
-       return bucket
-               | new_off
-               | ((uint64_t)bits(h, 64 - TDB_OFF_UPPER_STEAL_EXTRA,
-                                 TDB_OFF_UPPER_STEAL_EXTRA)
-                  << TDB_OFF_HASH_EXTRA_BIT);
-}
-
-/* FIXME: Our hash table handling here is primitive: we don't expand! */
-static void add_to_hashtable(struct tdb_context *tdb,
-                            tdb_off_t eoff,
-                            struct tdb_data key)
-{
-       uint64_t h = tdb_hash(tdb, key.dptr, key.dsize);
-       tdb_off_t b_off, group_start;
-       unsigned i, group, in_group;
-       unsigned used = 0;
-
-       group = use_bits(h, TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS, &used);
-       in_group = use_bits(h, TDB_HASH_GROUP_BITS, &used);
-
-       group_start = offsetof(struct tdb_header, hashtable)
-               + group * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);
-
-       for (i = 0; i < (1 << TDB_HASH_GROUP_BITS); i++) {
-               unsigned bucket = (in_group + i) % (1 << TDB_HASH_GROUP_BITS);
-
-               b_off = hbucket_off(group_start, bucket);
-               if (tdb_read_off(tdb, b_off) == 0) {
-                       tdb_write_off(tdb, b_off,
-                                     encode_offset(eoff, in_group, h));
-                       return;
-               }
-       }
-       abort();
-}
-
-static struct tle_freetable *find_ftable(struct tdb_layout *layout, unsigned num)
-{
-       unsigned i;
-
-       for (i = 0; i < layout->num_elems; i++) {
-               if (layout->elem[i].base.type != FREETABLE)
-                       continue;
-               if (num == 0)
-                       return &layout->elem[i].ftable;
-               num--;
-       }
-       abort();
-}
-
-/* FIXME: Support TDB_CONVERT */
-struct tdb_context *tdb_layout_get(struct tdb_layout *layout,
-                                  void (*freefn)(void *),
-                                  union tdb_attribute *attr)
-{
-       unsigned int i;
-       tdb_off_t off, len, last_ftable, last_cap;
-       char *mem;
-       struct tdb_context *tdb;
-
-       off = sizeof(struct tdb_header);
-
-       /* First pass of layout: calc lengths */
-       for (i = 0; i < layout->num_elems; i++) {
-               union tdb_layout_elem *e = &layout->elem[i];
-               e->base.off = off;
-               switch (e->base.type) {
-               case FREETABLE:
-                       len = freetable_len(&e->ftable);
-                       break;
-               case FREE:
-                       len = free_record_len(e->free.len);
-                       break;
-               case DATA:
-                       len = data_record_len(&e->used);
-                       break;
-               case HASHTABLE:
-                       len = hashtable_len(&e->hashtable);
-                       break;
-               case CAPABILITY:
-                       len = capability_len(&e->capability);
-                       break;
-               default:
-                       abort();
-               }
-               off += len;
-       }
-
-       mem = malloc(off);
-       /* Fill with some weird pattern. */
-       memset(mem, 0x99, off);
-       /* Now populate our header, cribbing from a real TDB header. */
-       tdb = tdb_open(NULL, TDB_INTERNAL, O_RDWR, 0, attr);
-       memcpy(mem, tdb->file->map_ptr, sizeof(struct tdb_header));
-
-       /* Mug the tdb we have to make it use this. */
-       freefn(tdb->file->map_ptr);
-       tdb->file->map_ptr = mem;
-       tdb->file->map_size = off;
-
-       last_ftable = 0;
-       last_cap = 0;
-       for (i = 0; i < layout->num_elems; i++) {
-               union tdb_layout_elem *e = &layout->elem[i];
-               switch (e->base.type) {
-               case FREETABLE:
-                       set_freetable(mem + e->base.off, tdb, &e->ftable,
-                                    (struct tdb_header *)mem, last_ftable);
-                       last_ftable = e->base.off;
-                       break;
-               case FREE:
-                       set_free_record(mem + e->base.off, e->free.len);
-                       break;
-               case DATA:
-                       set_data_record(mem + e->base.off, tdb, &e->used);
-                       break;
-               case HASHTABLE:
-                       set_hashtable(mem + e->base.off, tdb, &e->hashtable);
-                       break;
-               case CAPABILITY:
-                       set_capability(mem + e->base.off, tdb, &e->capability,
-                                      (struct tdb_header *)mem, last_cap);
-                       last_cap = e->base.off;
-                       break;
-               }
-       }
-       /* Must have a free table! */
-       assert(last_ftable);
-
-       /* Now fill the free and hash tables. */
-       for (i = 0; i < layout->num_elems; i++) {
-               union tdb_layout_elem *e = &layout->elem[i];
-               switch (e->base.type) {
-               case FREE:
-                       add_to_freetable(tdb, e->base.off, e->free.len,
-                                        e->free.ftable_num,
-                                        find_ftable(layout, e->free.ftable_num));
-                       break;
-               case DATA:
-                       add_to_hashtable(tdb, e->base.off, e->used.key);
-                       break;
-               default:
-                       break;
-               }
-       }
-
-       tdb->ftable_off = find_ftable(layout, 0)->base.off;
-       return tdb;
-}
-
-void tdb_layout_write(struct tdb_layout *layout, void (*freefn)(void *),
-                      union tdb_attribute *attr, const char *filename)
-{
-       struct tdb_context *tdb = tdb_layout_get(layout, freefn, attr);
-       int fd;
-
-       fd = open(filename, O_WRONLY|O_TRUNC|O_CREAT,  0600);
-       if (fd < 0)
-               err(1, "opening %s for writing", filename);
-       if (write(fd, tdb->file->map_ptr, tdb->file->map_size)
-           != tdb->file->map_size)
-               err(1, "writing %s", filename);
-       close(fd);
-       tdb_close(tdb);
-}
-
-void tdb_layout_free(struct tdb_layout *layout)
-{
-       unsigned int i;
-
-       for (i = 0; i < layout->num_elems; i++) {
-               if (layout->elem[i].base.type == DATA) {
-                       free(layout->elem[i].used.key.dptr);
-                       free(layout->elem[i].used.data.dptr);
-               }
-       }
-       free(layout->elem);
-       free(layout);
-}
diff --git a/lib/tdb2/test/logging.h b/lib/tdb2/test/logging.h
deleted file mode 100644 (file)
index 5f517dc..0000000
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef TDB2_TEST_LOGGING_H
-#define TDB2_TEST_LOGGING_H
-#include "tdb2.h"
-#include <stdbool.h>
-#include <string.h>
-
-extern bool suppress_logging;
-extern const char *log_prefix;
-extern unsigned tap_log_messages;
-extern union tdb_attribute tap_log_attr;
-extern char *log_last;
-
-void tap_log_fn(struct tdb_context *tdb,
-               enum tdb_log_level level,
-               enum TDB_ERROR ecode,
-               const char *message, void *priv);
-#endif /* TDB2_TEST_LOGGING_H */
diff --git a/lib/tdb2/test/run-02-expand.c b/lib/tdb2/test/run-02-expand.c
deleted file mode 100644 (file)
index fd1ae4b..0000000
+++ /dev/null
@@ -1,62 +0,0 @@
-#include <ccan/failtest/failtest_override.h>
-#include "tdb2-source.h"
-#include "tap-interface.h"
-#include <ccan/failtest/failtest.h>
-#include "logging.h"
-#include "failtest_helper.h"
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       uint64_t val;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT };
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 11 + 1);
-
-       failtest_init(argc, argv);
-       failtest_hook = block_repeat_failures;
-       failtest_exit_check = exit_check_log;
-
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               failtest_suppress = true;
-               tdb = tdb_open("run-expand.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               if (!ok1(tdb))
-                       break;
-
-               val = tdb->file->map_size;
-               /* Need some hash lock for expand. */
-               ok1(tdb_lock_hashes(tdb, 0, 1, F_WRLCK, TDB_LOCK_WAIT) == 0);
-               failtest_suppress = false;
-               if (!ok1(tdb_expand(tdb, 1) == 0)) {
-                       failtest_suppress = true;
-                       tdb_close(tdb);
-                       break;
-               }
-               failtest_suppress = true;
-
-               ok1(tdb->file->map_size >= val + 1 * TDB_EXTENSION_FACTOR);
-               ok1(tdb_unlock_hashes(tdb, 0, 1, F_WRLCK) == 0);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-               val = tdb->file->map_size;
-               ok1(tdb_lock_hashes(tdb, 0, 1, F_WRLCK, TDB_LOCK_WAIT) == 0);
-               failtest_suppress = false;
-               if (!ok1(tdb_expand(tdb, 1024) == 0)) {
-                       failtest_suppress = true;
-                       tdb_close(tdb);
-                       break;
-               }
-               failtest_suppress = true;
-               ok1(tdb_unlock_hashes(tdb, 0, 1, F_WRLCK) == 0);
-               ok1(tdb->file->map_size >= val + 1024 * TDB_EXTENSION_FACTOR);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       failtest_exit(exit_status());
-}
diff --git a/lib/tdb2/test/run-03-coalesce.c b/lib/tdb2/test/run-03-coalesce.c
deleted file mode 100644 (file)
index ecc469f..0000000
+++ /dev/null
@@ -1,178 +0,0 @@
-#include "tdb2-source.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "layout.h"
-
-static tdb_len_t free_record_length(struct tdb_context *tdb, tdb_off_t off)
-{
-       struct tdb_free_record f;
-       enum TDB_ERROR ecode;
-
-       ecode = tdb_read_convert(tdb, off, &f, sizeof(f));
-       if (ecode != TDB_SUCCESS)
-               return ecode;
-       if (frec_magic(&f) != TDB_FREE_MAGIC)
-               return TDB_ERR_CORRUPT;
-       return frec_len(&f);
-}
-
-int main(int argc, char *argv[])
-{
-       tdb_off_t b_off, test;
-       struct tdb_context *tdb;
-       struct tdb_layout *layout;
-       struct tdb_data data, key;
-       tdb_len_t len;
-
-       /* FIXME: Test TDB_CONVERT */
-       /* FIXME: Test lock order fail. */
-
-       plan_tests(42);
-       data = tdb_mkdata("world", 5);
-       key = tdb_mkdata("hello", 5);
-
-       /* No coalescing can be done due to EOF */
-       layout = new_tdb_layout();
-       tdb_layout_add_freetable(layout);
-       len = 1024;
-       tdb_layout_add_free(layout, len, 0);
-       tdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.tdb");
-       /* NOMMAP is for lockcheck. */
-       tdb = tdb_open("run-03-coalesce.tdb", TDB_NOMMAP, O_RDWR, 0,
-                      &tap_log_attr);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-       ok1(free_record_length(tdb, layout->elem[1].base.off) == len);
-
-       /* Figure out which bucket free entry is. */
-       b_off = bucket_off(tdb->ftable_off, size_to_bucket(len));
-       /* Lock and fail to coalesce. */
-       ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
-       test = layout->elem[1].base.off;
-       ok1(coalesce(tdb, layout->elem[1].base.off, b_off, len, &test)
-           == 0);
-       tdb_unlock_free_bucket(tdb, b_off);
-       ok1(free_record_length(tdb, layout->elem[1].base.off) == len);
-       ok1(test == layout->elem[1].base.off);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-       tdb_close(tdb);
-       tdb_layout_free(layout);
-
-       /* No coalescing can be done due to used record */
-       layout = new_tdb_layout();
-       tdb_layout_add_freetable(layout);
-       tdb_layout_add_free(layout, 1024, 0);
-       tdb_layout_add_used(layout, key, data, 6);
-       tdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.tdb");
-       /* NOMMAP is for lockcheck. */
-       tdb = tdb_open("run-03-coalesce.tdb", TDB_NOMMAP, O_RDWR, 0,
-                      &tap_log_attr);
-       ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-       /* Figure out which bucket free entry is. */
-       b_off = bucket_off(tdb->ftable_off, size_to_bucket(1024));
-       /* Lock and fail to coalesce. */
-       ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
-       test = layout->elem[1].base.off;
-       ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024, &test)
-           == 0);
-       tdb_unlock_free_bucket(tdb, b_off);
-       ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024);
-       ok1(test == layout->elem[1].base.off);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-       tdb_close(tdb);
-       tdb_layout_free(layout);
-
-       /* Coalescing can be done due to two free records, then EOF */
-       layout = new_tdb_layout();
-       tdb_layout_add_freetable(layout);
-       tdb_layout_add_free(layout, 1024, 0);
-       tdb_layout_add_free(layout, 2048, 0);
-       tdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.tdb");
-       /* NOMMAP is for lockcheck. */
-       tdb = tdb_open("run-03-coalesce.tdb", TDB_NOMMAP, O_RDWR, 0,
-                      &tap_log_attr);
-       ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024);
-       ok1(free_record_length(tdb, layout->elem[2].base.off) == 2048);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-       /* Figure out which bucket (first) free entry is. */
-       b_off = bucket_off(tdb->ftable_off, size_to_bucket(1024));
-       /* Lock and coalesce. */
-       ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
-       test = layout->elem[2].base.off;
-       ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024, &test)
-           == 1024 + sizeof(struct tdb_used_record) + 2048);
-       /* Should tell us it's erased this one... */
-       ok1(test == TDB_ERR_NOEXIST);
-       ok1(tdb->file->allrecord_lock.count == 0 && tdb->file->num_lockrecs == 0);
-       ok1(free_record_length(tdb, layout->elem[1].base.off)
-           == 1024 + sizeof(struct tdb_used_record) + 2048);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-       tdb_close(tdb);
-       tdb_layout_free(layout);
-
-       /* Coalescing can be done due to two free records, then data */
-       layout = new_tdb_layout();
-       tdb_layout_add_freetable(layout);
-       tdb_layout_add_free(layout, 1024, 0);
-       tdb_layout_add_free(layout, 512, 0);
-       tdb_layout_add_used(layout, key, data, 6);
-       tdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.tdb");
-       /* NOMMAP is for lockcheck. */
-       tdb = tdb_open("run-03-coalesce.tdb", TDB_NOMMAP, O_RDWR, 0,
-                      &tap_log_attr);
-       ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024);
-       ok1(free_record_length(tdb, layout->elem[2].base.off) == 512);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-       /* Figure out which bucket free entry is. */
-       b_off = bucket_off(tdb->ftable_off, size_to_bucket(1024));
-       /* Lock and coalesce. */
-       ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
-       test = layout->elem[2].base.off;
-       ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024, &test)
-           == 1024 + sizeof(struct tdb_used_record) + 512);
-       ok1(tdb->file->allrecord_lock.count == 0 && tdb->file->num_lockrecs == 0);
-       ok1(free_record_length(tdb, layout->elem[1].base.off)
-           == 1024 + sizeof(struct tdb_used_record) + 512);
-       ok1(test == TDB_ERR_NOEXIST);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-       tdb_close(tdb);
-       tdb_layout_free(layout);
-
-       /* Coalescing can be done due to three free records, then EOF */
-       layout = new_tdb_layout();
-       tdb_layout_add_freetable(layout);
-       tdb_layout_add_free(layout, 1024, 0);
-       tdb_layout_add_free(layout, 512, 0);
-       tdb_layout_add_free(layout, 256, 0);
-       tdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.tdb");
-       /* NOMMAP is for lockcheck. */
-       tdb = tdb_open("run-03-coalesce.tdb", TDB_NOMMAP, O_RDWR, 0,
-                      &tap_log_attr);
-       ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024);
-       ok1(free_record_length(tdb, layout->elem[2].base.off) == 512);
-       ok1(free_record_length(tdb, layout->elem[3].base.off) == 256);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-       /* Figure out which bucket free entry is. */
-       b_off = bucket_off(tdb->ftable_off, size_to_bucket(1024));
-       /* Lock and coalesce. */
-       ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
-       test = layout->elem[2].base.off;
-       ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024, &test)
-           == 1024 + sizeof(struct tdb_used_record) + 512
-           + sizeof(struct tdb_used_record) + 256);
-       ok1(tdb->file->allrecord_lock.count == 0
-           && tdb->file->num_lockrecs == 0);
-       ok1(free_record_length(tdb, layout->elem[1].base.off)
-           == 1024 + sizeof(struct tdb_used_record) + 512
-           + sizeof(struct tdb_used_record) + 256);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-       tdb_close(tdb);
-       tdb_layout_free(layout);
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/lib/tdb2/test/run-04-basichash.c b/lib/tdb2/test/run-04-basichash.c
deleted file mode 100644 (file)
index dc75fc7..0000000
+++ /dev/null
@@ -1,260 +0,0 @@
-#include "tdb2-source.h"
-#include "tap-interface.h"
-#include "logging.h"
-
-/* We rig the hash so adjacent-numbered records always clash. */
-static uint64_t clash(const void *key, size_t len, uint64_t seed, void *priv)
-{
-       return ((uint64_t)*(const unsigned int *)key)
-               << (64 - TDB_TOPLEVEL_HASH_BITS - 1);
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int i, j;
-       struct tdb_context *tdb;
-       unsigned int v;
-       struct tdb_used_record rec;
-       struct tdb_data key = { (unsigned char *)&v, sizeof(v) };
-       struct tdb_data dbuf = { (unsigned char *)&v, sizeof(v) };
-       union tdb_attribute hattr = { .hash = { .base = { TDB_ATTRIBUTE_HASH },
-                                               .fn = clash } };
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT,
-       };
-
-       hattr.base.next = &tap_log_attr;
-
-       plan_tests(sizeof(flags) / sizeof(flags[0])
-                  * (91 + (2 * ((1 << TDB_HASH_GROUP_BITS) - 1))) + 1);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               struct hash_info h;
-               tdb_off_t new_off, off, subhash;
-
-               tdb = tdb_open("run-04-basichash.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               v = 0;
-               /* Should not find it. */
-               ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL) == 0);
-               /* Should have created correct hash. */
-               ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
-               /* Should have located space in group 0, bucket 0. */
-               ok1(h.group_start == offsetof(struct tdb_header, hashtable));
-               ok1(h.home_bucket == 0);
-               ok1(h.found_bucket == 0);
-               ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS);
-
-               /* Should have lock on bucket 0 */
-               ok1(h.hlock_start == 0);
-               ok1(h.hlock_range ==
-                   1ULL << (64-(TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS)));
-               ok1((tdb->flags & TDB_NOLOCK) || tdb->file->num_lockrecs == 1);
-               ok1((tdb->flags & TDB_NOLOCK)
-                   || tdb->file->lockrecs[0].off == TDB_HASH_LOCK_START);
-               /* FIXME: Check lock length */
-
-               /* Allocate a new record. */
-               new_off = alloc(tdb, key.dsize, dbuf.dsize, h.h,
-                               TDB_USED_MAGIC, false);
-               ok1(!TDB_OFF_IS_ERR(new_off));
-
-               /* We should be able to add it now. */
-               ok1(add_to_hash(tdb, &h, new_off) == 0);
-
-               /* Make sure we fill it in for later finding. */
-               off = new_off + sizeof(struct tdb_used_record);
-               ok1(!tdb->io->twrite(tdb, off, key.dptr, key.dsize));
-               off += key.dsize;
-               ok1(!tdb->io->twrite(tdb, off, dbuf.dptr, dbuf.dsize));
-
-               /* We should be able to unlock that OK. */
-               ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
-                                     F_WRLCK) == 0);
-
-               /* Database should be consistent. */
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-               /* Now, this should give a successful lookup. */
-               ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL)
-                   == new_off);
-               /* Should have created correct hash. */
-               ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
-               /* Should have located space in group 0, bucket 0. */
-               ok1(h.group_start == offsetof(struct tdb_header, hashtable));
-               ok1(h.home_bucket == 0);
-               ok1(h.found_bucket == 0);
-               ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS);
-
-               /* Should have lock on bucket 0 */
-               ok1(h.hlock_start == 0);
-               ok1(h.hlock_range ==
-                   1ULL << (64-(TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS)));
-               ok1((tdb->flags & TDB_NOLOCK) || tdb->file->num_lockrecs == 1);
-               ok1((tdb->flags & TDB_NOLOCK)
-                   || tdb->file->lockrecs[0].off == TDB_HASH_LOCK_START);
-               /* FIXME: Check lock length */
-
-               ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
-                                     F_WRLCK) == 0);
-
-               /* Database should be consistent. */
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-               /* Test expansion. */
-               v = 1;
-               ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL) == 0);
-               /* Should have created correct hash. */
-               ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
-               /* Should have located space in group 0, bucket 1. */
-               ok1(h.group_start == offsetof(struct tdb_header, hashtable));
-               ok1(h.home_bucket == 0);
-               ok1(h.found_bucket == 1);
-               ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS);
-
-               /* Should have lock on bucket 0 */
-               ok1(h.hlock_start == 0);
-               ok1(h.hlock_range ==
-                   1ULL << (64-(TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS)));
-               ok1((tdb->flags & TDB_NOLOCK) || tdb->file->num_lockrecs == 1);
-               ok1((tdb->flags & TDB_NOLOCK)
-                   || tdb->file->lockrecs[0].off == TDB_HASH_LOCK_START);
-               /* FIXME: Check lock length */
-
-               /* Make it expand 0'th bucket. */
-               ok1(expand_group(tdb, &h) == 0);
-               /* First one should be subhash, next should be empty. */
-               ok1(is_subhash(h.group[0]));
-               subhash = (h.group[0] & TDB_OFF_MASK);
-               for (j = 1; j < (1 << TDB_HASH_GROUP_BITS); j++)
-                       ok1(h.group[j] == 0);
-
-               ok1(tdb_write_convert(tdb, h.group_start,
-                                     h.group, sizeof(h.group)) == 0);
-               ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
-                                     F_WRLCK) == 0);
-
-               /* Should be happy with expansion. */
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-               /* Should be able to find it. */
-               v = 0;
-               ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL)
-                   == new_off);
-               /* Should have created correct hash. */
-               ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
-               /* Should have located space in expanded group 0, bucket 0. */
-               ok1(h.group_start == subhash + sizeof(struct tdb_used_record));
-               ok1(h.home_bucket == 0);
-               ok1(h.found_bucket == 0);
-               ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS
-                   + TDB_SUBLEVEL_HASH_BITS);
-
-               /* Should have lock on bucket 0 */
-               ok1(h.hlock_start == 0);
-               ok1(h.hlock_range ==
-                   1ULL << (64-(TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS)));
-               ok1((tdb->flags & TDB_NOLOCK) || tdb->file->num_lockrecs == 1);
-               ok1((tdb->flags & TDB_NOLOCK)
-                   || tdb->file->lockrecs[0].off == TDB_HASH_LOCK_START);
-               /* FIXME: Check lock length */
-
-               /* Simple delete should work. */
-               ok1(delete_from_hash(tdb, &h) == 0);
-               ok1(add_free_record(tdb, new_off,
-                                   sizeof(struct tdb_used_record)
-                                   + rec_key_length(&rec)
-                                   + rec_data_length(&rec)
-                                   + rec_extra_padding(&rec),
-                                   TDB_LOCK_NOWAIT, false) == 0);
-               ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
-                                     F_WRLCK) == 0);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-               /* Test second-level expansion: should expand 0th bucket. */
-               v = 0;
-               ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL) == 0);
-               /* Should have created correct hash. */
-               ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
-               /* Should have located space in group 0, bucket 0. */
-               ok1(h.group_start == subhash + sizeof(struct tdb_used_record));
-               ok1(h.home_bucket == 0);
-               ok1(h.found_bucket == 0);
-               ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS+TDB_SUBLEVEL_HASH_BITS);
-
-               /* Should have lock on bucket 0 */
-               ok1(h.hlock_start == 0);
-               ok1(h.hlock_range ==
-                   1ULL << (64-(TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS)));
-               ok1((tdb->flags & TDB_NOLOCK) || tdb->file->num_lockrecs == 1);
-               ok1((tdb->flags & TDB_NOLOCK)
-                   || tdb->file->lockrecs[0].off == TDB_HASH_LOCK_START);
-               /* FIXME: Check lock length */
-
-               ok1(expand_group(tdb, &h) == 0);
-               /* First one should be subhash, next should be empty. */
-               ok1(is_subhash(h.group[0]));
-               subhash = (h.group[0] & TDB_OFF_MASK);
-               for (j = 1; j < (1 << TDB_HASH_GROUP_BITS); j++)
-                       ok1(h.group[j] == 0);
-               ok1(tdb_write_convert(tdb, h.group_start,
-                                     h.group, sizeof(h.group)) == 0);
-               ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
-                                     F_WRLCK) == 0);
-
-               /* Should be happy with expansion. */
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-               ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL) == 0);
-               /* Should have created correct hash. */
-               ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
-               /* Should have located space in group 0, bucket 0. */
-               ok1(h.group_start == subhash + sizeof(struct tdb_used_record));
-               ok1(h.home_bucket == 0);
-               ok1(h.found_bucket == 0);
-               ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS
-                   + TDB_SUBLEVEL_HASH_BITS * 2);
-
-               /* We should be able to add it now. */
-               /* Allocate a new record. */
-               new_off = alloc(tdb, key.dsize, dbuf.dsize, h.h,
-                               TDB_USED_MAGIC, false);
-               ok1(!TDB_OFF_IS_ERR(new_off));
-               ok1(add_to_hash(tdb, &h, new_off) == 0);
-
-               /* Make sure we fill it in for later finding. */
-               off = new_off + sizeof(struct tdb_used_record);
-               ok1(!tdb->io->twrite(tdb, off, key.dptr, key.dsize));
-               off += key.dsize;
-               ok1(!tdb->io->twrite(tdb, off, dbuf.dptr, dbuf.dsize));
-
-               /* We should be able to unlock that OK. */
-               ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
-                                     F_WRLCK) == 0);
-
-               /* Database should be consistent. */
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-               /* Should be able to find it. */
-               v = 0;
-               ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL)
-                   == new_off);
-               /* Should have created correct hash. */
-               ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
-               /* Should have located space in expanded group 0, bucket 0. */
-               ok1(h.group_start == subhash + sizeof(struct tdb_used_record));
-               ok1(h.home_bucket == 0);
-               ok1(h.found_bucket == 0);
-               ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS
-                   + TDB_SUBLEVEL_HASH_BITS * 2);
-
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/lib/tdb2/test/run-20-growhash.c b/lib/tdb2/test/run-20-growhash.c
deleted file mode 100644 (file)
index 2f634a2..0000000
+++ /dev/null
@@ -1,137 +0,0 @@
-#include "tdb2-source.h"
-#include "tap-interface.h"
-#include "logging.h"
-
-static uint64_t myhash(const void *key, size_t len, uint64_t seed, void *priv)
-{
-       return *(const uint64_t *)key;
-}
-
-static void add_bits(uint64_t *val, unsigned new, unsigned new_bits,
-                    unsigned *done)
-{
-       *done += new_bits;
-       *val |= ((uint64_t)new << (64 - *done));
-}
-
-static uint64_t make_key(unsigned topgroup, unsigned topbucket,
-                        unsigned subgroup1, unsigned subbucket1,
-                        unsigned subgroup2, unsigned subbucket2)
-{
-       uint64_t key = 0;
-       unsigned done = 0;
-
-       add_bits(&key, topgroup, TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS,
-                &done);
-       add_bits(&key, topbucket, TDB_HASH_GROUP_BITS, &done);
-       add_bits(&key, subgroup1, TDB_SUBLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS,
-                &done);
-       add_bits(&key, subbucket1, TDB_HASH_GROUP_BITS, &done);
-       add_bits(&key, subgroup2, TDB_SUBLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS,
-                &done);
-       add_bits(&key, subbucket2, TDB_HASH_GROUP_BITS, &done);
-       return key;
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int i, j;
-       struct tdb_context *tdb;
-       uint64_t kdata;
-       struct tdb_used_record rec;
-       struct tdb_data key = { (unsigned char *)&kdata, sizeof(kdata) };
-       struct tdb_data dbuf = { (unsigned char *)&kdata, sizeof(kdata) };
-       union tdb_attribute hattr = { .hash = { .base = { TDB_ATTRIBUTE_HASH },
-                                               .fn = myhash } };
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT,
-       };
-
-       hattr.base.next = &tap_log_attr;
-
-       plan_tests(sizeof(flags) / sizeof(flags[0])
-                  * (9 + (20 + 2 * ((1 << TDB_HASH_GROUP_BITS) - 2))
-                     * (1 << TDB_HASH_GROUP_BITS)) + 1);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               struct hash_info h;
-
-               tdb = tdb_open("run-20-growhash.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               /* Fill a group. */
-               for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++) {
-                       kdata = make_key(0, j, 0, 0, 0, 0);
-                       ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
-               }
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-               /* Check first still exists. */
-               kdata = make_key(0, 0, 0, 0, 0, 0);
-               ok1(find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL) != 0);
-               /* Should have created correct hash. */
-               ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
-               /* Should have located space in group 0, bucket 0. */
-               ok1(h.group_start == offsetof(struct tdb_header, hashtable));
-               ok1(h.home_bucket == 0);
-               ok1(h.found_bucket == 0);
-               ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS);
-               /* Entire group should be full! */
-               for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++)
-                       ok1(h.group[j] != 0);
-
-               ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
-                                     F_RDLCK) == 0);
-
-               /* Now, add one more to each should expand (that) bucket. */
-               for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++) {
-                       unsigned int k;
-                       kdata = make_key(0, j, 0, 1, 0, 0);
-                       ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
-                       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-                       ok1(find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL));
-                       /* Should have created correct hash. */
-                       ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
-                       /* Should have moved to subhash */
-                       ok1(h.group_start >= sizeof(struct tdb_header));
-                       ok1(h.home_bucket == 1);
-                       ok1(h.found_bucket == 1);
-                       ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS
-                           + TDB_SUBLEVEL_HASH_BITS);
-                       ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
-                                             F_RDLCK) == 0);
-
-                       /* Keep adding, make it expand again. */
-                       for (k = 2; k < (1 << TDB_HASH_GROUP_BITS); k++) {
-                               kdata = make_key(0, j, 0, k, 0, 0);
-                               ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
-                               ok1(tdb_check(tdb, NULL, NULL) == 0);
-                       }
-
-                       /* This should tip it over to sub-sub-hash. */
-                       kdata = make_key(0, j, 0, 0, 0, 1);
-                       ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
-                       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-                       ok1(find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL));
-                       /* Should have created correct hash. */
-                       ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
-                       /* Should have moved to subhash */
-                       ok1(h.group_start >= sizeof(struct tdb_header));
-                       ok1(h.home_bucket == 1);
-                       ok1(h.found_bucket == 1);
-                       ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS
-                           + TDB_SUBLEVEL_HASH_BITS + TDB_SUBLEVEL_HASH_BITS);
-                       ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
-                                             F_RDLCK) == 0);
-               }
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/lib/tdb2/test/run-25-hashoverload.c b/lib/tdb2/test/run-25-hashoverload.c
deleted file mode 100644 (file)
index 8503215..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-#include "tdb2-source.h"
-#include "tap-interface.h"
-#include "logging.h"
-
-static uint64_t badhash(const void *key, size_t len, uint64_t seed, void *priv)
-{
-       return 0;
-}
-
-static int trav(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, void *p)
-{
-       if (p)
-               return tdb_delete(tdb, key);
-       return 0;
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int i, j;
-       struct tdb_context *tdb;
-       struct tdb_data key = { (unsigned char *)&j, sizeof(j) };
-       struct tdb_data dbuf = { (unsigned char *)&j, sizeof(j) };
-       union tdb_attribute hattr = { .hash = { .base = { TDB_ATTRIBUTE_HASH },
-                                               .fn = badhash } };
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT,
-       };
-
-       hattr.base.next = &tap_log_attr;
-
-       plan_tests(6883);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               struct tdb_data d = { NULL, 0 }; /* Bogus GCC warning */
-
-               tdb = tdb_open("run-25-hashoverload.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               /* Fill a group. */
-               for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++) {
-                       ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
-               }
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-               /* Now store one last value: should form chain. */
-               ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-               /* Check we can find them all. */
-               for (j = 0; j < (1 << TDB_HASH_GROUP_BITS) + 1; j++) {
-                       ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
-                       ok1(d.dsize == sizeof(j));
-                       ok1(d.dptr != NULL);
-                       ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0);
-                       free(d.dptr);
-               }
-
-               /* Now add a *lot* more. */
-               for (j = (1 << TDB_HASH_GROUP_BITS) + 1;
-                    j < (16 << TDB_HASH_GROUP_BITS);
-                    j++) {
-                       ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
-                       ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
-                       ok1(d.dsize == sizeof(j));
-                       ok1(d.dptr != NULL);
-                       ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0);
-                       free(d.dptr);
-               }
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-               /* Traverse through them. */
-               ok1(tdb_traverse(tdb, trav, NULL) == j);
-
-               /* Empty the first chain-worth. */
-               for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++)
-                       ok1(tdb_delete(tdb, key) == 0);
-
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-               for (j = (1 << TDB_HASH_GROUP_BITS);
-                    j < (16 << TDB_HASH_GROUP_BITS);
-                    j++) {
-                       ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
-                       ok1(d.dsize == sizeof(j));
-                       ok1(d.dptr != NULL);
-                       ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0);
-                       free(d.dptr);
-               }
-
-               /* Traverse through them. */
-               ok1(tdb_traverse(tdb, trav, NULL)
-                   == (15 << TDB_HASH_GROUP_BITS));
-
-               /* Re-add */
-               for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++) {
-                       ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
-               }
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-               /* Now try deleting as we go. */
-               ok1(tdb_traverse(tdb, trav, trav)
-                   == (16 << TDB_HASH_GROUP_BITS));
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               ok1(tdb_traverse(tdb, trav, NULL) == 0);
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/lib/tdb2/test/run-30-exhaust-before-expand.c b/lib/tdb2/test/run-30-exhaust-before-expand.c
deleted file mode 100644 (file)
index 13bb946..0000000
+++ /dev/null
@@ -1,71 +0,0 @@
-#include "tdb2-source.h"
-#include "tap-interface.h"
-#include "logging.h"
-
-static bool empty_freetable(struct tdb_context *tdb)
-{
-       struct tdb_freetable ftab;
-       unsigned int i;
-
-       /* Now, free table should be completely exhausted in zone 0 */
-       if (tdb_read_convert(tdb, tdb->ftable_off, &ftab, sizeof(ftab)) != 0)
-               abort();
-
-       for (i = 0; i < sizeof(ftab.buckets)/sizeof(ftab.buckets[0]); i++) {
-               if (ftab.buckets[i])
-                       return false;
-       }
-       return true;
-}
-
-
-int main(int argc, char *argv[])
-{
-       unsigned int i, j;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT };
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 9 + 1);
-
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               TDB_DATA k;
-               uint64_t size;
-               bool was_empty = false;
-
-               k.dptr = (void *)&j;
-               k.dsize = sizeof(j);
-
-               tdb = tdb_open("run-30-exhaust-before-expand.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               ok1(empty_freetable(tdb));
-               /* Need some hash lock for expand. */
-               ok1(tdb_lock_hashes(tdb, 0, 1, F_WRLCK, TDB_LOCK_WAIT) == 0);
-               /* Create some free space. */
-               ok1(tdb_expand(tdb, 1) == 0);
-               ok1(tdb_unlock_hashes(tdb, 0, 1, F_WRLCK) == 0);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               ok1(!empty_freetable(tdb));
-
-               size = tdb->file->map_size;
-               /* Insert minimal-length records until we expand. */
-               for (j = 0; tdb->file->map_size == size; j++) {
-                       was_empty = empty_freetable(tdb);
-                       if (tdb_store(tdb, k, k, TDB_INSERT) != 0)
-                               err(1, "Failed to store record %i", j);
-               }
-
-               /* Would have been empty before expansion, but no longer. */
-               ok1(was_empty);
-               ok1(!empty_freetable(tdb));
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/lib/tdb2/test/run-50-multiple-freelists.c b/lib/tdb2/test/run-50-multiple-freelists.c
deleted file mode 100644 (file)
index b102876..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
-#include "tdb2-source.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "layout.h"
-
-int main(int argc, char *argv[])
-{
-       tdb_off_t off;
-       struct tdb_context *tdb;
-       struct tdb_layout *layout;
-       TDB_DATA key, data;
-       union tdb_attribute seed;
-
-       /* This seed value previously tickled a layout.c bug. */
-       seed.base.attr = TDB_ATTRIBUTE_SEED;
-       seed.seed.seed = 0xb1142bc054d035b4ULL;
-       seed.base.next = &tap_log_attr;
-
-       plan_tests(11);
-       key = tdb_mkdata("Hello", 5);
-       data = tdb_mkdata("world", 5);
-
-       /* Create a TDB with three free tables. */
-       layout = new_tdb_layout();
-       tdb_layout_add_freetable(layout);
-       tdb_layout_add_freetable(layout);
-       tdb_layout_add_freetable(layout);
-       tdb_layout_add_free(layout, 80, 0);
-       /* Used record prevent coalescing. */
-       tdb_layout_add_used(layout, key, data, 6);
-       tdb_layout_add_free(layout, 160, 1);
-       key.dsize--;
-       tdb_layout_add_used(layout, key, data, 7);
-       tdb_layout_add_free(layout, 320, 2);
-       key.dsize--;
-       tdb_layout_add_used(layout, key, data, 8);
-       tdb_layout_add_free(layout, 40, 0);
-       tdb = tdb_layout_get(layout, free, &seed);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-       off = get_free(tdb, 0, 80 - sizeof(struct tdb_used_record), 0,
-                      TDB_USED_MAGIC, 0);
-       ok1(off == layout->elem[3].base.off);
-       ok1(tdb->ftable_off == layout->elem[0].base.off);
-
-       off = get_free(tdb, 0, 160 - sizeof(struct tdb_used_record), 0,
-                      TDB_USED_MAGIC, 0);
-       ok1(off == layout->elem[5].base.off);
-       ok1(tdb->ftable_off == layout->elem[1].base.off);
-
-       off = get_free(tdb, 0, 320 - sizeof(struct tdb_used_record), 0,
-                      TDB_USED_MAGIC, 0);
-       ok1(off == layout->elem[7].base.off);
-       ok1(tdb->ftable_off == layout->elem[2].base.off);
-
-       off = get_free(tdb, 0, 40 - sizeof(struct tdb_used_record), 0,
-                      TDB_USED_MAGIC, 0);
-       ok1(off == layout->elem[9].base.off);
-       ok1(tdb->ftable_off == layout->elem[0].base.off);
-
-       /* Now we fail. */
-       off = get_free(tdb, 0, 0, 1, TDB_USED_MAGIC, 0);
-       ok1(off == 0);
-
-       tdb_close(tdb);
-       tdb_layout_free(layout);
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/lib/tdb2/test/run-64-bit-tdb.c b/lib/tdb2/test/run-64-bit-tdb.c
deleted file mode 100644 (file)
index ef6e243..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-#include "tdb2-source.h"
-#include "tap-interface.h"
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT };
-
-       if (sizeof(off_t) <= 4) {
-               plan_tests(1);
-               pass("No 64 bit off_t");
-               return exit_status();
-       }
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 14);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               off_t old_size;
-               TDB_DATA k, d;
-               struct hash_info h;
-               struct tdb_used_record rec;
-               tdb_off_t off;
-
-               tdb = tdb_open("run-64-bit-tdb.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               old_size = tdb->file->map_size;
-
-               /* This makes a sparse file */
-               ok1(ftruncate(tdb->file->fd, 0xFFFFFFF0) == 0);
-               ok1(add_free_record(tdb, old_size, 0xFFFFFFF0 - old_size,
-                                   TDB_LOCK_WAIT, false) == TDB_SUCCESS);
-
-               /* Now add a little record past the 4G barrier. */
-               ok1(tdb_expand_file(tdb, 100) == TDB_SUCCESS);
-               ok1(add_free_record(tdb, 0xFFFFFFF0, 100, TDB_LOCK_WAIT, false)
-                   == TDB_SUCCESS);
-
-               ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-
-               /* Test allocation path. */
-               k = tdb_mkdata("key", 4);
-               d = tdb_mkdata("data", 5);
-               ok1(tdb_store(tdb, k, d, TDB_INSERT) == 0);
-               ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-
-               /* Make sure it put it at end as we expected. */
-               off = find_and_lock(tdb, k, F_RDLCK, &h, &rec, NULL);
-               ok1(off >= 0xFFFFFFF0);
-               tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
-
-               ok1(tdb_fetch(tdb, k, &d) == 0);
-               ok1(d.dsize == 5);
-               ok1(strcmp((char *)d.dptr, "data") == 0);
-               free(d.dptr);
-
-               ok1(tdb_delete(tdb, k) == 0);
-               ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-
-               tdb_close(tdb);
-       }
-
-       /* We might get messages about mmap failing, so don't test
-        * tap_log_messages */
-       return exit_status();
-}
diff --git a/lib/tdb2/test/run-90-get-set-attributes.c b/lib/tdb2/test/run-90-get-set-attributes.c
deleted file mode 100644 (file)
index edf0735..0000000
+++ /dev/null
@@ -1,159 +0,0 @@
-#include "tdb2-source.h"
-#include "tap-interface.h"
-#include "logging.h"
-
-static int mylock(int fd, int rw, off_t off, off_t len, bool waitflag,
-                 void *unused)
-{
-       return 0;
-}
-
-static int myunlock(int fd, int rw, off_t off, off_t len, void *unused)
-{
-       return 0;
-}
-
-static uint64_t hash_fn(const void *key, size_t len, uint64_t seed,
-                       void *priv)
-{
-       return 0;
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
-       union tdb_attribute seed_attr;
-       union tdb_attribute hash_attr;
-       union tdb_attribute lock_attr;
-
-       seed_attr.base.attr = TDB_ATTRIBUTE_SEED;
-       seed_attr.base.next = &hash_attr;
-       seed_attr.seed.seed = 100;
-
-       hash_attr.base.attr = TDB_ATTRIBUTE_HASH;
-       hash_attr.base.next = &lock_attr;
-       hash_attr.hash.fn = hash_fn;
-       hash_attr.hash.data = &hash_attr;
-
-       lock_attr.base.attr = TDB_ATTRIBUTE_FLOCK;
-       lock_attr.base.next = &tap_log_attr;
-       lock_attr.flock.lock = mylock;
-       lock_attr.flock.unlock = myunlock;
-       lock_attr.flock.data = &lock_attr;
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 50);
-
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               union tdb_attribute attr;
-
-               /* First open with no attributes. */
-               tdb = tdb_open("run-90-get-set-attributes.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, NULL);
-               ok1(tdb);
-
-               /* Get log on no attributes will fail */
-               attr.base.attr = TDB_ATTRIBUTE_LOG;
-               ok1(tdb_get_attribute(tdb, &attr) == TDB_ERR_NOEXIST);
-               /* These always work. */
-               attr.base.attr = TDB_ATTRIBUTE_HASH;
-               ok1(tdb_get_attribute(tdb, &attr) == 0);
-               ok1(attr.base.attr == TDB_ATTRIBUTE_HASH);
-               ok1(attr.hash.fn == tdb_jenkins_hash);
-               attr.base.attr = TDB_ATTRIBUTE_FLOCK;
-               ok1(tdb_get_attribute(tdb, &attr) == 0);
-               ok1(attr.base.attr == TDB_ATTRIBUTE_FLOCK);
-               ok1(attr.flock.lock == tdb_fcntl_lock);
-               ok1(attr.flock.unlock == tdb_fcntl_unlock);
-               attr.base.attr = TDB_ATTRIBUTE_SEED;
-               ok1(tdb_get_attribute(tdb, &attr) == 0);
-               ok1(attr.base.attr == TDB_ATTRIBUTE_SEED);
-               /* This is possible, just astronomically unlikely. */
-               ok1(attr.seed.seed != 0);
-
-               /* Unset attributes. */
-               tdb_unset_attribute(tdb, TDB_ATTRIBUTE_LOG);
-               tdb_unset_attribute(tdb, TDB_ATTRIBUTE_FLOCK);
-
-               /* Set them. */
-               ok1(tdb_set_attribute(tdb, &tap_log_attr) == 0);
-               ok1(tdb_set_attribute(tdb, &lock_attr) == 0);
-               /* These should fail. */
-               ok1(tdb_set_attribute(tdb, &seed_attr) == TDB_ERR_EINVAL);
-               ok1(tap_log_messages == 1);
-               ok1(tdb_set_attribute(tdb, &hash_attr) == TDB_ERR_EINVAL);
-               ok1(tap_log_messages == 2);
-               tap_log_messages = 0;
-
-               /* Getting them should work as expected. */
-               attr.base.attr = TDB_ATTRIBUTE_LOG;
-               ok1(tdb_get_attribute(tdb, &attr) == 0);
-               ok1(attr.base.attr == TDB_ATTRIBUTE_LOG);
-               ok1(attr.log.fn == tap_log_attr.log.fn);
-               ok1(attr.log.data == tap_log_attr.log.data);
-
-               attr.base.attr = TDB_ATTRIBUTE_FLOCK;
-               ok1(tdb_get_attribute(tdb, &attr) == 0);
-               ok1(attr.base.attr == TDB_ATTRIBUTE_FLOCK);
-               ok1(attr.flock.lock == mylock);
-               ok1(attr.flock.unlock == myunlock);
-               ok1(attr.flock.data == &lock_attr);
-
-               /* Unset them again. */
-               tdb_unset_attribute(tdb, TDB_ATTRIBUTE_FLOCK);
-               ok1(tap_log_messages == 0);
-               tdb_unset_attribute(tdb, TDB_ATTRIBUTE_LOG);
-               ok1(tap_log_messages == 0);
-
-               tdb_close(tdb);
-               ok1(tap_log_messages == 0);
-
-               /* Now open with all attributes. */
-               tdb = tdb_open("run-90-get-set-attributes.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600,
-                              &seed_attr);
-
-               ok1(tdb);
-
-               /* Get will succeed */
-               attr.base.attr = TDB_ATTRIBUTE_LOG;
-               ok1(tdb_get_attribute(tdb, &attr) == 0);
-               ok1(attr.base.attr == TDB_ATTRIBUTE_LOG);
-               ok1(attr.log.fn == tap_log_attr.log.fn);
-               ok1(attr.log.data == tap_log_attr.log.data);
-
-               attr.base.attr = TDB_ATTRIBUTE_HASH;
-               ok1(tdb_get_attribute(tdb, &attr) == 0);
-               ok1(attr.base.attr == TDB_ATTRIBUTE_HASH);
-               ok1(attr.hash.fn == hash_fn);
-               ok1(attr.hash.data == &hash_attr);
-
-               attr.base.attr = TDB_ATTRIBUTE_FLOCK;
-               ok1(tdb_get_attribute(tdb, &attr) == 0);
-               ok1(attr.base.attr == TDB_ATTRIBUTE_FLOCK);
-               ok1(attr.flock.lock == mylock);
-               ok1(attr.flock.unlock == myunlock);
-               ok1(attr.flock.data == &lock_attr);
-
-               attr.base.attr = TDB_ATTRIBUTE_SEED;
-               ok1(tdb_get_attribute(tdb, &attr) == 0);
-               ok1(attr.base.attr == TDB_ATTRIBUTE_SEED);
-               ok1(attr.seed.seed == seed_attr.seed.seed);
-
-               /* Unset attributes. */
-               tdb_unset_attribute(tdb, TDB_ATTRIBUTE_HASH);
-               ok1(tap_log_messages == 1);
-               tdb_unset_attribute(tdb, TDB_ATTRIBUTE_SEED);
-               ok1(tap_log_messages == 2);
-               tdb_unset_attribute(tdb, TDB_ATTRIBUTE_FLOCK);
-               tdb_unset_attribute(tdb, TDB_ATTRIBUTE_LOG);
-               ok1(tap_log_messages == 2);
-               tap_log_messages = 0;
-
-               tdb_close(tdb);
-
-       }
-       return exit_status();
-}
diff --git a/lib/tdb2/test/run-expand-in-transaction.c b/lib/tdb2/test/run-expand-in-transaction.c
deleted file mode 100644 (file)
index 6b22d2e..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-#include "tdb2-source.h"
-#include "tap-interface.h"
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
-       struct tdb_data key = tdb_mkdata("key", 3);
-       struct tdb_data data = tdb_mkdata("data", 4);
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 7 + 1);
-
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               size_t size;
-               tdb = tdb_open("run-expand-in-transaction.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               size = tdb->file->map_size;
-               ok1(tdb_transaction_start(tdb) == 0);
-               ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-               ok1(tdb->file->map_size > size);
-               ok1(tdb_transaction_commit(tdb) == 0);
-               ok1(tdb->file->map_size > size);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/lib/tdb2/test/run-seed.c b/lib/tdb2/test/run-seed.c
deleted file mode 100644 (file)
index 9c90833..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-#include "tdb2-source.h"
-#include "tap-interface.h"
-#include "logging.h"
-
-static int log_count = 0;
-
-/* Normally we get a log when setting random seed. */
-static void my_log_fn(struct tdb_context *tdb,
-                     enum tdb_log_level level,
-                     enum TDB_ERROR ecode,
-                     const char *message, void *priv)
-{
-       log_count++;
-}
-
-static union tdb_attribute log_attr = {
-       .log = { .base = { .attr = TDB_ATTRIBUTE_LOG },
-                .fn = my_log_fn }
-};
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       union tdb_attribute attr;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT };
-
-       attr.seed.base.attr = TDB_ATTRIBUTE_SEED;
-       attr.seed.base.next = &log_attr;
-       attr.seed.seed = 42;
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 4 + 4 * 3);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               struct tdb_header hdr;
-               int fd;
-               tdb = tdb_open("run-seed.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &attr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               ok1(tdb->hash_seed == 42);
-               ok1(log_count == 0);
-               tdb_close(tdb);
-
-               if (flags[i] & TDB_INTERNAL)
-                       continue;
-
-               fd = open("run-seed.tdb", O_RDONLY);
-               ok1(fd >= 0);
-               ok1(read(fd, &hdr, sizeof(hdr)) == sizeof(hdr));
-               if (flags[i] & TDB_CONVERT)
-                       ok1(bswap_64(hdr.hash_seed) == 42);
-               else
-                       ok1(hdr.hash_seed == 42);
-               close(fd);
-       }
-       return exit_status();
-}
diff --git a/lib/tdb2/test/run-tdb_errorstr.c b/lib/tdb2/test/run-tdb_errorstr.c
deleted file mode 100644 (file)
index 7a2da25..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-#include "tdb2-source.h"
-#include "tap-interface.h"
-
-int main(int argc, char *argv[])
-{
-       enum TDB_ERROR e;
-       plan_tests(TDB_ERR_RDONLY*-1 + 2);
-
-       for (e = TDB_SUCCESS; e >= TDB_ERR_RDONLY; e--) {
-               switch (e) {
-               case TDB_SUCCESS:
-                       ok1(!strcmp(tdb_errorstr(e),
-                                   "Success"));
-                       break;
-               case TDB_ERR_IO:
-                       ok1(!strcmp(tdb_errorstr(e),
-                                   "IO Error"));
-                       break;
-               case TDB_ERR_LOCK:
-                       ok1(!strcmp(tdb_errorstr(e),
-                                   "Locking error"));
-                       break;
-               case TDB_ERR_OOM:
-                       ok1(!strcmp(tdb_errorstr(e),
-                                   "Out of memory"));
-                       break;
-               case TDB_ERR_EXISTS:
-                       ok1(!strcmp(tdb_errorstr(e),
-                                   "Record exists"));
-                       break;
-               case TDB_ERR_EINVAL:
-                       ok1(!strcmp(tdb_errorstr(e),
-                                   "Invalid parameter"));
-                       break;
-               case TDB_ERR_NOEXIST:
-                       ok1(!strcmp(tdb_errorstr(e),
-                                   "Record does not exist"));
-                       break;
-               case TDB_ERR_RDONLY:
-                       ok1(!strcmp(tdb_errorstr(e),
-                                   "write not permitted"));
-                       break;
-               case TDB_ERR_CORRUPT:
-                       ok1(!strcmp(tdb_errorstr(e),
-                                   "Corrupt database"));
-                       break;
-               }
-       }
-       ok1(!strcmp(tdb_errorstr(e), "Invalid error code"));
-
-       return exit_status();
-}
diff --git a/lib/tdb2/tools/Makefile b/lib/tdb2/tools/Makefile
deleted file mode 100644 (file)
index 11188c3..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-OBJS:=../../tdb2.o ../../hash.o ../../tally.o
-CFLAGS:=-I../../.. -I.. -Wall -g -O3 #-g -pg
-LDFLAGS:=-L../../..
-
-default: tdb2torture tdb2tool tdb2dump tdb2restore mktdb2 speed growtdb-bench
-
-tdb2dump: tdb2dump.c $(OBJS)
-tdb2restore: tdb2restore.c $(OBJS)
-tdb2torture: tdb2torture.c $(OBJS)
-tdb2tool: tdb2tool.c $(OBJS)
-mktdb2: mktdb2.c $(OBJS)
-speed: speed.c $(OBJS)
-growtdb-bench: growtdb-bench.c $(OBJS)
-
-clean:
-       rm -f tdb2torture tdb2dump tdb2restore tdb2tool mktdb2 speed growtdb-bench
diff --git a/lib/tdb2/tools/growtdb-bench.c b/lib/tdb2/tools/growtdb-bench.c
deleted file mode 100644 (file)
index 476e8be..0000000
+++ /dev/null
@@ -1,114 +0,0 @@
-#include "tdb2.h"
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <ccan/err/err.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-
-static void logfn(struct tdb_context *tdb,
-                 enum tdb_log_level level,
-                 enum TDB_ERROR ecode,
-                 const char *message,
-                 void *data)
-{
-       fprintf(stderr, "tdb:%s:%s:%s\n",
-               tdb_name(tdb), tdb_errorstr(ecode), message);
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int i, j, users, groups;
-       TDB_DATA idxkey, idxdata;
-       TDB_DATA k, d, gk;
-       char cmd[100];
-       struct tdb_context *tdb;
-       enum TDB_ERROR ecode;
-       union tdb_attribute log;
-
-       if (argc != 3) {
-               printf("Usage: growtdb-bench <users> <groups>\n");
-               exit(1);
-       }
-       users = atoi(argv[1]);
-       groups = atoi(argv[2]);
-
-       sprintf(cmd, "cat /proc/%i/statm", getpid());
-
-       log.base.attr = TDB_ATTRIBUTE_LOG;
-       log.base.next = NULL;
-       log.log.fn = logfn;
-
-       tdb = tdb_open("/tmp/growtdb.tdb", TDB_DEFAULT,
-                      O_RDWR|O_CREAT|O_TRUNC, 0600, &log);
-
-       idxkey.dptr = (unsigned char *)"User index";
-       idxkey.dsize = strlen("User index");
-       idxdata.dsize = 51;
-       idxdata.dptr = calloc(idxdata.dsize, 1);
-
-       /* Create users. */
-       k.dsize = 48;
-       k.dptr = calloc(k.dsize, 1);
-       d.dsize = 64;
-       d.dptr = calloc(d.dsize, 1);
-
-       tdb_transaction_start(tdb);
-       for (i = 0; i < users; i++) {
-               memcpy(k.dptr, &i, sizeof(i));
-               ecode = tdb_store(tdb, k, d, TDB_INSERT);
-               if (ecode != TDB_SUCCESS)
-                       errx(1, "tdb insert failed: %s", tdb_errorstr(ecode));
-
-               /* This simulates a growing index record. */
-               ecode = tdb_append(tdb, idxkey, idxdata);
-               if (ecode != TDB_SUCCESS)
-                       errx(1, "tdb append failed: %s", tdb_errorstr(ecode));
-       }
-       if ((ecode = tdb_transaction_commit(tdb)) != 0)
-               errx(1, "tdb commit1 failed: %s", tdb_errorstr(ecode));
-
-       if ((ecode = tdb_check(tdb, NULL, NULL)) != 0)
-               errx(1, "tdb_check failed after initial insert!");
-
-       system(cmd);
-
-       /* Now put them all in groups: add 32 bytes to each record for
-        * a group. */
-       gk.dsize = 48;
-       gk.dptr = calloc(k.dsize, 1);
-       gk.dptr[gk.dsize-1] = 1;
-
-       d.dsize = 32;
-       for (i = 0; i < groups; i++) {
-               tdb_transaction_start(tdb);
-               /* Create the "group". */
-               memcpy(gk.dptr, &i, sizeof(i));
-               ecode = tdb_store(tdb, gk, d, TDB_INSERT);
-               if (ecode != TDB_SUCCESS)
-                       errx(1, "tdb insert failed: %s", tdb_errorstr(ecode));
-
-               /* Now populate it. */
-               for (j = 0; j < users; j++) {
-                       /* Append to the user. */
-                       memcpy(k.dptr, &j, sizeof(j));
-                       if ((ecode = tdb_append(tdb, k, d)) != 0)
-                               errx(1, "tdb append failed: %s",
-                                    tdb_errorstr(ecode));
-
-                       /* Append to the group. */
-                       if ((ecode = tdb_append(tdb, gk, d)) != 0)
-                               errx(1, "tdb append failed: %s",
-                                    tdb_errorstr(ecode));
-               }
-               if ((ecode = tdb_transaction_commit(tdb)) != 0)
-                       errx(1, "tdb commit2 failed: %s", tdb_errorstr(ecode));
-               if ((ecode = tdb_check(tdb, NULL, NULL)) != 0)
-                       errx(1, "tdb_check failed after iteration %i!", i);
-               system(cmd);
-       }
-
-       return 0;
-}
diff --git a/lib/tdb2/transaction.c b/lib/tdb2/transaction.c
deleted file mode 100644 (file)
index 2b71471..0000000
+++ /dev/null
@@ -1,1322 +0,0 @@
- /*
-   Unix SMB/CIFS implementation.
-
-   trivial database library
-
-   Copyright (C) Andrew Tridgell              2005
-   Copyright (C) Rusty Russell                2010
-
-     ** NOTE! The following LGPL license applies to the tdb
-     ** library. This does NOT imply that all of Samba is released
-     ** under the LGPL
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "private.h"
-#define SAFE_FREE(x) do { if ((x) != NULL) {free((void *)x); (x)=NULL;} } while(0)
-
-/*
-  transaction design:
-
-  - only allow a single transaction at a time per database. This makes
-    using the transaction API simpler, as otherwise the caller would
-    have to cope with temporary failures in transactions that conflict
-    with other current transactions
-
-  - keep the transaction recovery information in the same file as the
-    database, using a special 'transaction recovery' record pointed at
-    by the header. This removes the need for extra journal files as
-    used by some other databases
-
-  - dynamically allocated the transaction recover record, re-using it
-    for subsequent transactions. If a larger record is needed then
-    tdb_free() the old record to place it on the normal tdb freelist
-    before allocating the new record
-
-  - during transactions, keep a linked list of writes all that have
-    been performed by intercepting all tdb_write() calls. The hooked
-    transaction versions of tdb_read() and tdb_write() check this
-    linked list and try to use the elements of the list in preference
-    to the real database.
-
-  - don't allow any locks to be held when a transaction starts,
-    otherwise we can end up with deadlock (plus lack of lock nesting
-    in POSIX locks would mean the lock is lost)
-
-  - if the caller gains a lock during the transaction but doesn't
-    release it then fail the commit
-
-  - allow for nested calls to tdb_transaction_start(), re-using the
-    existing transaction record. If the inner transaction is canceled
-    then a subsequent commit will fail
-
-  - keep a mirrored copy of the tdb hash chain heads to allow for the
-    fast hash heads scan on traverse, updating the mirrored copy in
-    the transaction version of tdb_write
-
-  - allow callers to mix transaction and non-transaction use of tdb,
-    although once a transaction is started then an exclusive lock is
-    gained until the transaction is committed or canceled
-
-  - the commit stategy involves first saving away all modified data
-    into a linearised buffer in the transaction recovery area, then
-    marking the transaction recovery area with a magic value to
-    indicate a valid recovery record. In total 4 fsync/msync calls are
-    needed per commit to prevent race conditions. It might be possible
-    to reduce this to 3 or even 2 with some more work.
-
-  - check for a valid recovery record on open of the tdb, while the
-    open lock is held. Automatically recover from the transaction
-    recovery area if needed, then continue with the open as
-    usual. This allows for smooth crash recovery with no administrator
-    intervention.
-
-  - if TDB_NOSYNC is passed to flags in tdb_open then transactions are
-    still available, but no transaction recovery area is used and no
-    fsync/msync calls are made.
-*/
-
-/*
-  hold the context of any current transaction
-*/
-struct tdb_transaction {
-       /* the original io methods - used to do IOs to the real db */
-       const struct tdb_methods *io_methods;
-
-       /* the list of transaction blocks. When a block is first
-          written to, it gets created in this list */
-       uint8_t **blocks;
-       size_t num_blocks;
-       size_t last_block_size; /* number of valid bytes in the last block */
-
-       /* non-zero when an internal transaction error has
-          occurred. All write operations will then fail until the
-          transaction is ended */
-       int transaction_error;
-
-       /* when inside a transaction we need to keep track of any
-          nested tdb_transaction_start() calls, as these are allowed,
-          but don't create a new transaction */
-       unsigned int nesting;
-
-       /* set when a prepare has already occurred */
-       bool prepared;
-       tdb_off_t magic_offset;
-
-       /* old file size before transaction */
-       tdb_len_t old_map_size;
-};
-
-/* This doesn't really need to be pagesize, but we use it for similar reasons. */
-#define PAGESIZE 65536
-
-/*
-  read while in a transaction. We need to check first if the data is in our list
-  of transaction elements, then if not do a real read
-*/
-static enum TDB_ERROR transaction_read(struct tdb_context *tdb, tdb_off_t off,
-                                      void *buf, tdb_len_t len)
-{
-       size_t blk;
-       enum TDB_ERROR ecode;
-
-       /* break it down into block sized ops */
-       while (len + (off % PAGESIZE) > PAGESIZE) {
-               tdb_len_t len2 = PAGESIZE - (off % PAGESIZE);
-               ecode = transaction_read(tdb, off, buf, len2);
-               if (ecode != TDB_SUCCESS) {
-                       return ecode;
-               }
-               len -= len2;
-               off += len2;
-               buf = (void *)(len2 + (char *)buf);
-       }
-
-       if (len == 0) {
-               return TDB_SUCCESS;
-       }
-
-       blk = off / PAGESIZE;
-
-       /* see if we have it in the block list */
-       if (tdb->transaction->num_blocks <= blk ||
-           tdb->transaction->blocks[blk] == NULL) {
-               /* nope, do a real read */
-               ecode = tdb->transaction->io_methods->tread(tdb, off, buf, len);
-               if (ecode != TDB_SUCCESS) {
-                       goto fail;
-               }
-               return 0;
-       }
-
-       /* it is in the block list. Now check for the last block */
-       if (blk == tdb->transaction->num_blocks-1) {
-               if (len > tdb->transaction->last_block_size) {
-                       ecode = TDB_ERR_IO;
-                       goto fail;
-               }
-       }
-
-       /* now copy it out of this block */
-       memcpy(buf, tdb->transaction->blocks[blk] + (off % PAGESIZE), len);
-       return TDB_SUCCESS;
-
-fail:
-       tdb->transaction->transaction_error = 1;
-       return tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                         "transaction_read: failed at off=%zu len=%zu",
-                         (size_t)off, (size_t)len);
-}
-
-
-/*
-  write while in a transaction
-*/
-static enum TDB_ERROR transaction_write(struct tdb_context *tdb, tdb_off_t off,
-                                       const void *buf, tdb_len_t len)
-{
-       size_t blk;
-       enum TDB_ERROR ecode;
-
-       /* Only a commit is allowed on a prepared transaction */
-       if (tdb->transaction->prepared) {
-               ecode = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_ERROR,
-                                  "transaction_write: transaction already"
-                                  " prepared, write not allowed");
-               goto fail;
-       }
-
-       /* break it up into block sized chunks */
-       while (len + (off % PAGESIZE) > PAGESIZE) {
-               tdb_len_t len2 = PAGESIZE - (off % PAGESIZE);
-               ecode = transaction_write(tdb, off, buf, len2);
-               if (ecode != TDB_SUCCESS) {
-                       return ecode;
-               }
-               len -= len2;
-               off += len2;
-               if (buf != NULL) {
-                       buf = (const void *)(len2 + (const char *)buf);
-               }
-       }
-
-       if (len == 0) {
-               return TDB_SUCCESS;
-       }
-
-       blk = off / PAGESIZE;
-       off = off % PAGESIZE;
-
-       if (tdb->transaction->num_blocks <= blk) {
-               uint8_t **new_blocks;
-               /* expand the blocks array */
-               if (tdb->transaction->blocks == NULL) {
-                       new_blocks = (uint8_t **)malloc(
-                               (blk+1)*sizeof(uint8_t *));
-               } else {
-                       new_blocks = (uint8_t **)realloc(
-                               tdb->transaction->blocks,
-                               (blk+1)*sizeof(uint8_t *));
-               }
-               if (new_blocks == NULL) {
-                       ecode = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                          "transaction_write:"
-                                          " failed to allocate");
-                       goto fail;
-               }
-               memset(&new_blocks[tdb->transaction->num_blocks], 0,
-                      (1+(blk - tdb->transaction->num_blocks))*sizeof(uint8_t *));
-               tdb->transaction->blocks = new_blocks;
-               tdb->transaction->num_blocks = blk+1;
-               tdb->transaction->last_block_size = 0;
-       }
-
-       /* allocate and fill a block? */
-       if (tdb->transaction->blocks[blk] == NULL) {
-               tdb->transaction->blocks[blk] = (uint8_t *)calloc(PAGESIZE, 1);
-               if (tdb->transaction->blocks[blk] == NULL) {
-                       ecode = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                          "transaction_write:"
-                                          " failed to allocate");
-                       goto fail;
-               }
-               if (tdb->transaction->old_map_size > blk * PAGESIZE) {
-                       tdb_len_t len2 = PAGESIZE;
-                       if (len2 + (blk * PAGESIZE) > tdb->transaction->old_map_size) {
-                               len2 = tdb->transaction->old_map_size - (blk * PAGESIZE);
-                       }
-                       ecode = tdb->transaction->io_methods->tread(tdb,
-                                       blk * PAGESIZE,
-                                       tdb->transaction->blocks[blk],
-                                       len2);
-                       if (ecode != TDB_SUCCESS) {
-                               ecode = tdb_logerr(tdb, ecode,
-                                                  TDB_LOG_ERROR,
-                                                  "transaction_write:"
-                                                  " failed to"
-                                                  " read old block: %s",
-                                                  strerror(errno));
-                               SAFE_FREE(tdb->transaction->blocks[blk]);
-                               goto fail;
-                       }
-                       if (blk == tdb->transaction->num_blocks-1) {
-                               tdb->transaction->last_block_size = len2;
-                       }
-               }
-       }
-
-       /* overwrite part of an existing block */
-       if (buf == NULL) {
-               memset(tdb->transaction->blocks[blk] + off, 0, len);
-       } else {
-               memcpy(tdb->transaction->blocks[blk] + off, buf, len);
-       }
-       if (blk == tdb->transaction->num_blocks-1) {
-               if (len + off > tdb->transaction->last_block_size) {
-                       tdb->transaction->last_block_size = len + off;
-               }
-       }
-
-       return TDB_SUCCESS;
-
-fail:
-       tdb->transaction->transaction_error = 1;
-       return ecode;
-}
-
-
-/*
-  write while in a transaction - this variant never expands the transaction blocks, it only
-  updates existing blocks. This means it cannot change the recovery size
-*/
-static void transaction_write_existing(struct tdb_context *tdb, tdb_off_t off,
-                                      const void *buf, tdb_len_t len)
-{
-       size_t blk;
-
-       /* break it up into block sized chunks */
-       while (len + (off % PAGESIZE) > PAGESIZE) {
-               tdb_len_t len2 = PAGESIZE - (off % PAGESIZE);
-               transaction_write_existing(tdb, off, buf, len2);
-               len -= len2;
-               off += len2;
-               if (buf != NULL) {
-                       buf = (const void *)(len2 + (const char *)buf);
-               }
-       }
-
-       if (len == 0) {
-               return;
-       }
-
-       blk = off / PAGESIZE;
-       off = off % PAGESIZE;
-
-       if (tdb->transaction->num_blocks <= blk ||
-           tdb->transaction->blocks[blk] == NULL) {
-               return;
-       }
-
-       if (blk == tdb->transaction->num_blocks-1 &&
-           off + len > tdb->transaction->last_block_size) {
-               if (off >= tdb->transaction->last_block_size) {
-                       return;
-               }
-               len = tdb->transaction->last_block_size - off;
-       }
-
-       /* overwrite part of an existing block */
-       memcpy(tdb->transaction->blocks[blk] + off, buf, len);
-}
-
-
-/*
-  out of bounds check during a transaction
-*/
-static enum TDB_ERROR transaction_oob(struct tdb_context *tdb,
-                                     tdb_off_t off, tdb_len_t len, bool probe)
-{
-       if ((off + len >= off && off + len <= tdb->file->map_size) || probe) {
-               return TDB_SUCCESS;
-       }
-
-       tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                  "tdb_oob len %lld beyond transaction size %lld",
-                  (long long)(off + len),
-                  (long long)tdb->file->map_size);
-       return TDB_ERR_IO;
-}
-
-/*
-  transaction version of tdb_expand().
-*/
-static enum TDB_ERROR transaction_expand_file(struct tdb_context *tdb,
-                                             tdb_off_t addition)
-{
-       enum TDB_ERROR ecode;
-
-       /* add a write to the transaction elements, so subsequent
-          reads see the zero data */
-       ecode = transaction_write(tdb, tdb->file->map_size, NULL, addition);
-       if (ecode == TDB_SUCCESS) {
-               tdb->file->map_size += addition;
-       }
-       return ecode;
-}
-
-/*
-  Try to return a pointer that directly covers [off, off + len).
-
-  write_mode: succeeds only when the range lies inside a single block
-  which the transaction has already copied; otherwise returns NULL.
-  read mode: returns a pointer into the copied block when the range is
-  inside one copied block; when none of the blocks in the range has been
-  copied the request is passed to the underlying direct method; in any
-  other case (a copied block is involved but the range cannot be served
-  from it alone) NULL is returned.
-
-  The transaction_*_direct statistics counters are updated on each call.
-*/
-static void *transaction_direct(struct tdb_context *tdb, tdb_off_t off,
-                               size_t len, bool write_mode)
-{
-       size_t blk = off / PAGESIZE, end_blk;
-
-       /* This is wrong for zero-length blocks, but will fail gracefully */
-       end_blk = (off + len - 1) / PAGESIZE;
-
-       /* Can only do direct if in single block and we've already copied. */
-       if (write_mode) {
-               tdb->stats.transaction_write_direct++;
-               if (blk != end_blk
-                   || blk >= tdb->transaction->num_blocks
-                   || tdb->transaction->blocks[blk] == NULL) {
-                       tdb->stats.transaction_write_direct_fail++;
-                       return NULL;
-               }
-               /* Pointer to the requested offset inside the copied block. */
-               return tdb->transaction->blocks[blk] + off % PAGESIZE;
-       }
-
-       tdb->stats.transaction_read_direct++;
-       /* Single which we have copied? */
-       if (blk == end_blk
-           && blk < tdb->transaction->num_blocks
-           && tdb->transaction->blocks[blk])
-               return tdb->transaction->blocks[blk] + off % PAGESIZE;
-
-       /* Otherwise must be all not copied. */
-       while (blk <= end_blk) {
-               /* Blocks past num_blocks have no transaction copy. */
-               if (blk >= tdb->transaction->num_blocks)
-                       break;
-               if (tdb->transaction->blocks[blk]) {
-                       tdb->stats.transaction_read_direct_fail++;
-                       return NULL;
-               }
-               blk++;
-       }
-       /* Nothing in the range was copied: use the saved I/O methods. */
-       return tdb->transaction->io_methods->direct(tdb, off, len, false);
-}
-
-/*
-  I/O method table that tdb_transaction_start() installs in tdb->io
-  while a transaction is active.  The initialisers are positional, so
-  their order has to match the member order of struct tdb_methods
-  (declared elsewhere -- presumably tread, twrite, oob, expand_file,
-  direct; verify against the header).
-*/
-static const struct tdb_methods transaction_methods = {
-       transaction_read,
-       transaction_write,
-       transaction_oob,
-       transaction_expand_file,
-       transaction_direct,
-};
-
-/*
-  sync to disk
-*/
-static enum TDB_ERROR transaction_sync(struct tdb_context *tdb,
-                                      tdb_off_t offset, tdb_len_t length)
-{
-       if (tdb->flags & TDB_NOSYNC) {
-               return TDB_SUCCESS;
-       }
-
-       if (fsync(tdb->file->fd) != 0) {
-               return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                 "tdb_transaction: fsync failed: %s",
-                                 strerror(errno));
-       }
-#ifdef MS_SYNC
-       if (tdb->file->map_ptr) {
-               tdb_off_t moffset = offset & ~(getpagesize()-1);
-               if (msync(moffset + (char *)tdb->file->map_ptr,
-                         length + (offset - moffset), MS_SYNC) != 0) {
-                       return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                         "tdb_transaction: msync failed: %s",
-                                         strerror(errno));
-               }
-       }
-#endif
-       return TDB_SUCCESS;
-}
-
-
-/*
-  Tear down the current transaction without applying it.
-
-  Inside a nested transaction this only flags transaction_error and
-  pops one nesting level.  At the outermost level it restores map_size,
-  frees the block copies, invalidates any recovery magic already
-  written, drops the locks, restores the saved I/O methods and frees
-  the transaction.  tdb_transaction_commit() also calls this for its
-  final cleanup.
-*/
-static void _tdb_transaction_cancel(struct tdb_context *tdb)
-{
-       int i;
-       enum TDB_ERROR ecode;
-
-       if (tdb->transaction == NULL) {
-               tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
-                          "tdb_transaction_cancel: no transaction");
-               return;
-       }
-
-       /* Nested: remember the failure for the outer levels and unwind
-          one level only. */
-       if (tdb->transaction->nesting != 0) {
-               tdb->transaction->transaction_error = 1;
-               tdb->transaction->nesting--;
-               return;
-       }
-
-       /* Forget any expansion done inside the transaction. */
-       tdb->file->map_size = tdb->transaction->old_map_size;
-
-       /* free all the transaction blocks */
-       for (i=0;i<tdb->transaction->num_blocks;i++) {
-               if (tdb->transaction->blocks[i] != NULL) {
-                       free(tdb->transaction->blocks[i]);
-               }
-       }
-       SAFE_FREE(tdb->transaction->blocks);
-
-       /* magic_offset is set once transaction_setup_recovery() has
-          located the recovery magic. */
-       if (tdb->transaction->magic_offset) {
-               const struct tdb_methods *methods = tdb->transaction->io_methods;
-               uint64_t invalid = TDB_RECOVERY_INVALID_MAGIC;
-
-               /* remove the recovery marker */
-               ecode = methods->twrite(tdb, tdb->transaction->magic_offset,
-                                       &invalid, sizeof(invalid));
-               if (ecode == TDB_SUCCESS)
-                       ecode = transaction_sync(tdb,
-                                                tdb->transaction->magic_offset,
-                                                sizeof(invalid));
-               /* Failure is only logged; the teardown carries on. */
-               if (ecode != TDB_SUCCESS) {
-                       tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                                  "tdb_transaction_cancel: failed to remove"
-                                  " recovery magic");
-               }
-       }
-
-       if (tdb->file->allrecord_lock.count)
-               tdb_allrecord_unlock(tdb, tdb->file->allrecord_lock.ltype);
-
-       /* restore the normal io methods */
-       tdb->io = tdb->transaction->io_methods;
-
-       tdb_transaction_unlock(tdb, F_WRLCK);
-
-       /* The open lock is only held if a commit got as far as taking it. */
-       if (tdb_has_open_lock(tdb))
-               tdb_unlock_open(tdb, F_WRLCK);
-
-       SAFE_FREE(tdb->transaction);
-}
-
-/*
-  start a tdb transaction. No token is returned, as only a single
-  transaction is allowed to be pending per tdb_context
-
-  Returns TDB_SUCCESS or an error code; the same value is always stored
-  in tdb->last_error.  Fails on internal or read-only tdbs, when hash
-  locks are held, when already inside a transaction without
-  TDB_ALLOW_NESTING, on allocation failure, or when a lock cannot be
-  taken.  With TDB_ALLOW_NESTING a repeated start only bumps the nesting
-  count.
-*/
-_PUBLIC_ enum TDB_ERROR tdb_transaction_start(struct tdb_context *tdb)
-{
-       enum TDB_ERROR ecode;
-
-       tdb->stats.transactions++;
-       /* some sanity checks */
-       if (tdb->flags & TDB_INTERNAL) {
-               return tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                                   TDB_LOG_USE_ERROR,
-                                                   "tdb_transaction_start:"
-                                                   " cannot start a"
-                                                   " transaction on an"
-                                                   " internal tdb");
-       }
-
-       if (tdb->flags & TDB_RDONLY) {
-               return tdb->last_error = tdb_logerr(tdb, TDB_ERR_RDONLY,
-                                                   TDB_LOG_USE_ERROR,
-                                                   "tdb_transaction_start:"
-                                                   " cannot start a"
-                                                   " transaction on a "
-                                                   " read-only tdb");
-       }
-
-       /* cope with nested tdb_transaction_start() calls */
-       if (tdb->transaction != NULL) {
-               if (!(tdb->flags & TDB_ALLOW_NESTING)) {
-                       return tdb->last_error
-                               = tdb_logerr(tdb, TDB_ERR_IO,
-                                            TDB_LOG_USE_ERROR,
-                                            "tdb_transaction_start:"
-                                            " already inside transaction");
-               }
-               tdb->transaction->nesting++;
-               tdb->stats.transaction_nest++;
-               /* Report success the same way as every other exit, so
-                  that last_error does not keep a stale value. */
-               return tdb->last_error = TDB_SUCCESS;
-       }
-
-       if (tdb_has_hash_locks(tdb)) {
-               /* the caller must not have any locks when starting a
-                  transaction as otherwise we'll be screwed by lack
-                  of nested locks in POSIX */
-               return tdb->last_error = tdb_logerr(tdb, TDB_ERR_LOCK,
-                                                   TDB_LOG_USE_ERROR,
-                                                   "tdb_transaction_start:"
-                                                   " cannot start a"
-                                                   " transaction with locks"
-                                                   " held");
-       }
-
-       /* calloc: every field (nesting, blocks, magic_offset, ...)
-          starts out zero */
-       tdb->transaction = (struct tdb_transaction *)
-               calloc(sizeof(struct tdb_transaction), 1);
-       if (tdb->transaction == NULL) {
-               return tdb->last_error = tdb_logerr(tdb, TDB_ERR_OOM,
-                                                   TDB_LOG_ERROR,
-                                                   "tdb_transaction_start:"
-                                                   " cannot allocate");
-       }
-
-       /* get the transaction write lock. This is a blocking lock. As
-          discussed with Volker, there are a number of ways we could
-          make this async, which we will probably do in the future */
-       ecode = tdb_transaction_lock(tdb, F_WRLCK);
-       if (ecode != TDB_SUCCESS) {
-               SAFE_FREE(tdb->transaction->blocks);
-               SAFE_FREE(tdb->transaction);
-               return tdb->last_error = ecode;
-       }
-
-       /* get a read lock over entire file. This is upgraded to a write
-          lock during the commit */
-       ecode = tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, true);
-       if (ecode != TDB_SUCCESS) {
-               goto fail_allrecord_lock;
-       }
-
-       /* make sure we know about any file expansions already done by
-          anyone else (a probe, so the result is deliberately ignored) */
-       tdb->io->oob(tdb, tdb->file->map_size, 1, true);
-       tdb->transaction->old_map_size = tdb->file->map_size;
-
-       /* finally hook the io methods, replacing them with
-          transaction specific methods */
-       tdb->transaction->io_methods = tdb->io;
-       tdb->io = &transaction_methods;
-       return tdb->last_error = TDB_SUCCESS;
-
-fail_allrecord_lock:
-       /* undo the transaction lock taken above before bailing out */
-       tdb_transaction_unlock(tdb, F_WRLCK);
-       SAFE_FREE(tdb->transaction->blocks);
-       SAFE_FREE(tdb->transaction);
-       return tdb->last_error = ecode;
-}
-
-
-/*
-  cancel the current transaction
-
-  Public entry point: counts the call in tdb->stats.transaction_cancel
-  and hands over to _tdb_transaction_cancel(), which does the actual
-  teardown (or just unwinds one level of a nested transaction).
-*/
-_PUBLIC_ void tdb_transaction_cancel(struct tdb_context *tdb)
-{
-       tdb->stats.transaction_cancel++;
-       _tdb_transaction_cancel(tdb);
-}
-
-/*
-  Worst-case number of bytes the linearised recovery data can take.
-
-  Each copied block that starts below the pre-transaction file size
-  costs one offset/length header (two tdb_off_t) plus the whole block;
-  the final block counts as last_block_size rather than PAGESIZE.
-*/
-static tdb_len_t tdb_recovery_size(struct tdb_context *tdb)
-{
-       tdb_len_t total = 0;
-       int i;
-
-       for (i = 0; i < tdb->transaction->num_blocks; i++) {
-               /* Blocks beyond the old end of file need no recovery. */
-               if (i * PAGESIZE >= tdb->transaction->old_map_size) {
-                       break;
-               }
-
-               if (tdb->transaction->blocks[i] != NULL) {
-                       total += 2*sizeof(tdb_off_t);
-                       if (i != tdb->transaction->num_blocks-1) {
-                               total += PAGESIZE;
-                       } else {
-                               total += tdb->transaction->last_block_size;
-                       }
-               }
-       }
-
-       return total;
-}
-
-/*
-  Look up the existing recovery area.
-
-  *recovery_offset receives the offset stored in the header's recovery
-  field and *rec the recovery record found there (converted by
-  tdb_convert).  When no area exists, or its magic is neither
-  TDB_RECOVERY_MAGIC nor TDB_RECOVERY_INVALID_MAGIC, the offset and
-  rec->max_len are reported as 0.  Returns TDB_SUCCESS or the read error.
-*/
-static enum TDB_ERROR tdb_recovery_area(struct tdb_context *tdb,
-                                       const struct tdb_methods *methods,
-                                       tdb_off_t *recovery_offset,
-                                       struct tdb_recovery_record *rec)
-{
-       enum TDB_ERROR ecode;
-
-       *recovery_offset = tdb_read_off(tdb,
-                                       offsetof(struct tdb_header, recovery));
-       if (TDB_OFF_IS_ERR(*recovery_offset)) {
-               return TDB_OFF_TO_ERR(*recovery_offset);
-       }
-
-       /* No recovery area recorded in the header. */
-       if (*recovery_offset == 0) {
-               rec->max_len = 0;
-               return TDB_SUCCESS;
-       }
-
-       ecode = methods->tread(tdb, *recovery_offset, rec, sizeof(*rec));
-       if (ecode == TDB_SUCCESS) {
-               tdb_convert(tdb, rec, sizeof(*rec));
-               /* ignore invalid recovery regions: can happen in crash */
-               if (!(rec->magic == TDB_RECOVERY_MAGIC ||
-                     rec->magic == TDB_RECOVERY_INVALID_MAGIC)) {
-                       *recovery_offset = 0;
-                       rec->max_len = 0;
-               }
-       }
-       return ecode;
-}
-
-/* Length of the common prefix of new and old, looking at no more than
- * length bytes. */
-static unsigned int same(const unsigned char *new,
-                        const unsigned char *old,
-                        unsigned int length)
-{
-       unsigned int matched = 0;
-
-       while (matched < length && new[matched] == old[matched]) {
-               matched++;
-       }
-       return matched;
-}
-
-/*
- * Measure the leading region of new/old that should be treated as
- * changed.  The region ends just before the first run of at least
- * min_same identical bytes that is followed by a differing byte, or
- * before a trailing identical run of at least min_same bytes.  Shorter
- * identical runs are absorbed into the region.
- *
- * Returns the region length; *samelen receives the length of the
- * identical run that follows it (0 if there is none).
- */
-static unsigned int different(const unsigned char *new,
-                             const unsigned char *old,
-                             unsigned int length,
-                             unsigned int min_same,
-                             unsigned int *samelen)
-{
-       unsigned int pos, run = 0;
-
-       for (pos = 0; pos < length; pos++) {
-               if (new[pos] == old[pos]) {
-                       run++;
-                       continue;
-               }
-               /* Mismatch: was the run before it long enough to stop? */
-               if (run >= min_same) {
-                       *samelen = run;
-                       return pos - run;
-               }
-               run = 0;
-       }
-
-       /* A short trailing run is not worth splitting off. */
-       if (run < min_same) {
-               run = 0;
-       }
-       *samelen = run;
-       return length - run;
-}
-
-/* Allocates recovery blob, without tdb_recovery_record at head set up.
- *
- * The blob is a tdb_recovery_record (left unfilled here) followed by a
- * sequence of (offset, length, data) entries, one for every changed
- * region of a copied block below the pre-transaction file size.  The
- * data is what tdb_access_read() returns through the saved
- * (pre-transaction) I/O methods.
- *
- * On success *len is the size of the entries only, i.e. excluding the
- * tdb_recovery_record at the head, and the caller owns the returned
- * memory.  On failure an error pointer (TDB_ERR_PTR) is returned. */
-static struct tdb_recovery_record *alloc_recovery(struct tdb_context *tdb,
-                                                 tdb_len_t *len)
-{
-       struct tdb_recovery_record *rec;
-       size_t i;
-       enum TDB_ERROR ecode;
-       unsigned char *p;
-       const struct tdb_methods *old_methods = tdb->io;
-
-       /* Header plus the worst-case payload size. */
-       rec = malloc(sizeof(*rec) + tdb_recovery_size(tdb));
-       if (!rec) {
-               tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                          "transaction_setup_recovery:"
-                          " cannot allocate");
-               return TDB_ERR_PTR(TDB_ERR_OOM);
-       }
-
-       /* We temporarily revert to the old I/O methods, so we can use
-        * tdb_access_read */
-       tdb->io = tdb->transaction->io_methods;
-
-       /* build the recovery data into a single blob to allow us to do a single
-          large write, which should be more efficient */
-       p = (unsigned char *)(rec + 1);
-       for (i=0;i<tdb->transaction->num_blocks;i++) {
-               tdb_off_t offset;
-               tdb_len_t length;
-               unsigned int off;
-               const unsigned char *buffer;
-
-               /* Block never touched by the transaction. */
-               if (tdb->transaction->blocks[i] == NULL) {
-                       continue;
-               }
-
-               offset = i * PAGESIZE;
-               length = PAGESIZE;
-               if (i == tdb->transaction->num_blocks-1) {
-                       length = tdb->transaction->last_block_size;
-               }
-
-               /* Block lies entirely beyond the old end of file. */
-               if (offset >= tdb->transaction->old_map_size) {
-                       continue;
-               }
-
-               if (offset + length > tdb->file->map_size) {
-                       ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                          "tdb_transaction_setup_recovery:"
-                                          " transaction data over new region"
-                                          " boundary");
-                       goto fail;
-               }
-               if (offset + length > tdb->transaction->old_map_size) {
-                       /* Short read at EOF. */
-                       length = tdb->transaction->old_map_size - offset;
-               }
-               buffer = tdb_access_read(tdb, offset, length, false);
-               if (TDB_PTR_IS_ERR(buffer)) {
-                       ecode = TDB_PTR_ERR(buffer);
-                       goto fail;
-               }
-
-               /* Skip over anything the same at the start. */
-               off = same(tdb->transaction->blocks[i], buffer, length);
-               offset += off;
-
-               while (off < length) {
-                       tdb_len_t len1;
-                       unsigned int samelen;
-
-                       /* Only split at an unchanged run that is longer
-                        * than the entry header a split would cost. */
-                       len1 = different(tdb->transaction->blocks[i] + off,
-                                       buffer + off, length - off,
-                                       sizeof(offset) + sizeof(len1) + 1,
-                                       &samelen);
-
-                       /* Entry header: offset then length, run through
-                        * tdb_convert. */
-                       memcpy(p, &offset, sizeof(offset));
-                       memcpy(p + sizeof(offset), &len1, sizeof(len1));
-                       tdb_convert(tdb, p, sizeof(offset) + sizeof(len1));
-                       p += sizeof(offset) + sizeof(len1);
-                       /* Entry data comes from buffer, not from the
-                        * transaction's copy of the block. */
-                       memcpy(p, buffer + off, len1);
-                       p += len1;
-                       off += len1 + samelen;
-                       offset += len1 + samelen;
-               }
-               tdb_access_release(tdb, buffer);
-       }
-
-       /* Payload size only: the header at the head is not counted. */
-       *len = p - (unsigned char *)(rec + 1);
-       tdb->io = old_methods;
-       return rec;
-
-fail:
-       free(rec);
-       tdb->io = old_methods;
-       return TDB_ERR_PTR(ecode);
-}
-
-/*
-  Create a new recovery area at the current end of the file.
-
-  rec_length is the payload size that has to fit; rec->max_len is set to
-  the capacity actually reserved.  The file is grown through the saved
-  (pre-transaction) I/O methods and the header's recovery field is
-  pointed at the new area, both on disk and in the transaction's copy.
-
-  Returns the offset of the new area, or an error encoded with
-  TDB_ERR_TO_OFF.
-*/
-static tdb_off_t create_recovery_area(struct tdb_context *tdb,
-                                     tdb_len_t rec_length,
-                                     struct tdb_recovery_record *rec)
-{
-       tdb_off_t off, recovery_off;
-       tdb_len_t addition;
-       enum TDB_ERROR ecode;
-       const struct tdb_methods *methods = tdb->transaction->io_methods;
-
-       /* round up to a multiple of page size. Overallocate, since each
-        * such allocation forces us to expand the file. */
-       rec->max_len = tdb_expand_adjust(tdb->file->map_size, rec_length);
-
-       /* Round up to a page. */
-       rec->max_len = ((sizeof(*rec) + rec->max_len + PAGESIZE-1)
-                       & ~(PAGESIZE-1))
-               - sizeof(*rec);
-
-       /* The area starts at the current (in-transaction) end of file. */
-       off = tdb->file->map_size;
-
-       /* Restore ->map_size before calling underlying expand_file.
-          Also so that we don't try to expand the file again in the
-          transaction commit, which would destroy the recovery
-          area */
-       addition = (tdb->file->map_size - tdb->transaction->old_map_size) +
-               sizeof(*rec) + rec->max_len;
-       tdb->file->map_size = tdb->transaction->old_map_size;
-       tdb->stats.transaction_expand_file++;
-       ecode = methods->expand_file(tdb, addition);
-       if (ecode != TDB_SUCCESS) {
-               tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                          "tdb_recovery_allocate:"
-                          " failed to create recovery area");
-               return TDB_ERR_TO_OFF(ecode);
-       }
-
-       /* we have to reset the old map size so that we don't try to
-          expand the file again in the transaction commit, which
-          would destroy the recovery area */
-       tdb->transaction->old_map_size = tdb->file->map_size;
-
-       /* write the recovery header offset and sync - we can sync without a race here
-          as the magic ptr in the recovery record has not been set */
-       recovery_off = off;
-       tdb_convert(tdb, &recovery_off, sizeof(recovery_off));
-       ecode = methods->twrite(tdb, offsetof(struct tdb_header, recovery),
-                               &recovery_off, sizeof(tdb_off_t));
-       if (ecode != TDB_SUCCESS) {
-               tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                          "tdb_recovery_allocate:"
-                          " failed to write recovery head");
-               return TDB_ERR_TO_OFF(ecode);
-       }
-       /* Keep the transaction's own copy of the header in step. */
-       transaction_write_existing(tdb, offsetof(struct tdb_header, recovery),
-                                  &recovery_off,
-                                  sizeof(tdb_off_t));
-       return off;
-}
-
-/*
-  setup the recovery data that will be used on a crash during commit
-
-  Builds the recovery blob, makes sure a recovery area large enough for
-  it exists (freeing and re-creating the area when it is too small),
-  writes header and payload there with an invalid magic, syncs, and
-  only then writes and syncs TDB_RECOVERY_MAGIC.  On success
-  tdb->transaction->magic_offset records where the magic lives.
-
-  Returns TDB_SUCCESS or the error that stopped the setup.
-*/
-static enum TDB_ERROR transaction_setup_recovery(struct tdb_context *tdb)
-{
-       tdb_len_t recovery_size = 0;
-       tdb_len_t total_size;
-       tdb_off_t recovery_off = 0;
-       tdb_off_t old_map_size = tdb->transaction->old_map_size;
-       struct tdb_recovery_record *recovery;
-       const struct tdb_methods *methods = tdb->transaction->io_methods;
-       uint64_t magic;
-       enum TDB_ERROR ecode;
-
-       /* recovery_size is the payload only: it does not include the
-          tdb_recovery_record at the head of the blob */
-       recovery = alloc_recovery(tdb, &recovery_size);
-       if (TDB_PTR_IS_ERR(recovery))
-               return TDB_PTR_ERR(recovery);
-
-       /* Fills the head of the blob with the existing record, if any. */
-       ecode = tdb_recovery_area(tdb, methods, &recovery_off, recovery);
-       if (ecode) {
-               free(recovery);
-               return ecode;
-       }
-
-       if (recovery->max_len < recovery_size) {
-               /* Not large enough. Free up old recovery area. */
-               if (recovery_off) {
-                       tdb->stats.frees++;
-                       ecode = add_free_record(tdb, recovery_off,
-                                               sizeof(*recovery)
-                                               + recovery->max_len,
-                                               TDB_LOCK_WAIT, true);
-                       free(recovery);
-                       if (ecode != TDB_SUCCESS) {
-                               return tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                                                 "tdb_recovery_allocate:"
-                                                 " failed to free previous"
-                                                 " recovery area");
-                       }
-
-                       /* Refresh recovery after add_free_record above. */
-                       recovery = alloc_recovery(tdb, &recovery_size);
-                       if (TDB_PTR_IS_ERR(recovery))
-                               return TDB_PTR_ERR(recovery);
-               }
-
-               recovery_off = create_recovery_area(tdb, recovery_size,
-                                                   recovery);
-               if (TDB_OFF_IS_ERR(recovery_off)) {
-                       free(recovery);
-                       return TDB_OFF_TO_ERR(recovery_off);
-               }
-       }
-
-       /* Now we know size, convert rec header. */
-       recovery->magic = TDB_RECOVERY_INVALID_MAGIC;
-       recovery->len = recovery_size;
-       recovery->eof = old_map_size;
-       tdb_convert(tdb, recovery, sizeof(*recovery));
-
-       /* The blob on disk is the header followed by recovery_size
-          bytes of payload.  Writing only recovery_size bytes from the
-          start of the header would cut off the tail of the payload,
-          which tdb_transaction_recover() reads back as rec.len bytes
-          located after the header. */
-       total_size = sizeof(*recovery) + recovery_size;
-
-       /* write the recovery data to the recovery area */
-       ecode = methods->twrite(tdb, recovery_off, recovery, total_size);
-       if (ecode != TDB_SUCCESS) {
-               free(recovery);
-               return tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                                 "tdb_transaction_setup_recovery:"
-                                 " failed to write recovery data");
-       }
-       /* keep any transaction copy of these pages in step with disk */
-       transaction_write_existing(tdb, recovery_off, recovery, total_size);
-
-       free(recovery);
-
-       /* as we don't have ordered writes, we have to sync the recovery
-          data before we update the magic to indicate that the recovery
-          data is present */
-       ecode = transaction_sync(tdb, recovery_off, total_size);
-       if (ecode != TDB_SUCCESS)
-               return ecode;
-
-       magic = TDB_RECOVERY_MAGIC;
-       tdb_convert(tdb, &magic, sizeof(magic));
-
-       tdb->transaction->magic_offset
-               = recovery_off + offsetof(struct tdb_recovery_record, magic);
-
-       ecode = methods->twrite(tdb, tdb->transaction->magic_offset,
-                               &magic, sizeof(magic));
-       if (ecode != TDB_SUCCESS) {
-               return tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                                 "tdb_transaction_setup_recovery:"
-                                 " failed to write recovery magic");
-       }
-       transaction_write_existing(tdb, tdb->transaction->magic_offset,
-                                  &magic, sizeof(magic));
-
-       /* ensure the recovery magic marker is on disk */
-       return transaction_sync(tdb, tdb->transaction->magic_offset,
-                               sizeof(magic));
-}
-
-/*
-  First phase of a commit: take the locks, write the recovery data and
-  grow the file, leaving the block writes to tdb_transaction_commit().
-
-  Returns TDB_SUCCESS straight away for nested or empty transactions.
-  A transaction that is already prepared, or has an error pending, is
-  cancelled here and TDB_ERR_EINVAL is returned.  On the other error
-  paths the transaction is left in place for the caller to cancel.
-*/
-static enum TDB_ERROR _tdb_transaction_prepare_commit(struct tdb_context *tdb)
-{
-       const struct tdb_methods *methods;
-       enum TDB_ERROR ecode;
-
-       if (tdb->transaction == NULL) {
-               return tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
-                                 "tdb_transaction_prepare_commit:"
-                                 " no transaction");
-       }
-
-       if (tdb->transaction->prepared) {
-               _tdb_transaction_cancel(tdb);
-               return tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
-                                 "tdb_transaction_prepare_commit:"
-                                 " transaction already prepared");
-       }
-
-       /* Set when a nested transaction was cancelled. */
-       if (tdb->transaction->transaction_error) {
-               _tdb_transaction_cancel(tdb);
-               return tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_ERROR,
-                                 "tdb_transaction_prepare_commit:"
-                                 " transaction error pending");
-       }
-
-
-       /* Only the outermost level does any real work. */
-       if (tdb->transaction->nesting != 0) {
-               return TDB_SUCCESS;
-       }
-
-       /* check for a null transaction */
-       if (tdb->transaction->blocks == NULL) {
-               return TDB_SUCCESS;
-       }
-
-       methods = tdb->transaction->io_methods;
-
-       /* upgrade the main transaction lock region to a write lock */
-       ecode = tdb_allrecord_upgrade(tdb, TDB_HASH_LOCK_START);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       /* get the open lock - this prevents new users attaching to the database
-          during the commit */
-       ecode = tdb_lock_open(tdb, F_WRLCK, TDB_LOCK_WAIT|TDB_LOCK_NOCHECK);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       /* Since we have whole db locked, we don't need the expansion lock. */
-       if (!(tdb->flags & TDB_NOSYNC)) {
-               /* Writes the recovery area and sets
-                * tdb->transaction->magic_offset. */
-               ecode = transaction_setup_recovery(tdb);
-               if (ecode != TDB_SUCCESS) {
-                       return ecode;
-               }
-       }
-
-       tdb->transaction->prepared = true;
-
-       /* expand the file to the new size if needed */
-       if (tdb->file->map_size != tdb->transaction->old_map_size) {
-               tdb_len_t add;
-
-               add = tdb->file->map_size - tdb->transaction->old_map_size;
-               /* Restore original map size for tdb_expand_file */
-               tdb->file->map_size = tdb->transaction->old_map_size;
-               ecode = methods->expand_file(tdb, add);
-               if (ecode != TDB_SUCCESS) {
-                       return ecode;
-               }
-       }
-
-       /* Keep the open lock until the actual commit */
-       return TDB_SUCCESS;
-}
-
-/*
-   prepare to commit the current transaction
-
-   Public wrapper around _tdb_transaction_prepare_commit(): the result
-   is returned and also stored in tdb->last_error.
-*/
-_PUBLIC_ enum TDB_ERROR tdb_transaction_prepare_commit(struct tdb_context *tdb)
-{
-       return tdb->last_error = _tdb_transaction_prepare_commit(tdb);
-}
-
-/*
-  commit the current transaction
-
-  A nested commit only pops one nesting level.  At the outermost level
-  the transaction is prepared (unless that was already done), every
-  copied block is written through the saved I/O methods, the file is
-  synced and the transaction is torn down with
-  _tdb_transaction_cancel().  If a block write fails, recovery is run
-  and the transaction is cancelled.
-
-  Returns TDB_SUCCESS or an error code, which is also stored in
-  tdb->last_error.
-*/
-_PUBLIC_ enum TDB_ERROR tdb_transaction_commit(struct tdb_context *tdb)
-{
-       const struct tdb_methods *methods;
-       int i;
-       enum TDB_ERROR ecode;
-
-       if (tdb->transaction == NULL) {
-               return tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                                   TDB_LOG_USE_ERROR,
-                                                   "tdb_transaction_commit:"
-                                                   " no transaction");
-       }
-
-       tdb_trace(tdb, "tdb_transaction_commit");
-
-       /* Inner commit of a nested transaction: nothing is written yet. */
-       if (tdb->transaction->nesting != 0) {
-               tdb->transaction->nesting--;
-               return tdb->last_error = TDB_SUCCESS;
-       }
-
-       /* check for a null transaction */
-       if (tdb->transaction->blocks == NULL) {
-               _tdb_transaction_cancel(tdb);
-               return tdb->last_error = TDB_SUCCESS;
-       }
-
-       if (!tdb->transaction->prepared) {
-               ecode = _tdb_transaction_prepare_commit(tdb);
-               if (ecode != TDB_SUCCESS) {
-                       _tdb_transaction_cancel(tdb);
-                       return tdb->last_error = ecode;
-               }
-       }
-
-       methods = tdb->transaction->io_methods;
-
-       /* perform all the writes */
-       for (i=0;i<tdb->transaction->num_blocks;i++) {
-               tdb_off_t offset;
-               tdb_len_t length;
-
-               if (tdb->transaction->blocks[i] == NULL) {
-                       continue;
-               }
-
-               offset = i * PAGESIZE;
-               length = PAGESIZE;
-               /* The final block may be shorter than a page. */
-               if (i == tdb->transaction->num_blocks-1) {
-                       length = tdb->transaction->last_block_size;
-               }
-
-               ecode = methods->twrite(tdb, offset,
-                                       tdb->transaction->blocks[i], length);
-               if (ecode != TDB_SUCCESS) {
-                       /* we've overwritten part of the data and
-                          possibly expanded the file, so we need to
-                          run the crash recovery code */
-                       tdb->io = methods;
-                       tdb_transaction_recover(tdb);
-
-                       _tdb_transaction_cancel(tdb);
-
-                       return tdb->last_error = ecode;
-               }
-               SAFE_FREE(tdb->transaction->blocks[i]);
-       }
-
-       SAFE_FREE(tdb->transaction->blocks);
-       tdb->transaction->num_blocks = 0;
-
-       /* ensure the new data is on disk */
-       /* NOTE(review): this error path returns without calling
-          _tdb_transaction_cancel(), so the transaction and its locks
-          appear to stay in place -- confirm that is intended. */
-       ecode = transaction_sync(tdb, 0, tdb->file->map_size);
-       if (ecode != TDB_SUCCESS) {
-               return tdb->last_error = ecode;
-       }
-
-       /*
-         TODO: maybe write to some dummy hdr field, or write to magic
-         offset without mmap, before the last sync, instead of the
-         utime() call
-       */
-
-       /* on some systems (like Linux 2.6.x) changes via mmap/msync
-          don't change the mtime of the file, this means the file may
-          not be backed up (as tdb rounding to block sizes means that
-          file size changes are quite rare too). The following forces
-          mtime changes when a transaction completes */
-#if HAVE_UTIME
-       utime(tdb->name, NULL);
-#endif
-
-       /* use a transaction cancel to free memory and remove the
-          transaction locks: it "restores" map_size, too. */
-       tdb->transaction->old_map_size = tdb->file->map_size;
-       _tdb_transaction_cancel(tdb);
-
-       return tdb->last_error = TDB_SUCCESS;
-}
-
-
-/*
-  recover from an aborted transaction. Must be called with exclusive
-  database write access already established (including the open
-  lock to prevent new processes attaching)
-
-  Does nothing (TDB_SUCCESS) when there is no recovery area or its magic
-  is not TDB_RECOVERY_MAGIC.  Otherwise every (offset, length, data)
-  entry of the recovery payload is written back, the file is synced,
-  the header's recovery pointer is cleared if the area lies at or past
-  the recovered end of file, and the magic is invalidated.
-
-  Returns TDB_SUCCESS or the error code that was logged.
-*/
-enum TDB_ERROR tdb_transaction_recover(struct tdb_context *tdb)
-{
-       tdb_off_t recovery_head, recovery_eof;
-       unsigned char *data, *p;
-       struct tdb_recovery_record rec;
-       enum TDB_ERROR ecode;
-
-       /* find the recovery area */
-       recovery_head = tdb_read_off(tdb, offsetof(struct tdb_header,recovery));
-       if (TDB_OFF_IS_ERR(recovery_head)) {
-               ecode = TDB_OFF_TO_ERR(recovery_head);
-               return tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                                 "tdb_transaction_recover:"
-                                 " failed to read recovery head");
-       }
-
-       if (recovery_head == 0) {
-               /* we have never allocated a recovery record */
-               return TDB_SUCCESS;
-       }
-
-       /* read the recovery record */
-       ecode = tdb_read_convert(tdb, recovery_head, &rec, sizeof(rec));
-       if (ecode != TDB_SUCCESS) {
-               return tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                                 "tdb_transaction_recover:"
-                                 " failed to read recovery record");
-       }
-
-       if (rec.magic != TDB_RECOVERY_MAGIC) {
-               /* there is no valid recovery data */
-               return TDB_SUCCESS;
-       }
-
-       if (tdb->flags & TDB_RDONLY) {
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "tdb_transaction_recover:"
-                                 " attempt to recover read only database");
-       }
-
-       recovery_eof = rec.eof;
-
-       data = (unsigned char *)malloc(rec.len);
-       if (data == NULL) {
-               return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                 "tdb_transaction_recover:"
-                                 " failed to allocate recovery data");
-       }
-
-       /* read the full recovery data */
-       ecode = tdb->io->tread(tdb, recovery_head + sizeof(rec), data,
-                                   rec.len);
-       if (ecode != TDB_SUCCESS) {
-               /* don't leak the payload buffer on this error path */
-               free(data);
-               return tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                                 "tdb_transaction_recover:"
-                                 " failed to read recovery data");
-       }
-
-       /* recover the file data */
-       p = data;
-       while (p+sizeof(tdb_off_t)+sizeof(tdb_len_t) < data + rec.len) {
-               tdb_off_t ofs;
-               tdb_len_t len;
-               /* entry header: offset then length, converted in place */
-               tdb_convert(tdb, p, sizeof(ofs) + sizeof(len));
-               memcpy(&ofs, p, sizeof(ofs));
-               memcpy(&len, p + sizeof(ofs), sizeof(len));
-               p += sizeof(ofs) + sizeof(len);
-
-               ecode = tdb->io->twrite(tdb, ofs, p, len);
-               if (ecode != TDB_SUCCESS) {
-                       free(data);
-                       return tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                                         "tdb_transaction_recover:"
-                                         " failed to recover %zu bytes"
-                                         " at offset %zu",
-                                         (size_t)len, (size_t)ofs);
-               }
-               p += len;
-       }
-
-       free(data);
-
-       ecode = transaction_sync(tdb, 0, tdb->file->map_size);
-       if (ecode != TDB_SUCCESS) {
-               return tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                                 "tdb_transaction_recover:"
-                                 " failed to sync recovery");
-       }
-
-       /* if the recovery area is after the recovered eof then remove it */
-       if (recovery_eof <= recovery_head) {
-               ecode = tdb_write_off(tdb, offsetof(struct tdb_header,
-                                                   recovery),
-                                     0);
-               if (ecode != TDB_SUCCESS) {
-                       return tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                                         "tdb_transaction_recover:"
-                                         " failed to remove recovery head");
-               }
-       }
-
-       /* remove the recovery magic */
-       ecode = tdb_write_off(tdb,
-                             recovery_head
-                             + offsetof(struct tdb_recovery_record, magic),
-                             TDB_RECOVERY_INVALID_MAGIC);
-       if (ecode != TDB_SUCCESS) {
-               return tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                                 "tdb_transaction_recover:"
-                                 " failed to remove recovery magic");
-       }
-
-       ecode = transaction_sync(tdb, 0, recovery_eof);
-       if (ecode != TDB_SUCCESS) {
-               return tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                                 "tdb_transaction_recover:"
-                                 " failed to sync2 recovery");
-       }
-
-       tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
-                  "tdb_transaction_recover: recovered %zu byte database",
-                  (size_t)recovery_eof);
-
-       /* all done */
-       return TDB_SUCCESS;
-}
-
-/*
-  Report whether the database holds valid recovery data.
-
-  Returns true when the recovery record carries TDB_RECOVERY_MAGIC,
-  false when there is no recovery area or the magic differs, and an
-  encoded error when the header or the record cannot be read.
-*/
-tdb_bool_err tdb_needs_recovery(struct tdb_context *tdb)
-{
-       struct tdb_recovery_record rec;
-       enum TDB_ERROR ecode;
-       tdb_off_t head;
-
-       /* find the recovery area */
-       head = tdb_read_off(tdb, offsetof(struct tdb_header,recovery));
-       if (TDB_OFF_IS_ERR(head))
-               return head;
-
-       /* we have never allocated a recovery record */
-       if (head == 0)
-               return false;
-
-       /* read the recovery record */
-       ecode = tdb_read_convert(tdb, head, &rec, sizeof(rec));
-       if (ecode != TDB_SUCCESS)
-               return TDB_ERR_TO_OFF(ecode);
-
-       return (rec.magic == TDB_RECOVERY_MAGIC);
-}
diff --git a/lib/tdb2/traverse.c b/lib/tdb2/traverse.c
deleted file mode 100644 (file)
index ed51a9e..0000000
+++ /dev/null
@@ -1,99 +0,0 @@
- /*
-   Trivial Database 2: traverse function.
-   Copyright (C) Rusty Russell 2010
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-#include "private.h"
-#include <ccan/likely/likely.h>
-
-_PUBLIC_ int64_t tdb_traverse_(struct tdb_context *tdb,
-                     int (*fn)(struct tdb_context *,
-                               TDB_DATA, TDB_DATA, void *),
-                     void *p)
-{
-       enum TDB_ERROR ecode;
-       struct traverse_info tinfo;
-       struct tdb_data k, d;
-       int64_t count = 0;
-
-       k.dptr = NULL;
-       for (ecode = first_in_hash(tdb, &tinfo, &k, &d.dsize);
-            ecode == TDB_SUCCESS;
-            ecode = next_in_hash(tdb, &tinfo, &k, &d.dsize)) {
-               d.dptr = k.dptr + k.dsize;
-
-               count++;
-               if (fn && fn(tdb, k, d, p)) {
-                       free(k.dptr);
-                       tdb->last_error = TDB_SUCCESS;
-                       return count;
-               }
-               free(k.dptr);
-       }
-
-       if (ecode != TDB_ERR_NOEXIST) {
-               return TDB_ERR_TO_OFF(tdb->last_error = ecode);
-       }
-       tdb->last_error = TDB_SUCCESS;
-       return count;
-}
-
-_PUBLIC_ enum TDB_ERROR tdb_firstkey(struct tdb_context *tdb, struct tdb_data *key)
-{
-       struct traverse_info tinfo;
-
-       return tdb->last_error = first_in_hash(tdb, &tinfo, key, NULL);
-}
-
-/* We lock twice, not very efficient.  We could keep last key & tinfo cached. */
-_PUBLIC_ enum TDB_ERROR tdb_nextkey(struct tdb_context *tdb, struct tdb_data *key)
-{
-       struct traverse_info tinfo;
-       struct hash_info h;
-       struct tdb_used_record rec;
-
-       tinfo.prev = find_and_lock(tdb, *key, F_RDLCK, &h, &rec, &tinfo);
-       free(key->dptr);
-       if (TDB_OFF_IS_ERR(tinfo.prev)) {
-               return tdb->last_error = TDB_OFF_TO_ERR(tinfo.prev);
-       }
-       tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
-
-       return tdb->last_error = next_in_hash(tdb, &tinfo, key, NULL);
-}
-
-static int wipe_one(struct tdb_context *tdb,
-                   TDB_DATA key, TDB_DATA data, enum TDB_ERROR *ecode)
-{
-       *ecode = tdb_delete(tdb, key);
-       return (*ecode != TDB_SUCCESS);
-}
-
-_PUBLIC_ enum TDB_ERROR tdb_wipe_all(struct tdb_context *tdb)
-{
-       enum TDB_ERROR ecode;
-       int64_t count;
-
-       ecode = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false);
-       if (ecode != TDB_SUCCESS)
-               return tdb->last_error = ecode;
-
-       /* FIXME: Be smarter. */
-       count = tdb_traverse(tdb, wipe_one, &ecode);
-       if (count < 0)
-               ecode = TDB_OFF_TO_ERR(count);
-       tdb_allrecord_unlock(tdb, F_WRLCK);
-       return tdb->last_error = ecode;
-}
diff --git a/lib/tdb2/wscript b/lib/tdb2/wscript
deleted file mode 100644 (file)
index ef30f1b..0000000
+++ /dev/null
@@ -1,278 +0,0 @@
-#!/usr/bin/env python
-
-APPNAME = 'tdb'
-VERSION = '2.0.1'
-
-blddir = 'bin'
-
-import sys, os
-
-# find the buildtools directory
-srcdir = '.'
-while not os.path.exists(srcdir+'/buildtools') and len(srcdir.split('/')) < 5:
-    srcdir = '../' + srcdir
-sys.path.insert(0, srcdir + '/buildtools/wafsamba')
-
-import wafsamba, samba_dist, Options, Logs, glob
-
-samba_dist.DIST_DIRS('lib/tdb2:. lib/replace:lib/replace lib/ccan:lib/ccan buildtools:buildtools')
-
-def set_options(opt):
-    opt.BUILTIN_DEFAULT('replace,ccan')
-    opt.PRIVATE_EXTENSION_DEFAULT('tdb2', noextension='tdb2')
-    opt.RECURSE('lib/replace')
-    opt.add_option('--enable-developer',
-                   help=("Turn on developer warnings and debugging"),
-                   action="store_true", dest='developer', default=False)
-    opt.add_option('--enable-tdb2',
-                   help=("Use tdb2 API instead of tdb1 [True]"),
-                   action="store_true", dest='BUILD_TDB2', default=True)
-    opt.add_option('--disable-tdb2',
-                   help=("Use old tdb1 API instead of tdb2"),
-                   action="store_false", dest='BUILD_TDB2')
-    opt.add_option('--valgrind',
-                   help=("use valgrind on tests programs"),
-                   action="store_true", dest='VALGRIND', default=False)
-    opt.add_option('--valgrind-log',
-                   help=("where to put the valgrind log"),
-                   action="store", dest='VALGRINDLOG', default=None)
-    if opt.IN_LAUNCH_DIR():
-        opt.add_option('--disable-python',
-                       help=("disable the pytdb module"),
-                       action="store_true", dest='disable_python', default=False)
-
-def configure(conf):
-    if Options.options.developer:
-        conf.env.DEVELOPER_MODE = True
-
-    conf.env.TEST_RUN_SRC=['test/run-001-encode.c',
-                           'test/run-001-fls.c',
-                           'test/run-01-new_database.c',
-                           'test/run-02-expand.c',
-                           'test/run-03-coalesce.c',
-                           'test/run-04-basichash.c',
-                           'test/run-05-readonly-open.c',
-                           'test/run-10-simple-store.c',
-                           'test/run-11-simple-fetch.c',
-                           'test/run-12-check.c',
-                           'test/run-15-append.c',
-                           'test/run-20-growhash.c',
-                           'test/run-25-hashoverload.c',
-                           'test/run-30-exhaust-before-expand.c',
-                           'test/run-35-convert.c',
-                           'test/run-50-multiple-freelists.c',
-                           'test/run-56-open-during-transaction.c',
-                           'test/run-57-die-during-transaction.c',
-                           'test/run-64-bit-tdb.c',
-                           'test/run-90-get-set-attributes.c',
-                           'test/run-capabilities.c',
-                           'test/run-expand-in-transaction.c',
-                           'test/run-features.c',
-                           'test/run-lockall.c',
-                           'test/run-remap-in-read_traverse.c',
-                           'test/run-seed.c',
-                           'test/run-tdb_errorstr.c',
-                           'test/run-tdb_foreach.c',
-                           'test/run-traverse.c']
-    conf.env.TEST_API_SRC=['test/api-12-store.c',
-                           'test/api-13-delete.c',
-                           'test/api-14-exists.c',
-                           'test/api-16-wipe_all.c',
-                           'test/api-21-parse_record.c',
-                           'test/api-55-transaction.c',
-                           'test/api-80-tdb_fd.c',
-                           'test/api-81-seqnum.c',
-                           'test/api-82-lockattr.c',
-                           'test/api-83-openhook.c',
-                           'test/api-91-get-stats.c',
-                           'test/api-92-get-set-readonly.c',
-                           'test/api-93-repack.c',
-                           'test/api-add-remove-flags.c',
-                           'test/api-check-callback.c',
-                           'test/api-firstkey-nextkey.c',
-                           'test/api-fork-test.c',
-                           'test/api-locktimeout.c',
-                           'test/api-missing-entries.c',
-                           'test/api-open-multiple-times.c',
-                           'test/api-record-expand.c',
-                           'test/api-simple-delete.c',
-                           'test/api-summary.c']
-    conf.env.TEST_API_HELPER_SRC=['test/helpapi-external-agent.c']
-    conf.env.TEST_RUN_HELPER_SRC=['test/helprun-external-agent.c',
-                                  'test/helprun-layout.c']
-    conf.env.TEST_HELPER_SRC=['test/external-agent.c',
-                              'test/failtest_helper.c',
-                              'test/lock-tracking.c',
-                              'test/logging.c',
-                              'test/tap-interface.c']
-
-    if Options.options.BUILD_TDB2:
-        conf.DEFINE('BUILD_TDB2', 1)
-        conf.RECURSE('lib/replace')
-        conf.RECURSE('lib/ccan')
-
-        conf.env.standalone_tdb2 = conf.IN_LAUNCH_DIR()
-        conf.env.disable_python = getattr(Options.options, 'disable_python', False)
-
-        if not conf.env.standalone_tdb2:
-            if conf.CHECK_BUNDLED_SYSTEM('tdb', minversion=VERSION,
-                                         implied_deps='replace'):
-                conf.define('USING_SYSTEM_TDB2', 1)
-                if conf.CHECK_BUNDLED_SYSTEM_PYTHON('pytdb', 'tdb', minversion=VERSION):
-                    conf.define('USING_SYSTEM_PYTDB', 1)
-
-        if not conf.env.disable_python:
-            # also disable if we don't have the python libs installed
-            conf.find_program('python', var='PYTHON')
-            conf.check_tool('python')
-            conf.check_python_version((2,4,2))
-            conf.SAMBA_CHECK_PYTHON_HEADERS(mandatory=False)
-            if not conf.env.HAVE_PYTHON_H:
-                Logs.warn('Disabling pytdb as python devel libs not found')
-                conf.env.disable_python = True
-
-        # This make #include <ccan/...> work.
-        conf.ADD_EXTRA_INCLUDES('''#lib''')
-
-        conf.SAMBA_CONFIG_H()
-
-def build(bld):
-    if bld.env.BUILD_TDB2:
-        bld.RECURSE('lib/replace')
-        bld.RECURSE('lib/ccan')
-
-        if bld.env.standalone_tdb2:
-            bld.env.PKGCONFIGDIR = '${LIBDIR}/pkgconfig'
-            private_library = False
-        else:
-            private_library = True
-
-        SRC = '''check.c free.c hash.c io.c lock.c open.c
-                 summary.c tdb.c transaction.c traverse.c'''
-
-        if not bld.CONFIG_SET('USING_SYSTEM_TDB2'):
-            bld.SAMBA_LIBRARY('tdb',
-                              SRC,
-                              deps='replace ccan',
-                              includes='.',
-                              abi_directory='ABI',
-                              abi_match='tdb_*',
-                              hide_symbols=True,
-                              vnum=VERSION,
-                              public_headers='tdb2.h',
-                              public_headers_install=not private_library,
-                              pc_files='tdb.pc',
-                              private_library=private_library)
-
-            bld.SAMBA_BINARY('tdbtorture',
-                             'tools/tdb2torture.c',
-                             deps='tdb',
-                             install=False)
-
-            bld.SAMBA_BINARY('tdbtool',
-                             'tools/tdb2tool.c',
-                             deps='tdb')
-
-            bld.SAMBA_BINARY('tdbdump',
-                             'tools/tdb2dump.c',
-                             deps='tdb')
-
-            bld.SAMBA_BINARY('tdbrestore',
-                             'tools/tdb2restore.c',
-                             deps='tdb')
-
-            bld.SAMBA_BINARY('tdbbackup',
-                             'tools/tdb2backup.c',
-                             deps='tdb')
-
-            if not bld.CONFIG_SET('USING_SYSTEM_PYTDB'):
-                bld.SAMBA_PYTHON('pytdb',
-                                 source='pytdb.c',
-                                 deps='tdb',
-                                 enabled=not bld.env.disable_python,
-                                 realname='tdb.so',
-                                 cflags='-DPACKAGE_VERSION=\"%s\"' % VERSION)
-
-            if bld.env.DEVELOPER_MODE:
-                # FIXME: We need CCAN for some API tests, but waf thinks it's
-                # already available via tdb2.  It is, but not publicly.
-                # Workaround is to build a private, non-hiding version.
-                bld.SAMBA_SUBSYSTEM('tdb2-testing',
-                                    SRC,
-                                    deps='replace ccan',
-                                    includes='.')
-
-                bld.SAMBA_SUBSYSTEM('tdb2-test-helpers', bld.env.TEST_HELPER_SRC,
-                                    deps='replace')
-                bld.SAMBA_SUBSYSTEM('tdb2-run-helpers', bld.env.TEST_RUN_HELPER_SRC,
-                                    deps='replace')
-                bld.SAMBA_SUBSYSTEM('tdb2-api-helpers', bld.env.TEST_API_HELPER_SRC,
-                                    deps='replace tdb2-testing')
-
-                for f in bld.env.TEST_RUN_SRC:
-                    base = os.path.splitext(os.path.basename(f))[0]
-                    bld.SAMBA_BINARY('tdb2-' + base, f,
-                                     deps='ccan replace tdb2-test-helpers tdb2-run-helpers ccan-failtest',
-                                     install=False)
-
-                for f in bld.env.TEST_API_SRC:
-                    base = os.path.splitext(os.path.basename(f))[0]
-                    bld.SAMBA_BINARY('tdb2-' + base, f,
-                                     deps='ccan replace tdb2-test-helpers tdb2-api-helpers',
-                                     install=False)
-
-def testonly(ctx):
-    '''run tdb2 testsuite'''
-    import Utils, samba_utils, shutil
-    ecode = 0;
-
-    env = samba_utils.LOAD_ENVIRONMENT()
-
-    if env.BUILD_TDB2 and env.standalone_tdb2 and env.DEVELOPER_MODE:
-
-        # FIXME: This is horrible :(
-        test_prefix = "%s/st" % (Utils.g_module.blddir)
-        shutil.rmtree(test_prefix, ignore_errors=True)
-        os.makedirs(test_prefix)
-
-        # Create scratch directory for tests.
-        testdir = os.path.join(test_prefix, 'tdb2-tests')
-        samba_utils.mkdir_p(testdir)
-        # Symlink back to source dir so it can find tests in test/
-        link = os.path.join(testdir, 'test')
-        if not os.path.exists(link):
-            os.symlink(os.path.abspath(os.path.join(env.cwd, 'test')), link)
-
-        if Options.options.VALGRIND:
-            os.environ['VALGRIND'] = 'valgrind -q --num-callers=30'
-        if Options.options.VALGRINDLOG is not None:
-            os.environ['VALGRIND'] += ' --log-file=%s' % Options.options.VALGRINDLOG
-
-        for f in env.TEST_RUN_SRC + env.TEST_API_SRC:
-            name = "tdb2-" + os.path.splitext(os.path.basename(f))[0]
-            cmd = "cd " + testdir + " && $VALGRIND " + os.path.abspath(os.path.join(Utils.g_module.blddir, name)) + " > test-output 2>&1"
-            print("..." + f)
-            ret = samba_utils.RUN_COMMAND(cmd)
-            if ret != 0:
-                print("%s (%s) failed:" % (name, f))
-                samba_utils.RUN_COMMAND("cat " + os.path.join(testdir, 'test-output'))
-                ecode = ret;
-                break;
-
-    sys.exit(ecode)
-
-# WAF doesn't build the unit tests for this, maybe because they don't link with tdb?
-# This forces it
-def test(ctx):
-    import Scripting
-    Scripting.commands.append('build')
-    Scripting.commands.append('testonly')
-
-def dist():
-    '''makes a tarball for distribution'''
-    samba_dist.dist()
-
-def reconfigure(ctx):
-    '''reconfigure if config scripts have changed'''
-    import samba_utils
-    samba_utils.reconfigure(ctx)
index 9fb0a7ced5cbdd9ff7dd209b406a005497592b8b..fcdfdb7dd3f137f5506b743866659eea83f6023e 100755 (executable)
@@ -21,6 +21,7 @@ builddirs = {
     "samba4-libs"  : ".",
     "ldb"     : "lib/ldb",
     "tdb"     : "lib/tdb",
+    "ntdb"    : "lib/ntdb",
     "talloc"  : "lib/talloc",
     "replace" : "lib/replace",
     "tevent"  : "lib/tevent",
@@ -30,7 +31,7 @@ builddirs = {
     "retry"   : "."
     }
 
-defaulttasks = [ "samba3", "samba4", "samba4-libs", "ldb", "tdb", "talloc", "replace", "tevent", "pidl" ]
+defaulttasks = [ "samba3", "samba4", "samba4-libs", "ldb", "tdb", "ntdb", "talloc", "replace", "tevent", "pidl" ]
 
 tasks = {
     "samba3" : [ ("autogen", "./autogen.sh", "text/plain"),
@@ -91,6 +92,14 @@ tasks = {
               ("distcheck", "make distcheck", "text/plain"),
               ("clean", "make clean", "text/plain") ],
 
+    "ntdb" : [ ("configure", "./configure --enable-developer -C ${PREFIX}", "text/plain"),
+               ("make", "make -j", "text/plain"),
+               ("install", "make install", "text/plain"),
+               ("test", "make test", "text/plain"),
+               ("check-clean-tree", "../../script/clean-source-tree.sh", "text/plain"),
+               ("distcheck", "make distcheck", "text/plain"),
+               ("clean", "make clean", "text/plain") ],
+
     "talloc" : [ ("configure", "./configure --enable-developer -C ${PREFIX}", "text/plain"),
                  ("make", "make -j", "text/plain"),
                  ("install", "make install", "text/plain"),