Copyright (C) Andrew Tridgell 1999-2000
Copyright (C) Luke Kenneth Casson Leighton 2000
Copyright (C) Paul `Rusty' Russell 2000
- Copyright (C) Jeremy Allison 2000
+ Copyright (C) Jeremy Allison 2000-2003
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+
+
+/* NOTE: If you use tdbs under valgrind, and in particular if you run
+ * tdbtorture, you may get spurious "uninitialized value" warnings. I
+ * think this is because valgrind doesn't understand that the mmap'd
+ * area may be written to by other processes. Memory can, from the
+ * point of view of the grinded process, spontaneously become
+ * initialized.
+ *
+ * I can think of a few solutions. [mbp 20030311]
+ *
+ * 1 - Write suppressions for Valgrind so that it doesn't complain
+ * about this. Probably the most reasonable but people need to
+ * remember to use them.
+ *
+ * 2 - Use IO not mmap when running under valgrind. Not so nice.
+ *
+ * 3 - Use the special valgrind macros to mark memory as valid at the
+ * right time. Probably too hard -- the process just doesn't know.
+ */
+
#ifdef STANDALONE
#if HAVE_CONFIG_H
#include <config.h>
if (tdb->flags & TDB_NOLOCK)
return 0;
- if (tdb->read_only) {
+ if ((rw_type == F_WRLCK) && (tdb->read_only)) {
errno = EACCES;
return -1;
}
if (ret == -1) {
if (!probe && lck_type != F_SETLK) {
+ /* Ensure error code is set for the log function to examine. */
+ if (errno == EINTR && palarm_fired && *palarm_fired)
+ tdb->ecode = TDB_ERR_LOCK_TIMEOUT;
+ else
+ tdb->ecode = TDB_ERR_LOCK;
TDB_LOG((tdb, 5,"tdb_brlock failed (fd=%d) at offset %d rw_type=%d lck_type=%d\n",
tdb->fd, offset, rw_type, lck_type));
}
return 0;
if (tdb->flags & TDB_INTERNAL) {
if (!probe) {
+ /* Ensure ecode is set for log fn. */
+ tdb->ecode = TDB_ERR_IO;
TDB_LOG((tdb, 0,"tdb_oob len %d beyond internal malloc size %d\n",
(int)len, (int)tdb->map_size));
}
if (st.st_size < (size_t)len) {
if (!probe) {
+ /* Ensure ecode is set for log fn. */
+ tdb->ecode = TDB_ERR_IO;
TDB_LOG((tdb, 0,"tdb_oob len %d beyond eof at %d\n",
(int)len, (int)st.st_size));
}
else if (lseek(tdb->fd, off, SEEK_SET) != off
|| write(tdb->fd, buf, len) != (ssize_t)len) {
#endif
+ /* Ensure ecode is set for log fn. */
+ tdb->ecode = TDB_ERR_IO;
TDB_LOG((tdb, 0,"tdb_write failed at %d len=%d (%s)\n",
off, len, strerror(errno)));
return TDB_ERRCODE(TDB_ERR_IO, -1);
else if (lseek(tdb->fd, off, SEEK_SET) != off
|| read(tdb->fd, buf, len) != (ssize_t)len) {
#endif
+ /* Ensure ecode is set for log fn. */
+ tdb->ecode = TDB_ERR_IO;
TDB_LOG((tdb, 0,"tdb_read failed at %d len=%d (%s)\n",
off, len, strerror(errno)));
return TDB_ERRCODE(TDB_ERR_IO, -1);
char *buf;
if (!(buf = malloc(len))) {
+ /* Ensure ecode is set for log fn. */
+ tdb->ecode = TDB_ERR_OOM;
TDB_LOG((tdb, 0,"tdb_alloc_read malloc failed len=%d (%s)\n",
len, strerror(errno)));
return TDB_ERRCODE(TDB_ERR_OOM, buf);
if (tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1)
return -1;
if (TDB_BAD_MAGIC(rec)) {
+ /* Ensure ecode is set for log fn. */
+ tdb->ecode = TDB_ERR_CORRUPT;
TDB_LOG((tdb, 0,"rec_read bad magic 0x%x at offset=%d\n", rec->magic, offset));
return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
}
}
if (rec->magic != TDB_FREE_MAGIC) {
+ /* Ensure ecode is set for log fn. */
+ tdb->ecode = TDB_ERR_CORRUPT;
TDB_LOG((tdb, 0,"rec_free_read bad magic 0x%x at offset=%d\n",
rec->magic, off));
return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
/* update an entry in place - this only works if the new data size
is <= the old data size and the key exists.
- on failure return -1
+ on failure return -1.
+ This version takes no lock of its own (it looks the record up with
+ tdb_find() rather than tdb_find_lock() and never calls tdb_unlock()),
+ so the caller is presumably expected to hold the write lock on the
+ key's hash chain already - TODO confirm at every call site.
*/
+
static int tdb_update(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf)
{
struct list_struct rec;
tdb_off rec_ptr;
- int ret = -1;
/* find entry */
- if (!(rec_ptr = tdb_find_lock(tdb, key, F_WRLCK, &rec)))
+ if (!(rec_ptr = tdb_find(tdb, key, tdb_hash(&key), &rec)))
return -1;
/* must be long enough key, data and tailer */
if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb_off)) {
tdb->ecode = TDB_SUCCESS; /* Not really an error */
- goto out;
+ return -1;
}
if (tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
dbuf.dptr, dbuf.dsize) == -1)
- goto out;
+ return -1;
if (dbuf.dsize != rec.data_len) {
/* update size */
rec.data_len = dbuf.dsize;
- ret = rec_write(tdb, rec_ptr, &rec);
- } else
- ret = 0;
- out:
- tdb_unlock(tdb, BUCKET(rec.full_hash), F_WRLCK);
- return ret;
+ return rec_write(tdb, rec_ptr, &rec);
+ }
+
+ return 0;
}
/* find an entry in the database given a key */
/* Try to clean dead ones from old traverses */
current = tlock->off;
tlock->off = rec->next;
- if (do_delete(tdb, current, rec) != 0)
+ if (!tdb->read_only &&
+ do_delete(tdb, current, rec) != 0)
goto fail;
}
tdb_unlock(tdb, tlock->hash, F_WRLCK);
}
memcpy(p, key.dptr, key.dsize);
- memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize);
+ if (dbuf.dsize)
+ memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize);
/* now we're into insert / modify / replace of a record which
* we know could not be optimised by an in-place store (for
goto out;
}
+/* Attempt to append data to an entry in place - this only works if the key
+ exists and the record's allocated length (rec_len) still has room for the
+ key, the old data plus the new data, and the tailer.
+ Returns 0 on success, -1 on failure. A missing key is a failure; when the
+ key exists but there is no room, tdb->ecode is set to TDB_SUCCESS before
+ returning -1 so that the caller can tell "no room" from a real error.
+ This function takes no lock itself. Record must be locked before calling.
+*/
+static int tdb_append_inplace(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA new_dbuf)
+{
+ struct list_struct rec;
+ tdb_off rec_ptr;
+
+ /* find entry - a zero offset from tdb_find() is treated as failure */
+ if (!(rec_ptr = tdb_find(tdb, key, tdb_hash(&key), &rec)))
+ return -1;
+
+ /* Append of 0 is always ok once the key is known to exist. */
+ if (new_dbuf.dsize == 0)
+ return 0;
+
+ /* must be long enough for key, old data + new data and tailer */
+ if (rec.rec_len < key.dsize + rec.data_len + new_dbuf.dsize + sizeof(tdb_off)) {
+ /* No room. */
+ tdb->ecode = TDB_SUCCESS; /* Not really an error */
+ return -1;
+ }
+
+ if (tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len + rec.data_len,
+ new_dbuf.dptr, new_dbuf.dsize) == -1)
+ return -1;
+
+ /* update size - the new bytes were written directly after the existing
+ data above; now write the record header back with the longer data_len */
+ rec.data_len += new_dbuf.dsize;
+ return rec_write(tdb, rec_ptr, &rec);
+}
+
+/* Append to an entry. Create if not exist.
+ Returns -1 straight away if tdb_keylocked() returns false for the key's
+ hash. Otherwise holds the write lock on the key's hash chain for the rest
+ of the call.
+ Tries tdb_append_inplace() first. If that fails and the key exists, builds
+ key + old data + new data in a malloc'd buffer, deletes the old record and
+ writes a fresh one that becomes the top of the hash chain. If the key does
+ not exist the work is handed to tdb_store() with TDB_INSERT.
+ Returns 0 on success and -1 on failure; on the create path the result of
+ tdb_store() is returned. */
+
+int tdb_append(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA new_dbuf)
+{
+ struct list_struct rec;
+ u32 hash;
+ tdb_off rec_ptr;
+ char *p = NULL;
+ int ret = 0;
+ size_t new_data_size = 0;
+
+ /* find which hash bucket it is in */
+ hash = tdb_hash(&key);
+ if (!tdb_keylocked(tdb, hash))
+ return -1;
+ if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
+ return -1;
+
+ /* first try in-place. */
+ if (tdb_append_inplace(tdb, key, new_dbuf) == 0)
+ goto out;
+
+ /* reset the error code potentially set by the tdb_append_inplace().
+ NOTE(review): this also discards the code left by a failed tdb_write()
+ or rec_write() inside the in-place attempt - confirm that is intended. */
+ tdb->ecode = TDB_SUCCESS;
+
+ /* find entry */
+ if (!(rec_ptr = tdb_find(tdb, key, hash, &rec))) {
+ if (tdb->ecode != TDB_ERR_NOEXIST)
+ goto fail;
+
+ /* Not found - create. */
+
+ ret = tdb_store(tdb, key, new_dbuf, TDB_INSERT);
+ goto out;
+ }
+
+ new_data_size = rec.data_len + new_dbuf.dsize;
+
+ /* Copy key+old_value+value *before* allocating free space in case malloc
+ fails and we are left with a dead spot in the tdb. */
+
+ if (!(p = (char *)malloc(key.dsize + new_data_size))) {
+ tdb->ecode = TDB_ERR_OOM;
+ goto fail;
+ }
+
+ /* Copy the key in place. */
+ memcpy(p, key.dptr, key.dsize);
+
+ /* Now read the old data into place. */
+ if (rec.data_len &&
+ tdb_read(tdb, rec_ptr + sizeof(rec) + rec.key_len, p + key.dsize, rec.data_len, 0) == -1)
+ goto fail;
+
+ /* Finally append the new data. */
+ if (new_dbuf.dsize)
+ memcpy(p+key.dsize+rec.data_len, new_dbuf.dptr, new_dbuf.dsize);
+
+ /* delete any existing record - if it doesn't exist we don't
+ care. Doing this first reduces fragmentation, and avoids
+ coalescing with `allocated' block before it's updated.
+ NOTE(review): the result of tdb_delete() is ignored, and every failure
+ path below returns -1 without putting the old record back, so the
+ previous value appears to be lost in that case - confirm. */
+
+ tdb_delete(tdb, key);
+
+ if (!(rec_ptr = tdb_allocate(tdb, key.dsize + new_data_size, &rec)))
+ goto fail;
+
+ /* Read hash top into next ptr */
+ if (ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
+ goto fail;
+
+ rec.key_len = key.dsize;
+ rec.data_len = new_data_size;
+ rec.full_hash = hash;
+ rec.magic = TDB_MAGIC;
+
+ /* write out and point the top of the hash chain at it */
+ if (rec_write(tdb, rec_ptr, &rec) == -1
+ || tdb_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+new_data_size)==-1
+ || ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) {
+ /* TODO: Need to tdb_unallocate() here - the fail path only frees
+ p and drops the lock, so the space obtained from tdb_allocate()
+ above is not handed back. */
+ goto fail;
+ }
+
+ out:
+ SAFE_FREE(p);
+ tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
+ return ret;
+
+fail:
+ ret = -1;
+ goto out;
+}
+
static int tdb_already_open(dev_t device,
ino_t ino)
{
}
}
-/* close a database */
+/**
+ * Close a database.
+ *
+ * @returns -1 for error; 0 for success.
+ **/
int tdb_close(TDB_CONTEXT *tdb)
{
TDB_CONTEXT **i;
void tdb_unlockkeys(TDB_CONTEXT *tdb)
{
u32 i;
+ if (!tdb->lockedkeys)
+ return;
for (i = 0; i < tdb->lockedkeys[0]; i++)
tdb_unlock(tdb, tdb->lockedkeys[i+1], F_WRLCK);
SAFE_FREE(tdb->lockedkeys);
return tdb_unlock(tdb, BUCKET(tdb_hash(&key)), F_WRLCK);
}
+/* Take a read (F_RDLCK) lock on the hash chain that key hashes to.
+   Returns whatever tdb_lock() returns. */
+int tdb_chainlock_read(TDB_CONTEXT *tdb, TDB_DATA key)
+{
+	const u32 hash = tdb_hash(&key);
+
+	return tdb_lock(tdb, BUCKET(hash), F_RDLCK);
+}
+
+/* Drop a read (F_RDLCK) lock on the hash chain that key hashes to.
+   Returns whatever tdb_unlock() returns. */
+int tdb_chainunlock_read(TDB_CONTEXT *tdb, TDB_DATA key)
+{
+	const u32 hash = tdb_hash(&key);
+
+	return tdb_unlock(tdb, BUCKET(hash), F_RDLCK);
+}
+
/* register a loging function */
void tdb_logging_function(TDB_CONTEXT *tdb, void (*fn)(TDB_CONTEXT *, int , const char *, ...))