tdb: suppress record write locks when allrecord lock is taken.
[ira/wip.git] / lib / tdb / common / traverse.c
index 07b0c238587eb85fe3945b61b71dbd9dd0729d58..d329ef4da4030c095e530b0d7b05b344c2256305 100644 (file)
 
 #include "tdb_private.h"
 
-/* Uses traverse lock: 0 = finish, -1 = error, other = record offset */
-static int tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock *tlock,
-                        struct list_struct *rec)
+#define TDB_NEXT_LOCK_ERR ((tdb_off_t)-1)
+
+/* Uses traverse lock: 0 = finish, TDB_NEXT_LOCK_ERR = error,
+   other = record offset */
+static tdb_off_t tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock *tlock,
+                        struct tdb_record *rec)
 {
        int want_next = (tlock->off != 0);
 
@@ -71,7 +74,7 @@ static int tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock *tloc
                }
 
                if (tdb_lock(tdb, tlock->hash, tlock->lock_rw) == -1)
-                       return -1;
+                       return TDB_NEXT_LOCK_ERR;
 
                /* No previous record?  Start at top of chain. */
                if (!tlock->off) {
@@ -99,6 +102,7 @@ static int tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock *tloc
 
                        /* Detect infinite loops. From "Shlomi Yaakobovich" <Shlomi@exanet.com>. */
                        if (tlock->off == rec->next) {
+                               tdb->ecode = TDB_ERR_CORRUPT;
                                TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_next_lock: loop detected.\n"));
                                goto fail;
                        }
@@ -121,13 +125,14 @@ static int tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock *tloc
                want_next = 0;
        }
        /* We finished iteration without finding anything */
-       return TDB_ERRCODE(TDB_SUCCESS, 0);
+       tdb->ecode = TDB_SUCCESS;
+       return 0;
 
  fail:
        tlock->off = 0;
        if (tdb_unlock(tdb, tlock->hash, tlock->lock_rw) != 0)
                TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_next_lock: On error unlock failed!\n"));
-       return -1;
+       return TDB_NEXT_LOCK_ERR;
 }
 
 /* traverse the entire database - calling fn(tdb, key, data) on each element.
@@ -140,8 +145,9 @@ static int tdb_traverse_internal(struct tdb_context *tdb,
                                 struct tdb_traverse_lock *tl)
 {
        TDB_DATA key, dbuf;
-       struct list_struct rec;
-       int ret, count = 0;
+       struct tdb_record rec;
+       int ret = 0, count = 0;
+       tdb_off_t off;
 
        /* This was in the initializaton, above, but the IRIX compiler
         * did not like it.  crh
@@ -152,7 +158,11 @@ static int tdb_traverse_internal(struct tdb_context *tdb,
        tdb->travlocks.next = tl;
 
        /* tdb_next_lock places locks on the record returned, and its chain */
-       while ((ret = tdb_next_lock(tdb, tl, &rec)) > 0) {
+       while ((off = tdb_next_lock(tdb, tl, &rec)) != 0) {
+               if (off == TDB_NEXT_LOCK_ERR) {
+                       ret = -1;
+                       goto out;
+               }
                count++;
                /* now read the full record */
                key.dptr = tdb_alloc_read(tdb, tl->off + sizeof(rec), 
@@ -169,6 +179,8 @@ static int tdb_traverse_internal(struct tdb_context *tdb,
                dbuf.dptr = key.dptr + rec.key_len;
                dbuf.dsize = rec.data_len;
 
+               tdb_trace_1rec_retrec(tdb, "traverse", key, dbuf);
+
                /* Drop chain lock, call out */
                if (tdb_unlock(tdb, tl->hash, tl->lock_rw) != 0) {
                        ret = -1;
@@ -177,7 +189,7 @@ static int tdb_traverse_internal(struct tdb_context *tdb,
                }
                if (fn && fn(tdb, key, dbuf, private_data)) {
                        /* They want us to terminate traversal */
-                       ret = count;
+                       tdb_trace_ret(tdb, "tdb_traverse_end", count);
                        if (tdb_unlock_record(tdb, tl->off) != 0) {
                                TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_traverse: unlock_record failed!\n"));;
                                ret = -1;
@@ -187,6 +199,7 @@ static int tdb_traverse_internal(struct tdb_context *tdb,
                }
                SAFE_FREE(key.dptr);
        }
+       tdb_trace(tdb, "tdb_traverse_end");
 out:
        tdb->travlocks.next = tl->next;
        if (ret < 0)
@@ -212,10 +225,11 @@ int tdb_traverse_read(struct tdb_context *tdb,
        }
 
        tdb->traverse_read++;
+       tdb_trace(tdb, "tdb_traverse_read_start");
        ret = tdb_traverse_internal(tdb, fn, private_data, &tl);
        tdb->traverse_read--;
 
-       tdb_transaction_unlock(tdb);
+       tdb_transaction_unlock(tdb, F_RDLCK);
 
        return ret;
 }
@@ -236,16 +250,17 @@ int tdb_traverse(struct tdb_context *tdb,
        if (tdb->read_only || tdb->traverse_read) {
                return tdb_traverse_read(tdb, fn, private_data);
        }
-       
+
        if (tdb_transaction_lock(tdb, F_WRLCK)) {
                return -1;
        }
 
        tdb->traverse_write++;
+       tdb_trace(tdb, "tdb_traverse_start");
        ret = tdb_traverse_internal(tdb, fn, private_data, &tl);
        tdb->traverse_write--;
 
-       tdb_transaction_unlock(tdb);
+       tdb_transaction_unlock(tdb, F_WRLCK);
 
        return ret;
 }
@@ -255,7 +270,8 @@ int tdb_traverse(struct tdb_context *tdb,
 TDB_DATA tdb_firstkey(struct tdb_context *tdb)
 {
        TDB_DATA key;
-       struct list_struct rec;
+       struct tdb_record rec;
+       tdb_off_t off;
 
        /* release any old lock */
        if (tdb_unlock_record(tdb, tdb->travlocks.off) != 0)
@@ -264,12 +280,17 @@ TDB_DATA tdb_firstkey(struct tdb_context *tdb)
        tdb->travlocks.lock_rw = F_RDLCK;
 
        /* Grab first record: locks chain and returned record. */
-       if (tdb_next_lock(tdb, &tdb->travlocks, &rec) <= 0)
+       off = tdb_next_lock(tdb, &tdb->travlocks, &rec);
+       if (off == 0 || off == TDB_NEXT_LOCK_ERR) {
+               tdb_trace_retrec(tdb, "tdb_firstkey", tdb_null);
                return tdb_null;
+       }
        /* now read the key */
        key.dsize = rec.key_len;
        key.dptr =tdb_alloc_read(tdb,tdb->travlocks.off+sizeof(rec),key.dsize);
 
+       tdb_trace_retrec(tdb, "tdb_firstkey", key);
+
        /* Unlock the hash chain of the record we just read. */
        if (tdb_unlock(tdb, tdb->travlocks.hash, tdb->travlocks.lock_rw) != 0)
                TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_firstkey: error occurred while tdb_unlocking!\n"));
@@ -281,8 +302,9 @@ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey)
 {
        uint32_t oldhash;
        TDB_DATA key = tdb_null;
-       struct list_struct rec;
+       struct tdb_record rec;
        unsigned char *k = NULL;
+       tdb_off_t off;
 
        /* Is locked key the old key?  If so, traverse will be reliable. */
        if (tdb->travlocks.off) {
@@ -294,6 +316,8 @@ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey)
                    || memcmp(k, oldkey.dptr, oldkey.dsize) != 0) {
                        /* No, it wasn't: unlock it and start from scratch */
                        if (tdb_unlock_record(tdb, tdb->travlocks.off) != 0) {
+                               tdb_trace_1rec_retrec(tdb, "tdb_nextkey",
+                                                     oldkey, tdb_null);
                                SAFE_FREE(k);
                                return tdb_null;
                        }
@@ -310,8 +334,10 @@ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey)
        if (!tdb->travlocks.off) {
                /* No previous element: do normal find, and lock record */
                tdb->travlocks.off = tdb_find_lock_hash(tdb, oldkey, tdb->hash_fn(&oldkey), tdb->travlocks.lock_rw, &rec);
-               if (!tdb->travlocks.off)
+               if (!tdb->travlocks.off) {
+                       tdb_trace_1rec_retrec(tdb, "tdb_nextkey", oldkey, tdb_null);
                        return tdb_null;
+               }
                tdb->travlocks.hash = BUCKET(rec.full_hash);
                if (tdb_lock_record(tdb, tdb->travlocks.off) != 0) {
                        TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_nextkey: lock_record failed (%s)!\n", strerror(errno)));
@@ -322,7 +348,8 @@ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey)
 
        /* Grab next record: locks chain and returned record,
           unlocks old record */
-       if (tdb_next_lock(tdb, &tdb->travlocks, &rec) > 0) {
+       off = tdb_next_lock(tdb, &tdb->travlocks, &rec);
+       if (off != TDB_NEXT_LOCK_ERR && off != 0) {
                key.dsize = rec.key_len;
                key.dptr = tdb_alloc_read(tdb, tdb->travlocks.off+sizeof(rec),
                                          key.dsize);
@@ -333,6 +360,7 @@ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey)
        /* Unlock the chain of old record */
        if (tdb_unlock(tdb, BUCKET(oldhash), tdb->travlocks.lock_rw) != 0)
                TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_nextkey: WARNING tdb_unlock failed!\n"));
+       tdb_trace_1rec_retrec(tdb, "tdb_nextkey", oldkey, key);
        return key;
 }