tdb: fix an early release of the global lock that can cause data corruption
author: Volker Lendecke <vl@samba.org>
Fri, 29 Jan 2010 17:21:09 +0000 (18:21 +0100)
committer: Ronnie Sahlberg <ronniesahlberg@gmail.com>
Mon, 1 Feb 2010 20:52:15 +0000 (07:52 +1100)
There was a bug in tdb where the

                tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1);

(ending the transaction-"mutex") was done before the

                        /* remove the recovery marker */

This means that when a transaction is committed, there is a window in which
another opener of the file sees the transaction marker while the committer is
still fully functional and working on the transaction. This led to the
transaction being rolled back by that second opener of the file, while
tdb_transaction_commit() reported no error to the caller.

This patch moves the F_UNLCK to after the recovery marker has been removed,
closing this window.

lib/tdb/common/transaction.c

index 20f2bfc2cd4595a27114fc9662c483c83b72b29d..b8988ea8301d807600f14925e756a9356848ea23 100644 (file)
@@ -135,6 +135,9 @@ struct tdb_transaction {
        bool prepared;
        tdb_off_t magic_offset;
 
+       /* set when the GLOBAL_LOCK has been taken */
+       bool global_lock_taken;
+
        /* old file size before transaction */
        tdb_len_t old_map_size;
 
@@ -603,6 +606,11 @@ int _tdb_transaction_cancel(struct tdb_context *tdb)
                }
        }
 
+       if (tdb->transaction->global_lock_taken) {
+               tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1);
+               tdb->transaction->global_lock_taken = false;
+       }
+
        /* remove any global lock created during the transaction */
        if (tdb->global_lock.count != 0) {
                tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 4*tdb->header.hash_size);
@@ -947,11 +955,12 @@ static int _tdb_transaction_prepare_commit(struct tdb_context *tdb)
                return -1;
        }
 
+       tdb->transaction->global_lock_taken = true;
+
        if (!(tdb->flags & TDB_NOSYNC)) {
                /* write the recovery data to the end of the file */
                if (transaction_setup_recovery(tdb, &tdb->transaction->magic_offset) == -1) {
                        TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_prepare_commit: failed to setup recovery data\n"));
-                       tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1);
                        _tdb_transaction_cancel(tdb);
                        return -1;
                }
@@ -966,7 +975,6 @@ static int _tdb_transaction_prepare_commit(struct tdb_context *tdb)
                                             tdb->transaction->old_map_size) == -1) {
                        tdb->ecode = TDB_ERR_IO;
                        TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_prepare_commit: expansion failed\n"));
-                       tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1);
                        _tdb_transaction_cancel(tdb);
                        return -1;
                }
@@ -1056,7 +1064,6 @@ int tdb_transaction_commit(struct tdb_context *tdb)
                        tdb_transaction_recover(tdb); 
 
                        _tdb_transaction_cancel(tdb);
-                       tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1);
 
                        TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: write failed\n"));
                        return -1;
@@ -1072,8 +1079,6 @@ int tdb_transaction_commit(struct tdb_context *tdb)
                return -1;
        }
 
-       tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1);
-
        /*
          TODO: maybe write to some dummy hdr field, or write to magic
          offset without mmap, before the last sync, instead of the