ctdb_client: fix race in starting concurrent transactions on a single node
author Michael Adam <obnox@samba.org>
Tue, 4 Aug 2009 07:45:50 +0000 (09:45 +0200)
committer Michael Adam <obnox@samba.org>
Mon, 21 Sep 2009 09:16:18 +0000 (11:16 +0200)
There are two races in concurrent transactions on a single node.
One in starting a transaction, and one with committing (replaying).

This commit closes the first race by storing the pid in the
transaction-lock record and comparing our own pid against it,
to prevent starting a second transaction when another process
has come in between and changed the pid in the lock record.

Michael

client/ctdb_client.c

index 1f5bb4cd2c790aa9d10f025916d5ed529510a967..96214353988cc234e2ff850ff8e513ada562c671 100644 (file)
@@ -3107,11 +3107,13 @@ static int ctdb_transaction_fetch_start(struct ctdb_transaction_handle *h)
 {
        struct ctdb_record_handle *rh;
        TDB_DATA key;
+       TDB_DATA data;
        struct ctdb_ltdb_header header;
        TALLOC_CTX *tmp_ctx;
        const char *keyname = CTDB_TRANSACTION_LOCK_KEY;
        int ret;
        struct ctdb_db_context *ctdb_db = h->ctdb_db;
+       pid_t pid;
 
        key.dptr = discard_const(keyname);
        key.dsize = strlen(keyname);
@@ -3130,6 +3132,21 @@ again:
                talloc_free(tmp_ctx);
                return -1;
        }
+       /*
+        * store the pid in the database:
+        * it is not enough that the node is dmaster...
+        */
+       pid = getpid();
+       data.dptr = (unsigned char *)&pid;
+       data.dsize = sizeof(pid_t);
+       ret = ctdb_ltdb_store(ctdb_db, key, &(rh->header), data);
+       if (ret != 0) {
+               DEBUG(DEBUG_ERR, (__location__ " Failed to store pid in "
+                                 "transaction record\n"));
+               talloc_free(tmp_ctx);
+               return -1;
+       }
+
        talloc_free(rh);
 
        ret = tdb_transaction_start(ctdb_db->ltdb->tdb);
@@ -3139,13 +3156,19 @@ again:
                return -1;
        }
 
-       ret = ctdb_ltdb_fetch(ctdb_db, key, &header, tmp_ctx, NULL);
+       ret = ctdb_ltdb_fetch(ctdb_db, key, &header, tmp_ctx, &data);
        if (ret != 0 || header.dmaster != ctdb_db->ctdb->pnn) {
                tdb_transaction_cancel(ctdb_db->ltdb->tdb);
                talloc_free(tmp_ctx);
                goto again;
        }
 
+       if ((data.dsize != sizeof(pid_t)) || (*(pid_t *)(data.dptr) != pid)) {
+               tdb_transaction_cancel(ctdb_db->ltdb->tdb);
+               talloc_free(tmp_ctx);
+               goto again;
+       }
+
        talloc_free(tmp_ctx);
 
        return 0;