ctdb-daemon: Don't check if lock_ctx->ctdb_db is NULL
[samba.git] / ctdb / server / ctdb_lock.c
index db38e0d7c13089bf97ed3b6c74fa62ddac536177..5f032ae568b7297df5470b92a0c742b527e23a74 100644 (file)
    You should have received a copy of the GNU General Public License
    along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
-#include "includes.h"
-#include "include/ctdb_private.h"
-#include "include/ctdb_protocol.h"
-#include "tevent.h"
-#include "tdb.h"
-#include "db_wrap.h"
+#include "replace.h"
 #include "system/filesys.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/tdb_wrap/tdb_wrap.h"
 #include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+#include "lib/util/samba_util.h"
+#include "lib/util/sys_rw.h"
+
+#include "ctdb_private.h"
+
+#include "common/common.h"
+#include "common/logging.h"
 
 /*
  * Non-blocking Locking API
  * 2. Once the locks are obtained, signal parent process via fd.
  * 3. Invoke registered callback routine with locking status.
  * 4. If the child process cannot get locks within certain time,
- *    diagnose using /proc/locks and log warning message
+ *    execute an external script to debug.
  *
  * ctdb_lock_record()      - get a lock on a record
  * ctdb_lock_db()          - get a lock on a DB
- * ctdb_lock_alldb_prio()  - get a lock on all DBs with given priority
- * ctdb_lock_alldb()       - get a lock on all DBs
  *
  *  auto_mark              - whether to mark/unmark DBs in before/after callback
+ *                           = false is used for freezing databases for
+ *                           recovery since the recovery cannot start till
+ *                           databases are locked on all the nodes.
+ *                           = true is used for record locks.
  */
 
-/* FIXME: Add a tunable max_lock_processes_per_db */
-#define MAX_LOCK_PROCESSES_PER_DB              (100)
-
+/* Kind of lock a lock_context asks for; also the index into lock_type_str[]. */
 enum lock_type {
        LOCK_RECORD,
        LOCK_DB,
-       LOCK_ALLDB_PRIO,
-       LOCK_ALLDB,
 };
 
+/*
+ * Printable name for each enum lock_type value, indexed by the enum.
+ * Passed as the label argument to the latency statistics macros.
+ */
 static const char * const lock_type_str[] = {
        "lock_record",
        "lock_db",
-       "lock_alldb_prio",
-       "lock_db",
 };
 
 struct lock_request;
@@ -71,184 +75,49 @@ struct lock_context {
        TDB_DATA key;
        uint32_t priority;
        bool auto_mark;
-       struct lock_request *req_queue;
+       struct lock_request *request;
        pid_t child;
        int fd[2];
        struct tevent_fd *tfd;
        struct tevent_timer *ttimer;
-       pid_t block_child;
-       int block_fd[2];
        struct timeval start_time;
+       uint32_t key_hash;
+       bool can_schedule;
 };
 
 /* lock_request is the client specific part for a lock request */
 struct lock_request {
-       struct lock_request *next, *prev;
+       /* Lock context this request belongs to; set to NULL once detached */
        struct lock_context *lctx;
+       /* Invoked as callback(private_data, locked) when the outcome is known */
        void (*callback)(void *, bool);
        void *private_data;
 };
 
 
-/*
- * Support samba 3.6.x (and older) versions which do not set db priority.
- *
- * By default, all databases are set to priority 1. So only when priority
- * is set to 1, check for databases that need higher priority.
- */
-static bool later_db(struct ctdb_context *ctdb, const char *name)
-{
-       if (ctdb->tunable.samba3_hack == 0) {
-               return false;
-       }
-
-       if (strstr(name, "brlock") ||
-           strstr(name, "g_lock") ||
-           strstr(name, "notify_onelevel") ||
-           strstr(name, "serverid") ||
-           strstr(name, "xattr_tdb")) {
-               return true;
-       }
-
-       return false;
-}
-
-typedef int (*db_handler_t)(struct ctdb_db_context *ctdb_db,
-                           uint32_t priority,
-                           void *private_data);
-
-static int ctdb_db_iterator(struct ctdb_context *ctdb, uint32_t priority,
-                           db_handler_t handler, void *private_data)
+/*
+ * Call handler(ctdb_db, private_data) for each database on ctdb->db_list,
+ * in list order.
+ *
+ * Stops at the first handler that returns non-zero and returns -1.
+ * Returns 0 when every handler returned 0 (including an empty list).
+ */
+int ctdb_db_iterator(struct ctdb_context *ctdb, ctdb_db_handler_t handler,
+                    void *private_data)
 {
        struct ctdb_db_context *ctdb_db;
        int ret;
 
        for (ctdb_db = ctdb->db_list; ctdb_db; ctdb_db = ctdb_db->next) {
-               if (ctdb_db->priority != priority) {
-                       continue;
-               }
-               if (later_db(ctdb, ctdb_db->db_name)) {
-                       continue;
-               }
-               ret = handler(ctdb_db, priority, private_data);
+               ret = handler(ctdb_db, private_data);
                if (ret != 0) {
                        return -1;
                }
        }
 
-       /* If priority != 1, later_db check is not required and can return */
-       if (priority != 1) {
-               return 0;
-       }
-
-       for (ctdb_db = ctdb->db_list; ctdb_db; ctdb_db = ctdb_db->next) {
-               if (!later_db(ctdb, ctdb_db->db_name)) {
-                       continue;
-               }
-               ret = handler(ctdb_db, priority, private_data);
-               if (ret != 0) {
-                       return -1;
-               }
-       }
-
-       return 0;
-}
-
-
-/*
- * lock all databases
- */
-static int db_lock_handler(struct ctdb_db_context *ctdb_db, uint32_t priority,
-                          void *private_data)
-{
-       if (priority == 0) {
-               DEBUG(DEBUG_INFO, ("locking database %s\n",
-                                  ctdb_db->db_name));
-       } else {
-               DEBUG(DEBUG_INFO, ("locking database %s, priority:%u\n",
-                                  ctdb_db->db_name, priority));
-       }
-
-       if (tdb_lockall(ctdb_db->ltdb->tdb) != 0) {
-               DEBUG(DEBUG_ERR, ("Failed to lock database %s\n",
-                                 ctdb_db->db_name));
-               return -1;
-       }
-
-       return 0;
-}
-
-int ctdb_lockall_prio(struct ctdb_context *ctdb, uint32_t priority)
-{
-       return ctdb_db_iterator(ctdb, priority, db_lock_handler, NULL);
-}
-
-static int ctdb_lockall(struct ctdb_context *ctdb)
-{
-       uint32_t priority;
-
-       for (priority=1; priority<=NUM_DB_PRIORITIES; priority++) {
-               if (ctdb_db_iterator(ctdb, priority, db_lock_handler, NULL) != 0) {
-                       return -1;
-               }
-       }
-
        return 0;
 }
 
-
-/*
- * unlock all databases
- */
-static int db_unlock_handler(struct ctdb_db_context *ctdb_db, uint32_t priority,
-                            void *private_data)
-{
-       if (priority == 0) {
-               DEBUG(DEBUG_INFO, ("unlocking database %s\n",
-                                  ctdb_db->db_name));
-       } else {
-               DEBUG(DEBUG_INFO, ("unlocking database %s, priority:%u\n",
-                                  ctdb_db->db_name, priority));
-       }
-
-       if (tdb_unlockall(ctdb_db->ltdb->tdb) != 0) {
-               DEBUG(DEBUG_ERR, ("Failed to unlock database %s\n",
-                                 ctdb_db->db_name));
-               return -1;
-       }
-
-       return 0;
-}
-
-int ctdb_unlockall_prio(struct ctdb_context *ctdb, uint32_t priority)
-{
-       return ctdb_db_iterator(ctdb, priority, db_unlock_handler, NULL);
-}
-
-static int ctdb_unlockall(struct ctdb_context *ctdb)
-{
-       uint32_t priority;
-
-       for (priority=NUM_DB_PRIORITIES; priority>=0; priority--) {
-               if (ctdb_db_iterator(ctdb, priority, db_unlock_handler, NULL) != 0) {
-                       return -1;
-               }
-       }
-
-       return 0;
-}
-
-
 /*
  * lock all databases - mark only
  */
-static int db_lock_mark_handler(struct ctdb_db_context *ctdb_db, uint32_t priority,
+static int db_lock_mark_handler(struct ctdb_db_context *ctdb_db,
                                void *private_data)
 {
        int tdb_transaction_write_lock_mark(struct tdb_context *);
 
-       DEBUG(DEBUG_INFO, ("marking locked database %s, priority:%u\n",
-                          ctdb_db->db_name, priority));
+       DEBUG(DEBUG_INFO, ("marking locked database %s\n", ctdb_db->db_name));
 
        if (tdb_transaction_write_lock_mark(ctdb_db->ltdb->tdb) != 0) {
                DEBUG(DEBUG_ERR, ("Failed to mark (transaction lock) database %s\n",
@@ -265,47 +134,26 @@ static int db_lock_mark_handler(struct ctdb_db_context *ctdb_db, uint32_t priori
        return 0;
 }
 
-int ctdb_lockall_mark_prio(struct ctdb_context *ctdb, uint32_t priority)
+/*
+ * Mark a single database as locked by calling db_lock_mark_handler().
+ *
+ * Returns -1 (after logging an error) when ctdb_db_frozen() reports that
+ * the database is not frozen; otherwise returns the handler's result.
+ */
+int ctdb_lockdb_mark(struct ctdb_db_context *ctdb_db)
 {
-       /*
-        * This function is only used by the main dameon during recovery.
-        * At this stage, the databases have already been locked, by a
-        * dedicated child process. The freeze_mode variable is used to track
-        * whether the actual locks are held by the child process or not.
-        */
-
-       if (ctdb->freeze_mode[priority] != CTDB_FREEZE_FROZEN) {
-               DEBUG(DEBUG_ERR, ("Attempt to mark all databases locked when not frozen\n"));
+       if (!ctdb_db_frozen(ctdb_db)) {
+               DEBUG(DEBUG_ERR,
+                     ("Attempt to mark database locked when not frozen\n"));
                return -1;
        }
 
-       return ctdb_db_iterator(ctdb, priority, db_lock_mark_handler, NULL);
+       return db_lock_mark_handler(ctdb_db, NULL);
 }
 
-static int ctdb_lockall_mark(struct ctdb_context *ctdb)
-{
-       uint32_t priority;
-
-       for (priority=1; priority<=NUM_DB_PRIORITIES; priority++) {
-               if (ctdb_db_iterator(ctdb, priority, db_lock_mark_handler, NULL) != 0) {
-                       return -1;
-               }
-       }
-
-       return 0;
-}
-
-
 /*
  * lock all databases - unmark only
  */
-static int db_lock_unmark_handler(struct ctdb_db_context *ctdb_db, uint32_t priority,
+static int db_lock_unmark_handler(struct ctdb_db_context *ctdb_db,
                                  void *private_data)
 {
        int tdb_transaction_write_lock_unmark(struct tdb_context *);
 
-       DEBUG(DEBUG_INFO, ("unmarking locked database %s, priority:%u\n",
-                          ctdb_db->db_name, priority));
+       DEBUG(DEBUG_INFO, ("unmarking locked database %s\n", ctdb_db->db_name));
 
        if (tdb_transaction_write_lock_unmark(ctdb_db->ltdb->tdb) != 0) {
                DEBUG(DEBUG_ERR, ("Failed to unmark (transaction lock) database %s\n",
@@ -322,97 +170,15 @@ static int db_lock_unmark_handler(struct ctdb_db_context *ctdb_db, uint32_t prio
        return 0;
 }
 
-int ctdb_lockall_unmark_prio(struct ctdb_context *ctdb, uint32_t priority)
+/*
+ * Undo ctdb_lockdb_mark() for a single database by calling
+ * db_lock_unmark_handler().
+ *
+ * Returns -1 (after logging an error) when ctdb_db_frozen() reports that
+ * the database is not frozen; otherwise returns the handler's result.
+ */
+int ctdb_lockdb_unmark(struct ctdb_db_context *ctdb_db)
 {
-       /*
-        * This function is only used by the main dameon during recovery.
-        * At this stage, the databases have already been locked, by a
-        * dedicated child process. The freeze_mode variable is used to track
-        * whether the actual locks are held by the child process or not.
-        */
-
-       if (ctdb->freeze_mode[priority] != CTDB_FREEZE_FROZEN) {
-               DEBUG(DEBUG_ERR, ("Attempt to unmark all databases locked when not frozen\n"));
+       if (!ctdb_db_frozen(ctdb_db)) {
+               DEBUG(DEBUG_ERR,
+                     ("Attempt to unmark database locked when not frozen\n"));
                return -1;
        }
 
-       return ctdb_db_iterator(ctdb, priority, db_lock_unmark_handler, NULL);
-}
-
-static int ctdb_lockall_unmark(struct ctdb_context *ctdb)
-{
-       uint32_t priority;
-
-       for (priority=NUM_DB_PRIORITIES; priority>=0; priority--) {
-               if (ctdb_db_iterator(ctdb, priority, db_lock_unmark_handler, NULL) != 0) {
-                       return -1;
-               }
-       }
-
-       return 0;
-}
-
-
-/*
- * Lock record / db depending on lock_ctx->type
- * Called from child context.
- */
-static bool ctdb_lock_item(struct lock_context *lock_ctx)
-{
-       bool status = false;
-
-       switch (lock_ctx->type) {
-       case LOCK_RECORD:
-               if (tdb_chainlock(lock_ctx->ctdb_db->ltdb->tdb, lock_ctx->key) == 0) {
-                       status = true;
-               }
-               break;
-
-       case LOCK_DB:
-               if (tdb_lockall(lock_ctx->ctdb_db->ltdb->tdb) == 0) {
-                       status = true;
-               }
-               break;
-
-       case LOCK_ALLDB_PRIO:
-               if (ctdb_lockall_prio(lock_ctx->ctdb, lock_ctx->priority) == 0) {
-                       status = true;
-               }
-               break;
-
-       case LOCK_ALLDB:
-               if (ctdb_lockall(lock_ctx->ctdb) == 0) {
-                       status = true;
-               }
-               break;
-       }
-
-       return status;
-}
-
-
-/*
- * Unlock record / db depending on lock_ctx->type
- */
-void ctdb_unlock_item(struct lock_context *lock_ctx)
-{
-       switch (lock_ctx->type) {
-       case LOCK_RECORD:
-               tdb_chainunlock(lock_ctx->ctdb_db->ltdb->tdb, lock_ctx->key);
-               break;
-
-       case LOCK_DB:
-               tdb_unlockall(lock_ctx->ctdb_db->ltdb->tdb);
-               break;
-
-       case LOCK_ALLDB_PRIO:
-               ctdb_unlockall_prio(lock_ctx->ctdb, lock_ctx->priority);
-               break;
-
-       case LOCK_ALLDB:
-               ctdb_unlockall(lock_ctx->ctdb);
-               break;
-       }
+       return db_lock_unmark_handler(ctdb_db, NULL);
 }
 
 static void ctdb_lock_schedule(struct ctdb_context *ctdb);
@@ -422,21 +188,27 @@ static void ctdb_lock_schedule(struct ctdb_context *ctdb);
  */
 static int ctdb_lock_context_destructor(struct lock_context *lock_ctx)
 {
+       if (lock_ctx->request) {
+               lock_ctx->request->lctx = NULL;
+       }
        if (lock_ctx->child > 0) {
-               ctdb_kill(lock_ctx->ctdb, lock_ctx->child, SIGKILL);
-               DLIST_REMOVE(lock_ctx->ctdb->lock_current, lock_ctx);
-               lock_ctx->ctdb->lock_num_current--;
-               CTDB_DECREMENT_STAT(lock_ctx->ctdb, locks.num_current);
-               if (lock_ctx->type == LOCK_RECORD || lock_ctx->type == LOCK_DB) {
-                       CTDB_DECREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_current);
+               ctdb_kill(lock_ctx->ctdb, lock_ctx->child, SIGTERM);
+               if (lock_ctx->type == LOCK_RECORD) {
+                       DLIST_REMOVE(lock_ctx->ctdb_db->lock_current, lock_ctx);
+               } else {
+                       DLIST_REMOVE(lock_ctx->ctdb->lock_current, lock_ctx);
                }
+               lock_ctx->ctdb_db->lock_num_current--;
+               CTDB_DECREMENT_STAT(lock_ctx->ctdb, locks.num_current);
+               CTDB_DECREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_current);
        } else {
-               DLIST_REMOVE(lock_ctx->ctdb->lock_pending, lock_ctx);
-               lock_ctx->ctdb->lock_num_pending--;
-               CTDB_DECREMENT_STAT(lock_ctx->ctdb, locks.num_pending);
-               if (lock_ctx->type == LOCK_RECORD || lock_ctx->type == LOCK_DB) {
-                       CTDB_DECREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_pending);
+               if (lock_ctx->type == LOCK_RECORD) {
+                       DLIST_REMOVE(lock_ctx->ctdb_db->lock_pending, lock_ctx);
+               } else {
+                       DLIST_REMOVE(lock_ctx->ctdb->lock_pending, lock_ctx);
                }
+               CTDB_DECREMENT_STAT(lock_ctx->ctdb, locks.num_pending);
+               CTDB_DECREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_pending);
        }
 
        ctdb_lock_schedule(lock_ctx->ctdb);
@@ -450,21 +222,16 @@ static int ctdb_lock_context_destructor(struct lock_context *lock_ctx)
  */
 static int ctdb_lock_request_destructor(struct lock_request *lock_request)
 {
-       DLIST_REMOVE(lock_request->lctx->req_queue, lock_request);
-       return 0;
-}
-
+       /* Request already detached from its lock context: nothing to release */
        if (lock_request->lctx == NULL) {
+               return 0;
+       }
 
-void ctdb_lock_free_request_context(struct lock_request *lock_req)
-{
-       struct lock_context *lock_ctx;
+       /*
+        * Clear the context's pointer to this request before freeing the
+        * context, so the context destructor does not write to the request.
+        */
+       lock_request->lctx->request = NULL;
+       TALLOC_FREE(lock_request->lctx);
 
-       lock_ctx = lock_req->lctx;
-       talloc_free(lock_req);
-       talloc_free(lock_ctx);
+       return 0;
 }
 
-
 /*
  * Process all the callbacks waiting for lock
  *
@@ -472,87 +239,81 @@ void ctdb_lock_free_request_context(struct lock_request *lock_req)
  */
 static void process_callbacks(struct lock_context *lock_ctx, bool locked)
 {
-       struct lock_request *request, *next;
+       struct lock_request *request;
+       /*
+        * Cached up front: without auto_mark the request keeps its lctx
+        * pointer, so a callback that frees the request also frees lock_ctx
+        * (via the request destructor) and it must not be read afterwards.
+        */
+       bool auto_mark = lock_ctx->auto_mark;
 
-       if (lock_ctx->auto_mark && locked) {
+       /* Mark before running the callback; the matching unmark follows it */
+       if (auto_mark && locked) {
                switch (lock_ctx->type) {
                case LOCK_RECORD:
                        tdb_chainlock_mark(lock_ctx->ctdb_db->ltdb->tdb, lock_ctx->key);
                        break;
 
                case LOCK_DB:
-                       tdb_lockall_mark(lock_ctx->ctdb_db->ltdb->tdb);
-                       break;
-
-               case LOCK_ALLDB_PRIO:
-                       ctdb_lockall_mark_prio(lock_ctx->ctdb, lock_ctx->priority);
-                       break;
-
-               case LOCK_ALLDB:
-                       ctdb_lockall_mark(lock_ctx->ctdb);
+                       (void)ctdb_lockdb_mark(lock_ctx->ctdb_db);
                        break;
                }
        }
 
-       /* Iterate through all callbacks */
-       request = lock_ctx->req_queue;
-       while (request) {
-               if (lock_ctx->auto_mark) {
-                       /* Reset the destructor, so request is not removed from the list */
-                       talloc_set_destructor(request, NULL);
-               }
+       request = lock_ctx->request;
+       if (auto_mark) {
+               /* Since request may be freed in the callback, unset the lock
+                * context, so request destructor will not free lock context.
+                */
+               request->lctx = NULL;
+       }
+
+       /* Since request may be freed in the callback, unset the request */
+       lock_ctx->request = NULL;
+
+       request->callback(request->private_data, locked);
 
-               /* In case, callback frees the request, store next */
-               next = request->next;
-               request->callback(request->private_data, locked);
-               request = next;
+       /* Without auto_mark nothing is unmarked or freed by this function */
+       if (!auto_mark) {
+               return;
        }
 
-       if (lock_ctx->auto_mark && locked) {
+       if (locked) {
                switch (lock_ctx->type) {
                case LOCK_RECORD:
                        tdb_chainlock_unmark(lock_ctx->ctdb_db->ltdb->tdb, lock_ctx->key);
                        break;
 
                case LOCK_DB:
-                       tdb_lockall_unmark(lock_ctx->ctdb_db->ltdb->tdb);
-                       break;
-
-               case LOCK_ALLDB_PRIO:
-                       ctdb_lockall_unmark_prio(lock_ctx->ctdb, lock_ctx->priority);
-                       break;
-
-               case LOCK_ALLDB:
-                       ctdb_lockall_unmark(lock_ctx->ctdb);
+                       ctdb_lockdb_unmark(lock_ctx->ctdb_db);
                        break;
                }
        }
+
+       /* auto_mark: the lock context is released here, after the unmark */
+       talloc_free(lock_ctx);
 }
 
 
 static int lock_bucket_id(double t)
 {
-       double us = 1.e-6, ms = 1.e-3, s = 1;
+       double ms = 1.e-3, s = 1;
        int id;
 
-       if (t < 1*us) {
+       if (t < 1*ms) {
                id = 0;
-       } else if (t < 10*us) {
+       } else if (t < 10*ms) {
                id = 1;
-       } else if (t < 100*us) {
+       } else if (t < 100*ms) {
                id = 2;
-       } else if (t < 1*ms) {
+       } else if (t < 1*s) {
                id = 3;
-       } else if (t < 10*ms) {
+       } else if (t < 2*s) {
                id = 4;
-       } else if (t < 100*ms) {
+       } else if (t < 4*s) {
                id = 5;
-       } else if (t < 1*s) {
+       } else if (t < 8*s) {
                id = 6;
-       } else if (t < 10*s) {
+       } else if (t < 16*s) {
                id = 7;
-       } else {
+       } else if (t < 32*s) {
                id = 8;
+       } else if (t < 64*s) {
+               id = 9;
+       } else {
+               id = 10;
        }
 
        return id;
@@ -568,7 +329,6 @@ static void ctdb_lock_handler(struct tevent_context *ev,
                            void *private_data)
 {
        struct lock_context *lock_ctx;
-       TALLOC_CTX *tmp_ctx;
        char c;
        bool locked;
        double t;
@@ -577,54 +337,161 @@ static void ctdb_lock_handler(struct tevent_context *ev,
        lock_ctx = talloc_get_type_abort(private_data, struct lock_context);
 
        /* cancel the timeout event */
-       if (lock_ctx->ttimer) {
-               TALLOC_FREE(lock_ctx->ttimer);
-       }
+       TALLOC_FREE(lock_ctx->ttimer);
 
        t = timeval_elapsed(&lock_ctx->start_time);
        id = lock_bucket_id(t);
 
-       if (lock_ctx->auto_mark) {
-               tmp_ctx = talloc_new(ev);
-               talloc_steal(tmp_ctx, lock_ctx);
-       }
-
        /* Read the status from the child process */
-       read(lock_ctx->fd[0], &c, 1);
-       locked = (c == 0 ? true : false);
+       if (sys_read(lock_ctx->fd[0], &c, 1) != 1) {
+               locked = false;
+       } else {
+               locked = (c == 0 ? true : false);
+       }
 
        /* Update statistics */
-       CTDB_DECREMENT_STAT(lock_ctx->ctdb, locks.num_pending);
        CTDB_INCREMENT_STAT(lock_ctx->ctdb, locks.num_calls);
-       if (lock_ctx->ctdb_db) {
-               CTDB_DECREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_pending);
-               CTDB_INCREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_calls);
-       }
+       CTDB_INCREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_calls);
 
        if (locked) {
-               CTDB_INCREMENT_STAT(lock_ctx->ctdb, locks.num_current);
                CTDB_INCREMENT_STAT(lock_ctx->ctdb, locks.buckets[id]);
-               if (lock_ctx->ctdb_db) {
-                       CTDB_INCREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_current);
-                       CTDB_UPDATE_DB_LATENCY(lock_ctx->ctdb_db, lock_type_str[lock_ctx->type], locks.latency, t);
-                       CTDB_INCREMENT_DB_STAT(lock_ctx->ctdb_db, locks.buckets[id]);
-               }
+               CTDB_UPDATE_LATENCY(lock_ctx->ctdb, lock_ctx->ctdb_db,
+                                   lock_type_str[lock_ctx->type], locks.latency,
+                                   lock_ctx->start_time);
+
+               CTDB_UPDATE_DB_LATENCY(lock_ctx->ctdb_db, lock_type_str[lock_ctx->type], locks.latency, t);
+               CTDB_INCREMENT_DB_STAT(lock_ctx->ctdb_db, locks.buckets[id]);
        } else {
                CTDB_INCREMENT_STAT(lock_ctx->ctdb, locks.num_failed);
-               if (lock_ctx->ctdb_db) {
-                       CTDB_INCREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_failed);
-               }
+               CTDB_INCREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_failed);
        }
 
        process_callbacks(lock_ctx, locked);
+}
 
-       if (lock_ctx->auto_mark) {
-               talloc_free(tmp_ctx);
+/*
+ * Per-key bookkeeping used by lock_log_skip() to decide whether a lock
+ * timeout for that key should be logged again. A pointer to the entry is
+ * what gets stored in the lock_log hash.
+ */
+struct lock_log_entry {
+       /* Hash database this entry is stored in (used to delete it later) */
+       struct db_hash_context *lock_log;
+       /* Private copy of the key, allocated as a talloc child of the entry */
+       TDB_DATA key;
+       /* Elapsed seconds recorded the last time logging was allowed */
+       unsigned long log_sec;
+       /* Pending lock_log_cleanup() timer, or NULL */
+       struct tevent_timer *timer;
+};
+
+/*
+ * Parser passed to db_hash_fetch(): the stored value is expected to be a
+ * single pointer to a struct lock_log_entry.
+ *
+ * private_data is a struct lock_log_entry ** that receives the pointer.
+ * Returns EINVAL when the stored value is not pointer-sized, 0 otherwise.
+ * talloc_get_type_abort() aborts if the pointer is not of the expected type.
+ */
+static int lock_log_fetch_parser(uint8_t *keybuf, size_t keylen,
+                                uint8_t *databuf, size_t datalen,
+                                void *private_data)
+{
+       struct lock_log_entry **entry =
+               (struct lock_log_entry **)private_data;
+
+       if (datalen != sizeof(struct lock_log_entry *)) {
+               return EINVAL;
+       }
+
+       /* NOTE(review): reads databuf as void ** - assumes it is suitably
+        * aligned for a pointer; confirm against db_hash storage. */
+       *entry = talloc_get_type_abort(*(void **)databuf,
+                                      struct lock_log_entry);
+       return 0;
+}
+
+/*
+ * Timer callback for a lock_log_entry: removes the entry's key from the
+ * lock_log hash and frees the entry.
+ *
+ * private_data is the struct lock_log_entry the timer was armed for.
+ */
+static void lock_log_cleanup(struct tevent_context *ev,
+                            struct tevent_timer *ttimer,
+                            struct timeval current_time,
+                            void *private_data)
+{
+       struct lock_log_entry *entry = talloc_get_type_abort(
+               private_data, struct lock_log_entry);
+       int ret;
+
+       entry->timer = NULL;
+
+       ret = db_hash_delete(entry->lock_log, entry->key.dptr,
+                            entry->key.dsize);
+       /* If the delete fails the entry is left allocated, not freed */
+       if (ret != 0) {
+               return;
        }
+       talloc_free(entry);
 }
 
+/*
+ * Decide whether logging a lock timeout for this key should be skipped.
+ *
+ * Returns true only when an entry for the key already exists in lock_log
+ * and elapsed_sec is not greater than the value recorded in it. Every
+ * other path, including all failures, returns false.
+ *
+ * Side effects: creates an entry (with a 30 second cleanup timer) when
+ * none exists, deletes a record whose stored value has the wrong size,
+ * and re-arms the timer when the recorded value is advanced.
+ */
+static bool lock_log_skip(struct tevent_context *ev,
+                         struct db_hash_context *lock_log,
+                         TDB_DATA key, unsigned long elapsed_sec)
+{
+       struct lock_log_entry *entry = NULL;
+       int ret;
+
+       ret = db_hash_fetch(lock_log, key.dptr, key.dsize,
+                           lock_log_fetch_parser, &entry);
+       if (ret == ENOENT) {
+
+               /* First timeout seen for this key: start tracking it */
+               entry = talloc_zero(lock_log, struct lock_log_entry);
+               if (entry == NULL) {
+                       goto fail;
+               }
+
+               entry->lock_log = lock_log;
 
-static void ctdb_lock_find_blocker(struct lock_context *lock_ctx);
+               entry->key.dptr = talloc_memdup(entry, key.dptr, key.dsize);
+               if (entry->key.dptr == NULL) {
+                       talloc_free(entry);
+                       goto fail;
+               }
+               entry->key.dsize = key.dsize;
+
+               entry->log_sec = elapsed_sec;
+               entry->timer = tevent_add_timer(ev, entry,
+                                               timeval_current_ofs(30, 0),
+                                               lock_log_cleanup, entry);
+               if (entry->timer == NULL) {
+                       talloc_free(entry);
+                       goto fail;
+               }
+
+               /* The hash stores the entry pointer itself, not a copy */
+               ret = db_hash_add(lock_log, key.dptr, key.dsize,
+                                 (uint8_t *)&entry,
+                                 sizeof(struct lock_log_entry *));
+               if (ret != 0) {
+                       talloc_free(entry);
+                       goto fail;
+               }
+
+               return false;
+
+       } else if (ret == EINVAL) {
+
+               /* EINVAL is what the parser returns for a wrong-sized value:
+                * drop the record */
+               ret = db_hash_delete(lock_log, key.dptr, key.dsize);
+               if (ret != 0) {
+                       goto fail;
+               }
+
+               return false;
+
+       } else if (ret == 0) {
+
+               /* Already logged at this (or a later) elapsed time: skip */
+               if (elapsed_sec <= entry->log_sec) {
+                       return true;
+               }
+
+               entry->log_sec = elapsed_sec;
+
+               /* Restart the 30 second cleanup timer */
+               TALLOC_FREE(entry->timer);
+               entry->timer = tevent_add_timer(ev, entry,
+                                               timeval_current_ofs(30, 0),
+                                               lock_log_cleanup, entry);
+               if (entry->timer == NULL) {
+                       ret = db_hash_delete(lock_log, key.dptr, key.dsize);
+                       if (ret != 0) {
+                               goto fail;
+                       }
+                       talloc_free(entry);
+               }
+
+               return false;
+       }
+
+
+fail:
+       /* Failures never suppress logging */
+       return false;
+
+}
 
 /*
  * Callback routine when required locks are not obtained within timeout
@@ -635,17 +502,69 @@ static void ctdb_lock_timeout_handler(struct tevent_context *ev,
                                    struct timeval current_time,
                                    void *private_data)
 {
+       static char debug_locks[PATH_MAX+1] = "";
        struct lock_context *lock_ctx;
        struct ctdb_context *ctdb;
+       pid_t pid;
+       double elapsed_time;
+       bool skip;
+       char *keystr;
 
        lock_ctx = talloc_get_type_abort(private_data, struct lock_context);
        ctdb = lock_ctx->ctdb;
 
-       /* fire a child process to find the blocking process */
-       if (lock_ctx->block_child == -1) {
-               ctdb_lock_find_blocker(lock_ctx);
+       elapsed_time = timeval_elapsed(&lock_ctx->start_time);
+
+       /* For database locks, always log */
+       if (lock_ctx->type == LOCK_DB) {
+               DEBUG(DEBUG_WARNING,
+                     ("Unable to get DB lock on database %s for "
+                      "%.0lf seconds\n",
+                      lock_ctx->ctdb_db->db_name, elapsed_time));
+               goto lock_debug;
+       }
+
+       /* For record locks, check if we have already logged */
+       skip = lock_log_skip(ev, lock_ctx->ctdb_db->lock_log,
+                            lock_ctx->key, (unsigned long)elapsed_time);
+       if (skip) {
+               goto skip_lock_debug;
+       }
+
+       keystr = hex_encode_talloc(lock_ctx, lock_ctx->key.dptr,
+                                  lock_ctx->key.dsize);
+       DEBUG(DEBUG_WARNING,
+             ("Unable to get RECORD lock on database %s for %.0lf seconds"
+              " (key %s)\n",
+              lock_ctx->ctdb_db->db_name, elapsed_time,
+              keystr ? keystr : ""));
+       TALLOC_FREE(keystr);
+
+       /* If a node stopped/banned, don't spam the logs */
+       if (ctdb->nodes[ctdb->pnn]->flags & NODE_FLAGS_INACTIVE) {
+               goto skip_lock_debug;
+       }
+
+lock_debug:
+
+       if (ctdb_set_helper("lock debugging helper",
+                           debug_locks, sizeof(debug_locks),
+                           "CTDB_DEBUG_LOCKS",
+                           getenv("CTDB_BASE"), "debug_locks.sh")) {
+               pid = vfork();
+               if (pid == 0) {
+                       execl(debug_locks, debug_locks, NULL);
+                       _exit(0);
+               }
+               ctdb_track_child(ctdb, pid);
+       } else {
+               DEBUG(DEBUG_WARNING,
+                     (__location__
+                      " Unable to setup lock debugging\n"));
        }
 
+skip_lock_debug:
+
        /* reset the timeout timer */
        // talloc_free(lock_ctx->ttimer);
        lock_ctx->ttimer = tevent_add_timer(ctdb->ev,
@@ -655,75 +574,148 @@ static void ctdb_lock_timeout_handler(struct tevent_context *ev,
                                            (void *)lock_ctx);
 }
 
-
-static char *lock_child_log_prefix(struct lock_context *lock_ctx)
+static bool lock_helper_args(TALLOC_CTX *mem_ctx,
+                            struct lock_context *lock_ctx, int fd,
+                            int *argc, const char ***argv)
 {
-       char *prefix;
-       pid_t pid;
-
-       pid = getpid();
+       const char **args = NULL;
+       int nargs = 0, i;
 
        switch (lock_ctx->type) {
        case LOCK_RECORD:
-               prefix = talloc_asprintf(NULL, "lockR(%d): ", pid);
+               nargs = 6;
                break;
 
        case LOCK_DB:
-               prefix = talloc_asprintf(NULL, "lockD(%d): ", pid);
+               nargs = 5;
                break;
+       }
+
+       /* Reserve one more slot for the terminating NULL pointer */
+       nargs++;
+
+       args = talloc_array(mem_ctx, const char *, nargs);
+       if (args == NULL) {
+               return false;
+       }
+
+       args[0] = talloc_asprintf(args, "%d", getpid());
+       args[1] = talloc_asprintf(args, "%d", fd);
 
-       case LOCK_ALLDB_PRIO:
-               prefix = talloc_asprintf(NULL, "lockP(%d): ", pid);
+       switch (lock_ctx->type) {
+       case LOCK_RECORD:
+               args[2] = talloc_strdup(args, "RECORD");
+               args[3] = talloc_strdup(args, lock_ctx->ctdb_db->db_path);
+               args[4] = talloc_asprintf(args, "0x%x",
+                               tdb_get_flags(lock_ctx->ctdb_db->ltdb->tdb));
+               if (lock_ctx->key.dsize == 0) {
+                       args[5] = talloc_strdup(args, "NULL");
+               } else {
+                       args[5] = hex_encode_talloc(args, lock_ctx->key.dptr, lock_ctx->key.dsize);
+               }
                break;
 
-       case LOCK_ALLDB:
-               prefix = talloc_asprintf(NULL, "lockA(%d): ", pid);
+       case LOCK_DB:
+               args[2] = talloc_strdup(args, "DB");
+               args[3] = talloc_strdup(args, lock_ctx->ctdb_db->db_path);
+               args[4] = talloc_asprintf(args, "0x%x",
+                               tdb_get_flags(lock_ctx->ctdb_db->ltdb->tdb));
                break;
        }
 
-       return prefix;
-}
+       /* NULL-terminate; an earlier NULL entry is treated as failure */
+       args[nargs-1] = NULL;
+
+       for (i=0; i<nargs-1; i++) {
+               if (args[i] == NULL) {
+                       talloc_free(args);
+                       return false;
+               }
+       }
 
+       *argc = nargs;
+       *argv = args;
+       return true;
+}
 
 /*
- * Schedule a new lock child process
- * Set up callback handler and timeout handler
+ * Find a lock request that can be scheduled
  */
-static void ctdb_lock_schedule(struct ctdb_context *ctdb)
+static struct lock_context *ctdb_find_lock_context(struct ctdb_context *ctdb)
 {
        struct lock_context *lock_ctx, *next_ctx;
-       int ret;
-       pid_t parent;
+       struct ctdb_db_context *ctdb_db;
 
-       if (ctdb->lock_num_current >= MAX_LOCK_PROCESSES_PER_DB) {
-               return;
-       }
+       /* First check if there are database lock requests */
 
-       if (ctdb->lock_pending == NULL) {
-               return;
+       for (lock_ctx = ctdb->lock_pending; lock_ctx != NULL;
+            lock_ctx = next_ctx) {
+
+               if (lock_ctx->request != NULL) {
+                       /* Found a lock context with a request */
+                       return lock_ctx;
+               }
+
+               next_ctx = lock_ctx->next;
+
+               DEBUG(DEBUG_INFO, ("Removing lock context without lock "
+                                  "request\n"));
+               DLIST_REMOVE(ctdb->lock_pending, lock_ctx);
+               CTDB_DECREMENT_STAT(ctdb, locks.num_pending);
+               CTDB_DECREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_pending);
+               talloc_free(lock_ctx);
        }
 
-       /* Find a lock context with requests */
-       lock_ctx = ctdb->lock_pending;
-       while (lock_ctx != NULL) {
-               if (! lock_ctx->req_queue) {
+       /* Next check database queues */
+       for (ctdb_db = ctdb->db_list; ctdb_db; ctdb_db = ctdb_db->next) {
+               if (ctdb_db->lock_num_current ==
+                   ctdb->tunable.lock_processes_per_db) {
+                       continue;
+               }
+
+               for (lock_ctx = ctdb_db->lock_pending; lock_ctx != NULL;
+                    lock_ctx = next_ctx) {
+
                        next_ctx = lock_ctx->next;
-                       DEBUG(DEBUG_INFO, ("Removing lock context without lock requests\n"));
-                       DLIST_REMOVE(ctdb->lock_pending, lock_ctx);
-                       ctdb->lock_num_pending--;
-                       CTDB_DECREMENT_STAT(ctdb, locks.num_pending);
-                       if (lock_ctx->ctdb_db) {
-                               CTDB_DECREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_pending);
+
+                       if (lock_ctx->request != NULL) {
+                               return lock_ctx;
                        }
+
+                       DEBUG(DEBUG_INFO, ("Removing lock context without "
+                                          "lock request\n"));
+                       DLIST_REMOVE(ctdb_db->lock_pending, lock_ctx);
+                       CTDB_DECREMENT_STAT(ctdb, locks.num_pending);
+                       CTDB_DECREMENT_DB_STAT(ctdb_db, locks.num_pending);
                        talloc_free(lock_ctx);
-                       lock_ctx = next_ctx;
-                       continue;
-               } else {
-                       /* Found a lock context with lock requests */
-                       break;
                }
        }
 
+       return NULL;
+}
+
+/*
+ * Schedule a new lock child process
+ * Set up callback handler and timeout handler
+ */
+static void ctdb_lock_schedule(struct ctdb_context *ctdb)
+{
+       struct lock_context *lock_ctx;
+       int ret, argc;
+       TALLOC_CTX *tmp_ctx;
+       static char prog[PATH_MAX+1] = "";
+       const char **args;
+
+       if (!ctdb_set_helper("lock helper",
+                            prog, sizeof(prog),
+                            "CTDB_LOCK_HELPER",
+                            CTDB_HELPER_BINDIR, "ctdb_lock_helper")) {
+               ctdb_die(ctdb, __location__
+                        " Unable to set lock helper\n");
+       }
+
+       /* Find a lock context with requests */
+       lock_ctx = ctdb_find_lock_context(ctdb);
        if (lock_ctx == NULL) {
                return;
        }
@@ -735,40 +727,49 @@ static void ctdb_lock_schedule(struct ctdb_context *ctdb)
                return;
        }
 
-       parent = getpid();
-       lock_ctx->child = ctdb_fork(ctdb);
+       set_close_on_exec(lock_ctx->fd[0]);
 
-       if (lock_ctx->child == (pid_t)-1) {
-               DEBUG(DEBUG_ERR, ("Failed to create a child in ctdb_lock_schedule\n"));
+       /* Create data for child process */
+       tmp_ctx = talloc_new(lock_ctx);
+       if (tmp_ctx == NULL) {
+               DEBUG(DEBUG_ERR, ("Failed to allocate memory for helper args\n"));
                close(lock_ctx->fd[0]);
                close(lock_ctx->fd[1]);
                return;
        }
 
-       /* Child process */
-       if (lock_ctx->child == 0) {
-               char c;
-               close(lock_ctx->fd[0]);
-               debug_extra = lock_child_log_prefix(lock_ctx);
-               if (ctdb_lock_item(lock_ctx)) {
-                       c = 0;
-               } else {
-                       c = 1;
+       if (! ctdb->do_setsched) {
+               ret = setenv("CTDB_NOSETSCHED", "1", 1);
+               if (ret != 0) {
+                       DEBUG(DEBUG_WARNING,
+                             ("Failed to set CTDB_NOSETSCHED variable\n"));
                }
-               write(lock_ctx->fd[1], &c, 1);
+       }
 
-               /* Hang around, but if parent dies, terminate */
-               while (kill(parent, 0) == 0 || errno != ESRCH) {
-                       sleep(5);
-               }
-               _exit(0);
+       /* Create arguments for lock helper */
+       if (!lock_helper_args(tmp_ctx, lock_ctx, lock_ctx->fd[1],
+                             &argc, &args)) {
+               DEBUG(DEBUG_ERR, ("Failed to create lock helper args\n"));
+               close(lock_ctx->fd[0]);
+               close(lock_ctx->fd[1]);
+               talloc_free(tmp_ctx);
+               return;
+       }
+
+       lock_ctx->child = ctdb_vfork_exec(lock_ctx, ctdb, prog, argc,
+                                         (const char **)args);
+       if (lock_ctx->child == -1) {
+               DEBUG(DEBUG_ERR, ("Failed to create a child in ctdb_lock_schedule\n"));
+               close(lock_ctx->fd[0]);
+               close(lock_ctx->fd[1]);
+               talloc_free(tmp_ctx);
+               return;
        }
 
        /* Parent process */
        close(lock_ctx->fd[1]);
-       set_close_on_exec(lock_ctx->fd[0]);
 
-       talloc_set_destructor(lock_ctx, ctdb_lock_context_destructor);
+       talloc_free(tmp_ctx);
 
        /* Set up timeout handler */
        lock_ctx->ttimer = tevent_add_timer(ctdb->ev,
@@ -777,9 +778,8 @@ static void ctdb_lock_schedule(struct ctdb_context *ctdb)
                                            ctdb_lock_timeout_handler,
                                            (void *)lock_ctx);
        if (lock_ctx->ttimer == NULL) {
-               ctdb_kill(ctdb, lock_ctx->child, SIGKILL);
+               ctdb_kill(ctdb, lock_ctx->child, SIGTERM);
                lock_ctx->child = -1;
-               talloc_set_destructor(lock_ctx, NULL);
                close(lock_ctx->fd[0]);
                return;
        }
@@ -788,84 +788,39 @@ static void ctdb_lock_schedule(struct ctdb_context *ctdb)
        lock_ctx->tfd = tevent_add_fd(ctdb->ev,
                                      lock_ctx,
                                      lock_ctx->fd[0],
-                                     EVENT_FD_READ,
+                                     TEVENT_FD_READ,
                                      ctdb_lock_handler,
                                      (void *)lock_ctx);
        if (lock_ctx->tfd == NULL) {
                TALLOC_FREE(lock_ctx->ttimer);
-               ctdb_kill(ctdb, lock_ctx->child, SIGKILL);
+               ctdb_kill(ctdb, lock_ctx->child, SIGTERM);
                lock_ctx->child = -1;
-               talloc_set_destructor(lock_ctx, NULL);
                close(lock_ctx->fd[0]);
                return;
        }
        tevent_fd_set_auto_close(lock_ctx->tfd);
 
        /* Move the context from pending to current */
-       DLIST_REMOVE(ctdb->lock_pending, lock_ctx);
-       ctdb->lock_num_pending--;
-       DLIST_ADD_END(ctdb->lock_current, lock_ctx, NULL);
-       ctdb->lock_num_current++;
-}
-
-
-/*
- * Find the lock context of a given type
- */
-static struct lock_context *find_lock_context(struct lock_context *lock_list,
-                                             struct ctdb_db_context *ctdb_db,
-                                             TDB_DATA key,
-                                             uint32_t priority,
-                                             enum lock_type type)
-{
-       struct lock_context *lock_ctx;
-
-       /* Search active locks */
-       for (lock_ctx=lock_list; lock_ctx; lock_ctx=lock_ctx->next) {
-               if (lock_ctx->type != type) {
-                       continue;
-               }
-
-               switch (lock_ctx->type) {
-               case LOCK_RECORD:
-                       if (ctdb_db == lock_ctx->ctdb_db &&
-                           key.dsize == lock_ctx->key.dsize &&
-                           memcmp(key.dptr, lock_ctx->key.dptr, key.dsize) == 0) {
-                               goto done;
-                       }
-                       break;
-
-               case LOCK_DB:
-                       if (ctdb_db == lock_ctx->ctdb_db) {
-                               goto done;
-                       }
-                       break;
-
-               case LOCK_ALLDB_PRIO:
-                       if (priority == lock_ctx->priority) {
-                               goto done;
-                       }
-                       break;
-
-               case LOCK_ALLDB:
-                       goto done;
-                       break;
-               }
+       if (lock_ctx->type == LOCK_RECORD) {
+               DLIST_REMOVE(lock_ctx->ctdb_db->lock_pending, lock_ctx);
+               DLIST_ADD_END(lock_ctx->ctdb_db->lock_current, lock_ctx);
+       } else {
+               DLIST_REMOVE(ctdb->lock_pending, lock_ctx);
+               DLIST_ADD_END(ctdb->lock_current, lock_ctx);
        }
-
-       /* Did not find the lock context we are searching for */
-       lock_ctx = NULL;
-
-done:
-       return lock_ctx;
-
+       CTDB_DECREMENT_STAT(lock_ctx->ctdb, locks.num_pending);
+       CTDB_INCREMENT_STAT(lock_ctx->ctdb, locks.num_current);
+       lock_ctx->ctdb_db->lock_num_current++;
+       CTDB_DECREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_pending);
+       CTDB_INCREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_current);
 }
 
 
 /*
  * Lock record / db depending on type
  */
-static struct lock_request *ctdb_lock_internal(struct ctdb_context *ctdb,
+static struct lock_request *ctdb_lock_internal(TALLOC_CTX *mem_ctx,
+                                              struct ctdb_context *ctdb,
                                               struct ctdb_db_context *ctdb_db,
                                               TDB_DATA key,
                                               uint32_t priority,
@@ -874,7 +829,7 @@ static struct lock_request *ctdb_lock_internal(struct ctdb_context *ctdb,
                                               enum lock_type type,
                                               bool auto_mark)
 {
-       struct lock_context *lock_ctx;
+       struct lock_context *lock_ctx = NULL;
        struct lock_request *request;
 
        if (callback == NULL) {
@@ -882,54 +837,61 @@ static struct lock_request *ctdb_lock_internal(struct ctdb_context *ctdb,
                return NULL;
        }
 
-       /* get a context for this key - search only the pending contexts,
-        * current contexts might in the middle of processing callbacks */
-       lock_ctx = find_lock_context(ctdb->lock_pending, ctdb_db, key, priority, type);
-
-       /* No existing context, create one */
+       lock_ctx = talloc_zero(ctdb, struct lock_context);
        if (lock_ctx == NULL) {
-               lock_ctx = talloc_zero(ctdb, struct lock_context);
-               if (lock_ctx == NULL) {
-                       DEBUG(DEBUG_ERR, ("Failed to create a new lock context\n"));
-                       return NULL;
-               }
-
-               lock_ctx->type = type;
-               lock_ctx->ctdb = ctdb;
-               lock_ctx->ctdb_db = ctdb_db;
-               lock_ctx->key.dsize = key.dsize;
-               if (key.dsize > 0) {
-                       lock_ctx->key.dptr = talloc_memdup(lock_ctx, key.dptr, key.dsize);
-               } else {
-                       lock_ctx->key.dptr = NULL;
-               }
-               lock_ctx->priority = priority;
-               lock_ctx->auto_mark = auto_mark;
+               DEBUG(DEBUG_ERR, ("Failed to create a new lock context\n"));
+               return NULL;
+       }
 
-               lock_ctx->child = -1;
-               lock_ctx->block_child = -1;
+       if ((request = talloc_zero(mem_ctx, struct lock_request)) == NULL) {
+               talloc_free(lock_ctx);
+               return NULL;
+       }
 
-               DLIST_ADD_END(ctdb->lock_pending, lock_ctx, NULL);
-               ctdb->lock_num_pending++;
-               CTDB_INCREMENT_STAT(ctdb, locks.num_pending);
-               if (ctdb_db) {
-                       CTDB_INCREMENT_DB_STAT(ctdb_db, locks.num_pending);
+       lock_ctx->type = type;
+       lock_ctx->ctdb = ctdb;
+       lock_ctx->ctdb_db = ctdb_db;
+       lock_ctx->key.dsize = key.dsize;
+       if (key.dsize > 0) {
+               lock_ctx->key.dptr = talloc_memdup(lock_ctx, key.dptr, key.dsize);
+               if (lock_ctx->key.dptr == NULL) {
+                       DEBUG(DEBUG_ERR, (__location__ "Memory allocation error\n"));
+                       talloc_free(lock_ctx);
+                       talloc_free(request);
+                       return NULL;
                }
-
-               /* Start the timer when we activate the context */
-               lock_ctx->start_time = timeval_current();
+               lock_ctx->key_hash = ctdb_hash(&key);
+       } else {
+               lock_ctx->key.dptr = NULL;
        }
+       lock_ctx->priority = priority;
+       lock_ctx->auto_mark = auto_mark;
 
-       if ((request = talloc_zero(lock_ctx, struct lock_request)) == NULL) {
-               return NULL;
+       lock_ctx->request = request;
+       lock_ctx->child = -1;
+
+       /* Non-record locks are required by recovery, so they go on the global
+        * pending queue, which is searched before the per-database queues.
+        */
+       if (lock_ctx->type == LOCK_RECORD) {
+               DLIST_ADD_END(ctdb_db->lock_pending, lock_ctx);
+       } else {
+               DLIST_ADD_END(ctdb->lock_pending, lock_ctx);
+       }
+       CTDB_INCREMENT_STAT(ctdb, locks.num_pending);
+       if (ctdb_db) {
+               CTDB_INCREMENT_DB_STAT(ctdb_db, locks.num_pending);
        }
 
+       /* Start the timer when we activate the context */
+       lock_ctx->start_time = timeval_current();
+
        request->lctx = lock_ctx;
        request->callback = callback;
        request->private_data = private_data;
 
        talloc_set_destructor(request, ctdb_lock_request_destructor);
-       DLIST_ADD_END(lock_ctx->req_queue, request, NULL);
+       talloc_set_destructor(lock_ctx, ctdb_lock_context_destructor);
 
        ctdb_lock_schedule(ctdb);
 
@@ -940,13 +902,15 @@ static struct lock_request *ctdb_lock_internal(struct ctdb_context *ctdb,
 /*
  * obtain a lock on a record in a database
  */
-struct lock_request *ctdb_lock_record(struct ctdb_db_context *ctdb_db,
+struct lock_request *ctdb_lock_record(TALLOC_CTX *mem_ctx,
+                                     struct ctdb_db_context *ctdb_db,
                                      TDB_DATA key,
                                      bool auto_mark,
                                      void (*callback)(void *, bool),
                                      void *private_data)
 {
-       return ctdb_lock_internal(ctdb_db->ctdb,
+       return ctdb_lock_internal(mem_ctx,
+                                 ctdb_db->ctdb,
                                  ctdb_db,
                                  key,
                                  0,
@@ -960,12 +924,14 @@ struct lock_request *ctdb_lock_record(struct ctdb_db_context *ctdb_db,
 /*
  * obtain a lock on a database
  */
-struct lock_request *ctdb_lock_db(struct ctdb_db_context *ctdb_db,
+struct lock_request *ctdb_lock_db(TALLOC_CTX *mem_ctx,
+                                 struct ctdb_db_context *ctdb_db,
                                  bool auto_mark,
                                  void (*callback)(void *, bool),
                                  void *private_data)
 {
-       return ctdb_lock_internal(ctdb_db->ctdb,
+       return ctdb_lock_internal(mem_ctx,
+                                 ctdb_db->ctdb,
                                  ctdb_db,
                                  tdb_null,
                                  0,
@@ -974,196 +940,3 @@ struct lock_request *ctdb_lock_db(struct ctdb_db_context *ctdb_db,
                                  LOCK_DB,
                                  auto_mark);
 }
-
-
-/*
- * obtain locks on all databases of specified priority
- */
-struct lock_request *ctdb_lock_alldb_prio(struct ctdb_context *ctdb,
-                                         uint32_t priority,
-                                         bool auto_mark,
-                                         void (*callback)(void *, bool),
-                                         void *private_data)
-{
-       if (priority < 0 || priority > NUM_DB_PRIORITIES) {
-               DEBUG(DEBUG_ERR, ("Invalid db priority: %u\n", priority));
-               return NULL;
-       }
-
-       return ctdb_lock_internal(ctdb,
-                                 NULL,
-                                 tdb_null,
-                                 priority,
-                                 callback,
-                                 private_data,
-                                 LOCK_ALLDB_PRIO,
-                                 auto_mark);
-}
-
-
-/*
- * obtain locks on all databases
- */
-struct lock_request *ctdb_lock_alldb(struct ctdb_context *ctdb,
-                                    bool auto_mark,
-                                    void (*callback)(void *, bool),
-                                    void *private_data)
-{
-       return ctdb_lock_internal(ctdb,
-                                 NULL,
-                                 tdb_null,
-                                 0,
-                                 callback,
-                                 private_data,
-                                 LOCK_ALLDB,
-                                 auto_mark);
-}
-
-/*
- * Callback routine to read the PID of blocking process from the child and log
- *
- */
-void ctdb_lock_blocked_handler(struct tevent_context *ev,
-                               struct tevent_fd *tfd,
-                               uint16_t flags,
-                               void *private_data)
-{
-       struct lock_context *lock_ctx;
-       pid_t blocker_pid = -1;
-       char *process_name = NULL;
-       const char *db_name = NULL;
-       ino_t inode;
-       struct ctdb_db_context *ctdb_db;
-       int fd;
-       struct stat stat_buf;
-
-       lock_ctx = talloc_get_type_abort(private_data, struct lock_context);
-
-       if (read(lock_ctx->block_fd[0], &blocker_pid, sizeof(blocker_pid)) != sizeof(blocker_pid)) {
-               DEBUG(DEBUG_ERR, ("Error reading blocker process pid from child\n"));
-               goto failed;
-       }
-       if (read(lock_ctx->block_fd[0], &inode, sizeof(inode)) != sizeof(inode)) {
-               DEBUG(DEBUG_ERR, ("Error reading blocked inode from child\n"));
-               goto failed;
-       }
-
-       if (blocker_pid < 0) {
-               goto failed;
-       }
-
-       process_name = ctdb_get_process_name(blocker_pid);
-
-       if (lock_ctx->type == LOCK_RECORD || lock_ctx->type == LOCK_DB) {
-               db_name = lock_ctx->ctdb_db->ltdb->name;
-       } else {
-               for (ctdb_db = lock_ctx->ctdb->db_list; ctdb_db; ctdb_db = ctdb_db->next) {
-                       fd = tdb_fd(ctdb_db->ltdb->tdb);
-                       if (fstat(fd, &stat_buf) == 0) {
-                               if (stat_buf.st_ino == inode) {
-                                       db_name = ctdb_db->ltdb->name;
-                                       break;
-                               }
-                       }
-               }
-       }
-
-       if (db_name) {
-               DEBUG(DEBUG_WARNING,
-                     ("Process (pid=%d) blocked in locking\n", lock_ctx->child));
-               DEBUG(DEBUG_WARNING,
-                     ("Process %s (pid=%d) locked database %s (inode %lu) for %.0lf seconds\n",
-                      (process_name ? process_name : "unknown"),
-                      blocker_pid, db_name, (unsigned long)inode,
-                      timeval_elapsed(&lock_ctx->start_time)));
-       } else {
-               DEBUG(DEBUG_WARNING,
-                     ("Process %s (pid=%d) locked database (inode %lu) for %.0lf seconds\n",
-                      (process_name ? process_name : "unknown"),
-                      blocker_pid, (unsigned long)inode,
-                      timeval_elapsed(&lock_ctx->start_time)));
-       }
-
-       /*
-        * If ctdb is blocked by smbd for deadlock_interval, detect it as a deadlock
-        * and kill smbd process.
-        */
-       if (lock_ctx->ctdb->tunable.deadlock_timeout > 0 &&
-           timeval_elapsed(&lock_ctx->start_time) > lock_ctx->ctdb->tunable.deadlock_timeout &&
-           process_name && strstr(process_name, "smbd")) {
-               DEBUG(DEBUG_WARNING,
-                     ("Deadlock detected. Killing smbd process (pid=%d)", blocker_pid));
-               kill(blocker_pid, SIGKILL);
-       }
-
-       free(process_name);
-
-failed:
-       if (lock_ctx->block_child > 0) {
-               ctdb_kill(lock_ctx->ctdb, lock_ctx->block_child, SIGKILL);
-       }
-       lock_ctx->block_child = -1;
-       talloc_free(tfd);
-}
-
-
-/*
- * Find processes that holds lock we are interested in
- */
-void ctdb_lock_find_blocker(struct lock_context *lock_ctx)
-{
-       struct tevent_fd *tfd;
-       pid_t parent;
-
-       if (pipe(lock_ctx->block_fd) < 0) {
-               return;
-       }
-
-       parent = getpid();
-
-       lock_ctx->block_child = ctdb_fork(lock_ctx->ctdb);
-       if (lock_ctx->block_child == -1) {
-               close(lock_ctx->block_fd[0]);
-               close(lock_ctx->block_fd[1]);
-               return;
-       }
-
-       /* Child process */
-       if (lock_ctx->block_child == 0) {
-               struct ctdb_lock_info reqlock;
-               pid_t blocker_pid = -1;
-               bool status;
-
-               close(lock_ctx->block_fd[0]);
-               if (ctdb_get_lock_info(lock_ctx->child, &reqlock)) {
-                       status = ctdb_get_blocker_pid(&reqlock, &blocker_pid);
-                       if (!status) {
-                               /* Could not find blocker pid */
-                               blocker_pid = -2;
-                       }
-               }
-               write(lock_ctx->block_fd[1], &blocker_pid, sizeof(blocker_pid));
-               write(lock_ctx->block_fd[1], &reqlock.inode, sizeof(reqlock.inode));
-
-               /* Hang around till parent dies */
-               while (kill(parent, 0) == 0 || errno != ESRCH) {
-                       sleep(5);
-               }
-               _exit(0);
-       }
-
-       /* Parent process */
-       close(lock_ctx->block_fd[1]);
-       set_close_on_exec(lock_ctx->block_fd[0]);
-
-       tfd = tevent_add_fd(lock_ctx->ctdb->ev,
-                               lock_ctx,
-                               lock_ctx->block_fd[0],
-                               EVENT_FD_READ,
-                               ctdb_lock_blocked_handler,
-                               (void *)lock_ctx);
-       if (tfd == NULL) {
-               ctdb_kill(lock_ctx->ctdb, lock_ctx->block_child, SIGKILL);
-               close(lock_ctx->block_fd[0]);
-       }
-}