lib: Fix a typo
[samba.git] / source3 / lib / ctdbd_conn.c
index 07d1282dc31c16961e8ab713bceb20fbe522251f..9832dfae7b42a2254ced1c09927989fb523ea106 100644 (file)
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 
-#include "includes.h"
+#include "replace.h"
+#include <tevent.h>
 #include "util_tdb.h"
 #include "serverid.h"
 #include "ctdbd_conn.h"
 #include "system/select.h"
-#include "lib/sys_rw_data.h"
+#include "lib/util/sys_rw_data.h"
 #include "lib/util/iov_buf.h"
-
-#include "messages.h"
+#include "lib/util/select.h"
+#include "lib/util/debug.h"
+#include "lib/util/talloc_stack.h"
+#include "lib/util/genrand.h"
+#include "lib/util/fault.h"
+#include "lib/util/dlinklist.h"
+#include "lib/util/tevent_unix.c"
+#include "lib/util/sys_rw.h"
+#include "lib/util/blocking.h"
+#include "ctdb/include/ctdb_protocol.h"
 
 /* paths to these include files come from --with-ctdb= in configure */
 
-#include "ctdb.h"
-#include "ctdb_private.h"
-
 struct ctdbd_srvid_cb {
        uint64_t srvid;
-       int (*cb)(uint32_t src_vnn, uint32_t dst_vnn,
+       int (*cb)(struct tevent_context *ev,
+                 uint32_t src_vnn, uint32_t dst_vnn,
                  uint64_t dst_srvid,
                  const uint8_t *msg, size_t msglen,
                  void *private_data);
        void *private_data;
 };
 
+struct ctdb_pkt_send_state;
+struct ctdb_pkt_recv_state;
+
 struct ctdbd_connection {
-       struct messaging_context *msg_ctx;
        uint32_t reqid;
        uint32_t our_vnn;
        uint64_t rand_srvid;
        struct ctdbd_srvid_cb *callbacks;
        int fd;
-       struct tevent_fd *fde;
        int timeout;
+
+       /* For async connections, enabled via ctdbd_setup_fde() */
+       struct tevent_fd *fde;
+
+       /* State to track in-progress read */
+       struct ctdb_read_state {
+               /* Receive buffer for the initial packet length */
+               uint32_t msglen;
+
+               /* iovec state for current read */
+               struct iovec iov;
+               struct iovec *iovs;
+               int iovcnt;
+
+               /* allocated receive buffer based on packet length */
+               struct ctdb_req_header *hdr;
+       } read_state;
+
+       /* Lists of pending async reads and writes */
+       struct ctdb_pkt_recv_state *recv_list;
+       struct ctdb_pkt_send_state *send_list;
 };
 
+static void ctdbd_async_socket_handler(struct tevent_context *ev,
+                                      struct tevent_fd *fde,
+                                      uint16_t flags,
+                                      void *private_data);
+
+static bool ctdbd_conn_has_async_sends(struct ctdbd_connection *conn)
+{
+       return (conn->send_list != NULL);
+}
+
+static bool ctdbd_conn_has_async_reqs(struct ctdbd_connection *conn)
+{
+       return (conn->fde != NULL);
+}
+
 static uint32_t ctdbd_next_reqid(struct ctdbd_connection *conn)
 {
        conn->reqid += 1;
@@ -62,11 +106,12 @@ static uint32_t ctdbd_next_reqid(struct ctdbd_connection *conn)
        return conn->reqid;
 }
 
-static NTSTATUS ctdbd_control(struct ctdbd_connection *conn,
-                             uint32_t vnn, uint32_t opcode,
-                             uint64_t srvid, uint32_t flags, TDB_DATA data,
-                             TALLOC_CTX *mem_ctx, TDB_DATA *outdata,
-                             int *cstatus);
+static int ctdbd_control(struct ctdbd_connection *conn,
+                        uint32_t vnn, uint32_t opcode,
+                        uint64_t srvid, uint32_t flags,
+                        TDB_DATA data,
+                        TALLOC_CTX *mem_ctx, TDB_DATA *outdata,
+                        int32_t *cstatus);
 
 /*
  * exit on fatal communications errors with the ctdbd daemon
@@ -98,24 +143,24 @@ static void ctdb_packet_dump(struct ctdb_req_header *hdr)
 /*
  * Register a srvid with ctdbd
  */
-NTSTATUS register_with_ctdbd(struct ctdbd_connection *conn, uint64_t srvid,
-                            int (*cb)(uint32_t src_vnn, uint32_t dst_vnn,
-                                      uint64_t dst_srvid,
-                                      const uint8_t *msg, size_t msglen,
-                                      void *private_data),
-                            void *private_data)
+int register_with_ctdbd(struct ctdbd_connection *conn, uint64_t srvid,
+                       int (*cb)(struct tevent_context *ev,
+                                 uint32_t src_vnn, uint32_t dst_vnn,
+                                 uint64_t dst_srvid,
+                                 const uint8_t *msg, size_t msglen,
+                                 void *private_data),
+                       void *private_data)
 {
 
-       NTSTATUS status;
-       int cstatus;
+       int ret;
+       int32_t cstatus;
        size_t num_callbacks;
        struct ctdbd_srvid_cb *tmp;
 
-       status = ctdbd_control(conn, CTDB_CURRENT_NODE,
-                              CTDB_CONTROL_REGISTER_SRVID, srvid, 0,
-                              tdb_null, NULL, NULL, &cstatus);
-       if (!NT_STATUS_IS_OK(status)) {
-               return status;
+       ret = ctdbd_control_local(conn, CTDB_CONTROL_REGISTER_SRVID, srvid, 0,
+                                 tdb_null, NULL, NULL, &cstatus);
+       if (ret != 0) {
+               return ret;
        }
 
        num_callbacks = talloc_array_length(conn->callbacks);
@@ -123,7 +168,7 @@ NTSTATUS register_with_ctdbd(struct ctdbd_connection *conn, uint64_t srvid,
        tmp = talloc_realloc(conn, conn->callbacks, struct ctdbd_srvid_cb,
                             num_callbacks + 1);
        if (tmp == NULL) {
-               return NT_STATUS_NO_MEMORY;
+               return ENOMEM;
        }
        conn->callbacks = tmp;
 
@@ -131,26 +176,26 @@ NTSTATUS register_with_ctdbd(struct ctdbd_connection *conn, uint64_t srvid,
                .srvid = srvid, .cb = cb, .private_data = private_data
        };
 
-       return NT_STATUS_OK;
+       return 0;
 }
 
-static int ctdbd_msg_call_back(struct ctdbd_connection *conn,
-                              struct ctdb_req_message *msg)
+static int ctdbd_msg_call_back(struct tevent_context *ev,
+                              struct ctdbd_connection *conn,
+                              struct ctdb_req_message_old *msg)
 {
-       size_t msg_len;
+       uint32_t msg_len;
        size_t i, num_callbacks;
 
        msg_len = msg->hdr.length;
-       if (msg_len < offsetof(struct ctdb_req_message, data)) {
-               DEBUG(10, ("%s: len %u too small\n", __func__,
-                          (unsigned)msg_len));
+       if (msg_len < offsetof(struct ctdb_req_message_old, data)) {
+               DBG_DEBUG("len %"PRIu32" too small\n", msg_len);
                return 0;
        }
-       msg_len -= offsetof(struct ctdb_req_message, data);
+       msg_len -= offsetof(struct ctdb_req_message_old, data);
 
        if (msg_len < msg->datalen) {
-               DEBUG(10, ("%s: msg_len=%u < msg->datalen=%u\n", __func__,
-                          (unsigned)msg_len, (unsigned)msg->datalen));
+               DBG_DEBUG("msg_len=%"PRIu32" < msg->datalen=%"PRIu32"\n",
+                         msg_len, msg->datalen);
                return 0;
        }
 
@@ -162,7 +207,8 @@ static int ctdbd_msg_call_back(struct ctdbd_connection *conn,
                if ((cb->srvid == msg->srvid) && (cb->cb != NULL)) {
                        int ret;
 
-                       ret = cb->cb(msg->hdr.srcnode, msg->hdr.destnode,
+                       ret = cb->cb(ev,
+                                    msg->hdr.srcnode, msg->hdr.destnode,
                                     msg->srvid, msg->data, msg->datalen,
                                     cb->private_data);
                        if (ret != 0) {
@@ -176,19 +222,18 @@ static int ctdbd_msg_call_back(struct ctdbd_connection *conn,
 /*
  * get our vnn from the cluster
  */
-static NTSTATUS get_cluster_vnn(struct ctdbd_connection *conn, uint32_t *vnn)
+static int get_cluster_vnn(struct ctdbd_connection *conn, uint32_t *vnn)
 {
        int32_t cstatus=-1;
-       NTSTATUS status;
-       status = ctdbd_control(conn,
-                              CTDB_CURRENT_NODE, CTDB_CONTROL_GET_PNN, 0, 0,
-                              tdb_null, NULL, NULL, &cstatus);
-       if (!NT_STATUS_IS_OK(status)) {
-               DEBUG(1, ("ctdbd_control failed: %s\n", nt_errstr(status)));
-               return status;
+       int ret;
+       ret = ctdbd_control_local(conn, CTDB_CONTROL_GET_PNN, 0, 0,
+                                 tdb_null, NULL, NULL, &cstatus);
+       if (ret != 0) {
+               DEBUG(1, ("ctdbd_control failed: %s\n", strerror(ret)));
+               return ret;
        }
        *vnn = (uint32_t)cstatus;
-       return status;
+       return ret;
 }
 
 /*
@@ -197,18 +242,16 @@ static NTSTATUS get_cluster_vnn(struct ctdbd_connection *conn, uint32_t *vnn)
 static bool ctdbd_working(struct ctdbd_connection *conn, uint32_t vnn)
 {
        int32_t cstatus=-1;
-       NTSTATUS status;
        TDB_DATA outdata;
-       struct ctdb_node_map *m;
-       uint32_t failure_flags;
-       bool ret = false;
-       int i;
-
-       status = ctdbd_control(conn, CTDB_CURRENT_NODE,
-                              CTDB_CONTROL_GET_NODEMAP, 0, 0,
-                              tdb_null, talloc_tos(), &outdata, &cstatus);
-       if (!NT_STATUS_IS_OK(status)) {
-               DEBUG(1, ("ctdbd_control failed: %s\n", nt_errstr(status)));
+       struct ctdb_node_map_old *m;
+       bool ok = false;
+       uint32_t i;
+       int ret;
+
+       ret = ctdbd_control_local(conn, CTDB_CONTROL_GET_NODEMAP, 0, 0,
+                                 tdb_null, talloc_tos(), &outdata, &cstatus);
+       if (ret != 0) {
+               DEBUG(1, ("ctdbd_control failed: %s\n", strerror(ret)));
                return false;
        }
        if ((cstatus != 0) || (outdata.dptr == NULL)) {
@@ -216,7 +259,7 @@ static bool ctdbd_working(struct ctdbd_connection *conn, uint32_t vnn)
                return false;
        }
 
-       m = (struct ctdb_node_map *)outdata.dptr;
+       m = (struct ctdb_node_map_old *)outdata.dptr;
 
        for (i=0; i<m->num; i++) {
                if (vnn == m->nodes[i].pnn) {
@@ -230,19 +273,16 @@ static bool ctdbd_working(struct ctdbd_connection *conn, uint32_t vnn)
                goto fail;
        }
 
-       failure_flags = NODE_FLAGS_BANNED | NODE_FLAGS_DISCONNECTED
-               | NODE_FLAGS_PERMANENTLY_DISABLED | NODE_FLAGS_STOPPED;
-
-       if ((m->nodes[i].flags & failure_flags) != 0) {
+       if ((m->nodes[i].flags & NODE_FLAGS_INACTIVE) != 0) {
                DEBUG(2, ("Node has status %x, not active\n",
                          (int)m->nodes[i].flags));
                goto fail;
        }
 
-       ret = true;
+       ok = true;
 fail:
        TALLOC_FREE(outdata.dptr);
-       return ret;
+       return ok;
 }
 
 uint32_t ctdbd_vnn(const struct ctdbd_connection *conn)
@@ -250,18 +290,6 @@ uint32_t ctdbd_vnn(const struct ctdbd_connection *conn)
        return conn->our_vnn;
 }
 
-const char *lp_ctdbd_socket(void)
-{
-       const char *ret;
-
-       ret = lp__ctdbd_socket();
-       if (ret != NULL && strlen(ret) > 0) {
-               return ret;
-       }
-
-       return CTDB_SOCKET;
-}
-
 /*
  * Get us a ctdb connection
  */
@@ -308,12 +336,14 @@ static int ctdb_read_packet(int fd, int timeout, TALLOC_CTX *mem_ctx,
                            struct ctdb_req_header **result)
 {
        struct ctdb_req_header *req;
-       int ret, revents;
        uint32_t msglen;
        ssize_t nread;
 
        if (timeout != -1) {
-               ret = poll_one_fd(fd, POLLIN, timeout, &revents);
+               struct pollfd pfd = { .fd = fd, .events = POLLIN };
+               int ret;
+
+               ret = sys_poll_intr(&pfd, 1, timeout);
                if (ret == -1) {
                        return errno;
                }
@@ -348,9 +378,11 @@ static int ctdb_read_packet(int fd, int timeout, TALLOC_CTX *mem_ctx,
        nread = read_data(fd, ((char *)req) + sizeof(msglen),
                          msglen - sizeof(msglen));
        if (nread == -1) {
+               TALLOC_FREE(req);
                return errno;
        }
        if (nread == 0) {
+               TALLOC_FREE(req);
                return EIO;
        }
 
@@ -381,17 +413,9 @@ static int ctdb_read_req(struct ctdbd_connection *conn, uint32_t reqid,
        ctdb_packet_dump(hdr);
 
        if (hdr->operation == CTDB_REQ_MESSAGE) {
-               struct ctdb_req_message *msg = (struct ctdb_req_message *)hdr;
-
-               if (conn->msg_ctx == NULL) {
-                       DEBUG(1, ("Got a message without having a msg ctx, "
-                                 "dropping msg %llu\n",
-                                 (long long unsigned)msg->srvid));
-                       TALLOC_FREE(hdr);
-                       goto next_pkt;
-               }
+               struct ctdb_req_message_old *msg = (struct ctdb_req_message_old *)hdr;
 
-               ret = ctdbd_msg_call_back(conn, msg);
+               ret = ctdbd_msg_call_back(NULL, conn, msg);
                if (ret != 0) {
                        TALLOC_FREE(hdr);
                        return ret;
@@ -414,106 +438,132 @@ static int ctdb_read_req(struct ctdbd_connection *conn, uint32_t reqid,
        return 0;
 }
 
-static int ctdbd_connection_destructor(struct ctdbd_connection *c)
+/**
+ * This prepares conn for handling async requests
+ **/
+int ctdbd_setup_fde(struct ctdbd_connection *conn, struct tevent_context *ev)
 {
-       close(c->fd);
+       int ret;
+
+       ret = set_blocking(conn->fd, false);
+       if (ret == -1) {
+               return errno;
+       }
+
+       conn->fde = tevent_add_fd(ev,
+                                 conn,
+                                 conn->fd,
+                                 TEVENT_FD_READ,
+                                 ctdbd_async_socket_handler,
+                                 conn);
+       if (conn->fde == NULL) {
+               return ENOMEM;
+       }
+
        return 0;
 }
+
+static int ctdbd_connection_destructor(struct ctdbd_connection *c);
+
 /*
  * Get us a ctdbd connection
  */
 
-static NTSTATUS ctdbd_init_connection(TALLOC_CTX *mem_ctx,
-                                     struct ctdbd_connection **pconn)
+static int ctdbd_init_connection_internal(TALLOC_CTX *mem_ctx,
+                                         const char *sockname, int timeout,
+                                         struct ctdbd_connection *conn)
 {
-       const char *sockname = lp_ctdbd_socket();
-       struct ctdbd_connection *conn;
        int ret;
-       NTSTATUS status;
-
-       if (!(conn = talloc_zero(mem_ctx, struct ctdbd_connection))) {
-               DEBUG(0, ("talloc failed\n"));
-               return NT_STATUS_NO_MEMORY;
-       }
-
-       conn->timeout = lp_ctdb_timeout();
 
+       conn->timeout = timeout;
        if (conn->timeout == 0) {
                conn->timeout = -1;
        }
 
        ret = ctdbd_connect(sockname, &conn->fd);
        if (ret != 0) {
-               status = map_nt_error_from_unix(ret);
                DEBUG(1, ("ctdbd_connect failed: %s\n", strerror(ret)));
-               goto fail;
+               return ret;
        }
        talloc_set_destructor(conn, ctdbd_connection_destructor);
 
-       status = get_cluster_vnn(conn, &conn->our_vnn);
-
-       if (!NT_STATUS_IS_OK(status)) {
-               DEBUG(10, ("get_cluster_vnn failed: %s\n", nt_errstr(status)));
-               goto fail;
+       ret = get_cluster_vnn(conn, &conn->our_vnn);
+       if (ret != 0) {
+               DEBUG(10, ("get_cluster_vnn failed: %s\n", strerror(ret)));
+               return ret;
        }
 
        if (!ctdbd_working(conn, conn->our_vnn)) {
                DEBUG(2, ("Node is not working, can not connect\n"));
-               status = NT_STATUS_INTERNAL_DB_ERROR;
-               goto fail;
+               return EIO;
        }
 
        generate_random_buffer((unsigned char *)&conn->rand_srvid,
                               sizeof(conn->rand_srvid));
 
-       status = register_with_ctdbd(conn, conn->rand_srvid, NULL, NULL);
-
-       if (!NT_STATUS_IS_OK(status)) {
+       ret = register_with_ctdbd(conn, conn->rand_srvid, NULL, NULL);
+       if (ret != 0) {
                DEBUG(5, ("Could not register random srvid: %s\n",
-                         nt_errstr(status)));
-               goto fail;
+                         strerror(ret)));
+               return ret;
        }
 
-       *pconn = conn;
-       return NT_STATUS_OK;
-
- fail:
-       TALLOC_FREE(conn);
-       return status;
+       return 0;
 }
 
-/*
- * Get us a ctdbd connection and register us as a process
- */
-
-NTSTATUS ctdbd_messaging_connection(TALLOC_CTX *mem_ctx,
-                                   struct ctdbd_connection **pconn)
+int ctdbd_init_connection(TALLOC_CTX *mem_ctx,
+                         const char *sockname, int timeout,
+                         struct ctdbd_connection **pconn)
 {
-        struct ctdbd_connection *conn;
-       NTSTATUS status;
-
-       status = ctdbd_init_connection(mem_ctx, &conn);
+       struct ctdbd_connection *conn;
+       int ret;
 
-       if (!NT_STATUS_IS_OK(status)) {
-               return status;
+       if (!(conn = talloc_zero(mem_ctx, struct ctdbd_connection))) {
+               DEBUG(0, ("talloc failed\n"));
+               return ENOMEM;
        }
 
-       status = register_with_ctdbd(conn, MSG_SRVID_SAMBA, NULL, NULL);
-       if (!NT_STATUS_IS_OK(status)) {
+       ret = ctdbd_init_connection_internal(mem_ctx,
+                                            sockname,
+                                            timeout,
+                                            conn);
+       if (ret != 0) {
+               DBG_ERR("ctdbd_init_connection_internal failed (%s)\n",
+                       strerror(ret));
                goto fail;
        }
 
        *pconn = conn;
-       return NT_STATUS_OK;
+       return 0;
 
  fail:
        TALLOC_FREE(conn);
-       return status;
+       return ret;
 }
 
-struct messaging_context *ctdb_conn_msg_ctx(struct ctdbd_connection *conn)
+int ctdbd_reinit_connection(TALLOC_CTX *mem_ctx,
+                           const char *sockname, int timeout,
+                           struct ctdbd_connection *conn)
 {
-       return conn->msg_ctx;
+       int ret;
+
+       ret = ctdbd_connection_destructor(conn);
+       if (ret != 0) {
+               DBG_ERR("ctdbd_connection_destructor failed\n");
+               return ret;
+       }
+
+       ret = ctdbd_init_connection_internal(mem_ctx,
+                                            sockname,
+                                            timeout,
+                                            conn);
+       if (ret != 0) {
+               DBG_ERR("ctdbd_init_connection_internal failed (%s)\n",
+                       strerror(ret));
+               return ret;
+       }
+
+       return 0;
 }
 
 int ctdbd_conn_get_fd(struct ctdbd_connection *conn)
@@ -524,10 +574,11 @@ int ctdbd_conn_get_fd(struct ctdbd_connection *conn)
 /*
  * Packet handler to receive and handle a ctdb message
  */
-static int ctdb_handle_message(struct ctdbd_connection *conn,
+static int ctdb_handle_message(struct tevent_context *ev,
+                              struct ctdbd_connection *conn,
                               struct ctdb_req_header *hdr)
 {
-       struct ctdb_req_message *msg;
+       struct ctdb_req_message_old *msg;
 
        if (hdr->operation != CTDB_REQ_MESSAGE) {
                DEBUG(0, ("Received async msg of type %u, discarding\n",
@@ -535,24 +586,16 @@ static int ctdb_handle_message(struct ctdbd_connection *conn,
                return EINVAL;
        }
 
-       msg = (struct ctdb_req_message *)hdr;
+       msg = (struct ctdb_req_message_old *)hdr;
 
-       ctdbd_msg_call_back(conn, msg);
+       ctdbd_msg_call_back(ev, conn, msg);
 
        return 0;
 }
 
-/*
- * The ctdbd socket is readable asynchronuously
- */
-
-static void ctdbd_socket_handler(struct tevent_context *event_ctx,
-                                struct tevent_fd *event,
-                                uint16_t flags,
-                                void *private_data)
+void ctdbd_socket_readable(struct tevent_context *ev,
+                          struct ctdbd_connection *conn)
 {
-       struct ctdbd_connection *conn = talloc_get_type_abort(
-               private_data, struct ctdbd_connection);
        struct ctdb_req_header *hdr = NULL;
        int ret;
 
@@ -562,7 +605,7 @@ static void ctdbd_socket_handler(struct tevent_context *event_ctx,
                cluster_fatal("ctdbd died\n");
        }
 
-       ret = ctdb_handle_message(conn, hdr);
+       ret = ctdb_handle_message(ev, conn, hdr);
 
        TALLOC_FREE(hdr);
 
@@ -572,41 +615,51 @@ static void ctdbd_socket_handler(struct tevent_context *event_ctx,
        }
 }
 
-/*
- * Prepare a ctdbd connection to receive messages
- */
+static int ctdb_pkt_send_handler(struct ctdbd_connection *conn);
+static int ctdb_pkt_recv_handler(struct ctdbd_connection *conn);
 
-NTSTATUS ctdbd_register_msg_ctx(struct ctdbd_connection *conn,
-                               struct messaging_context *msg_ctx)
+/* Used for async connection and async ctcb requests */
+static void ctdbd_async_socket_handler(struct tevent_context *ev,
+                                      struct tevent_fd *fde,
+                                      uint16_t flags,
+                                      void *private_data)
 {
-       SMB_ASSERT(conn->msg_ctx == NULL);
-       SMB_ASSERT(conn->fde == NULL);
+       struct ctdbd_connection *conn = talloc_get_type_abort(
+               private_data, struct ctdbd_connection);
+       int ret;
 
-       if (!(conn->fde = tevent_add_fd(messaging_tevent_context(msg_ctx),
-                                      conn,
-                                      conn->fd,
-                                      TEVENT_FD_READ,
-                                      ctdbd_socket_handler,
-                                      conn))) {
-               DEBUG(0, ("event_add_fd failed\n"));
-               return NT_STATUS_NO_MEMORY;
+       if ((flags & TEVENT_FD_READ) != 0) {
+               ret = ctdb_pkt_recv_handler(conn);
+               if (ret != 0) {
+                       DBG_DEBUG("ctdb_read_iov_handler returned %s\n",
+                                 strerror(ret));
+               }
+               return;
        }
 
-       conn->msg_ctx = msg_ctx;
+       if ((flags & TEVENT_FD_WRITE) != 0) {
+               ret = ctdb_pkt_send_handler(conn);
+               if (ret != 0) {
+                       DBG_DEBUG("ctdb_write_iov_handler returned %s\n",
+                                 strerror(ret));
+                       return;
+               }
+               return;
+       }
 
-       return NT_STATUS_OK;
+       return;
 }
 
-NTSTATUS ctdbd_messaging_send_iov(struct ctdbd_connection *conn,
-                                 uint32_t dst_vnn, uint64_t dst_srvid,
-                                 const struct iovec *iov, int iovlen)
+int ctdbd_messaging_send_iov(struct ctdbd_connection *conn,
+                            uint32_t dst_vnn, uint64_t dst_srvid,
+                            const struct iovec *iov, int iovlen)
 {
-       struct ctdb_req_message r;
+       struct ctdb_req_message_old r;
        struct iovec iov2[iovlen+1];
        size_t buflen = iov_buflen(iov, iovlen);
        ssize_t nwritten;
 
-       r.hdr.length = offsetof(struct ctdb_req_message, data) + buflen;
+       r.hdr.length = offsetof(struct ctdb_req_message_old, data) + buflen;
        r.hdr.ctdb_magic = CTDB_MAGIC;
        r.hdr.ctdb_version = CTDB_PROTOCOL;
        r.hdr.generation = 1;
@@ -621,7 +674,7 @@ NTSTATUS ctdbd_messaging_send_iov(struct ctdbd_connection *conn,
        ctdb_packet_dump(&r.hdr);
 
        iov2[0].iov_base = &r;
-       iov2[0].iov_len = offsetof(struct ctdb_req_message, data);
+       iov2[0].iov_len = offsetof(struct ctdb_req_message_old, data);
        memcpy(&iov2[1], iov, iovlen * sizeof(struct iovec));
 
        nwritten = write_data_iov(conn->fd, iov2, iovlen+1);
@@ -630,42 +683,39 @@ NTSTATUS ctdbd_messaging_send_iov(struct ctdbd_connection *conn,
                cluster_fatal("cluster dispatch daemon msg write error\n");
        }
 
-       return NT_STATUS_OK;
+       return 0;
 }
 
 /*
  * send/recv a generic ctdb control message
  */
-static NTSTATUS ctdbd_control(struct ctdbd_connection *conn,
-                             uint32_t vnn, uint32_t opcode,
-                             uint64_t srvid, uint32_t flags,
-                             TDB_DATA data,
-                             TALLOC_CTX *mem_ctx, TDB_DATA *outdata,
-                             int *cstatus)
+static int ctdbd_control(struct ctdbd_connection *conn,
+                        uint32_t vnn, uint32_t opcode,
+                        uint64_t srvid, uint32_t flags,
+                        TDB_DATA data,
+                        TALLOC_CTX *mem_ctx, TDB_DATA *outdata,
+                        int32_t *cstatus)
 {
-       struct ctdb_req_control req;
+       struct ctdb_req_control_old req;
        struct ctdb_req_header *hdr;
-       struct ctdb_reply_control *reply = NULL;
-       struct ctdbd_connection *new_conn = NULL;
+       struct ctdb_reply_control_old *reply = NULL;
        struct iovec iov[2];
        ssize_t nwritten;
-       NTSTATUS status;
        int ret;
 
-       if (conn == NULL) {
-               status = ctdbd_init_connection(NULL, &new_conn);
-
-               if (!NT_STATUS_IS_OK(status)) {
-                       DEBUG(10, ("Could not init temp connection: %s\n",
-                                  nt_errstr(status)));
-                       goto fail;
-               }
-
-               conn = new_conn;
+       if (ctdbd_conn_has_async_reqs(conn)) {
+               /*
+                * Can't use sync call while an async call is in flight. Adding
+                * this check as a safety net. We'll be using different
+                * connections for sync and async requests, so this shouldn't
+                * happen, but who knows...
+                */
+               DBG_ERR("Async ctdb req on sync connection\n");
+               return EINVAL;
        }
 
        ZERO_STRUCT(req);
-       req.hdr.length = offsetof(struct ctdb_req_control, data) + data.dsize;
+       req.hdr.length = offsetof(struct ctdb_req_control_old, data) + data.dsize;
        req.hdr.ctdb_magic   = CTDB_MAGIC;
        req.hdr.ctdb_version = CTDB_PROTOCOL;
        req.hdr.operation    = CTDB_REQ_CONTROL;
@@ -676,11 +726,13 @@ static NTSTATUS ctdbd_control(struct ctdbd_connection *conn,
        req.datalen          = data.dsize;
        req.flags            = flags;
 
-       DEBUG(10, ("ctdbd_control: Sending ctdb packet\n"));
+       DBG_DEBUG("Sending ctdb packet reqid=%"PRIu32", vnn=%"PRIu32", "
+                 "opcode=%"PRIu32", srvid=%"PRIu64"\n", req.hdr.reqid,
+                 req.hdr.destnode, req.opcode, req.srvid);
        ctdb_packet_dump(&req.hdr);
 
        iov[0].iov_base = &req;
-       iov[0].iov_len = offsetof(struct ctdb_req_control, data);
+       iov[0].iov_len = offsetof(struct ctdb_req_control_old, data);
        iov[1].iov_base = data.dptr;
        iov[1].iov_len = data.dsize;
 
@@ -691,31 +743,30 @@ static NTSTATUS ctdbd_control(struct ctdbd_connection *conn,
        }
 
        if (flags & CTDB_CTRL_FLAG_NOREPLY) {
-               TALLOC_FREE(new_conn);
                if (cstatus) {
                        *cstatus = 0;
                }
-               return NT_STATUS_OK;
+               return 0;
        }
 
        ret = ctdb_read_req(conn, req.hdr.reqid, NULL, &hdr);
        if (ret != 0) {
                DEBUG(10, ("ctdb_read_req failed: %s\n", strerror(ret)));
-               status = map_nt_error_from_unix(ret);
-               goto fail;
+               return ret;
        }
 
        if (hdr->operation != CTDB_REPLY_CONTROL) {
                DEBUG(0, ("received invalid reply\n"));
-               goto fail;
+               TALLOC_FREE(hdr);
+               return EIO;
        }
-       reply = (struct ctdb_reply_control *)hdr;
+       reply = (struct ctdb_reply_control_old *)hdr;
 
        if (outdata) {
                if (!(outdata->dptr = (uint8_t *)talloc_memdup(
                              mem_ctx, reply->data, reply->datalen))) {
                        TALLOC_FREE(reply);
-                       return NT_STATUS_NO_MEMORY;
+                       return ENOMEM;
                }
                outdata->dsize = reply->datalen;
        }
@@ -723,139 +774,42 @@ static NTSTATUS ctdbd_control(struct ctdbd_connection *conn,
                (*cstatus) = reply->status;
        }
 
-       status = NT_STATUS_OK;
-
- fail:
-       TALLOC_FREE(new_conn);
        TALLOC_FREE(reply);
-       return status;
+       return ret;
 }
 
 /*
  * see if a remote process exists
  */
-bool ctdbd_process_exists(struct ctdbd_connection *conn, uint32_t vnn, pid_t pid)
-{
-       struct server_id id;
-       bool result;
-
-       id.pid = pid;
-       id.vnn = vnn;
-
-       if (!ctdb_processes_exist(conn, &id, 1, &result)) {
-               DEBUG(10, ("ctdb_processes_exist failed\n"));
-               return false;
-       }
-       return result;
-}
-
-bool ctdb_processes_exist(struct ctdbd_connection *conn,
-                         const struct server_id *pids, int num_pids,
-                         bool *results)
+bool ctdbd_process_exists(struct ctdbd_connection *conn, uint32_t vnn,
+                         pid_t pid, uint64_t unique_id)
 {
-       TALLOC_CTX *frame = talloc_stackframe();
-       int i, num_received;
-       uint32_t *reqids;
-       bool result = false;
-
-       reqids = talloc_array(talloc_tos(), uint32_t, num_pids);
-       if (reqids == NULL) {
-               goto fail;
-       }
-
-       for (i=0; i<num_pids; i++) {
-               struct ctdb_req_control req;
-               pid_t pid;
-               struct iovec iov[2];
-               ssize_t nwritten;
-
-               results[i] = false;
-               reqids[i] = ctdbd_next_reqid(conn);
-
-               ZERO_STRUCT(req);
-
-               /*
-                * pids[i].pid is uint64_t, scale down to pid_t which
-                * is the wire protocol towards ctdb.
-                */
-               pid = pids[i].pid;
-
-               DEBUG(10, ("Requesting PID %d/%d, reqid=%d\n",
-                          (int)pids[i].vnn, (int)pid,
-                          (int)reqids[i]));
-
-               req.hdr.length = offsetof(struct ctdb_req_control, data);
-               req.hdr.length += sizeof(pid);
-               req.hdr.ctdb_magic   = CTDB_MAGIC;
-               req.hdr.ctdb_version = CTDB_PROTOCOL;
-               req.hdr.operation    = CTDB_REQ_CONTROL;
-               req.hdr.reqid        = reqids[i];
-               req.hdr.destnode     = pids[i].vnn;
-               req.opcode           = CTDB_CONTROL_PROCESS_EXISTS;
-               req.srvid            = 0;
-               req.datalen          = sizeof(pid);
-               req.flags            = 0;
-
-               DEBUG(10, ("ctdbd_control: Sending ctdb packet\n"));
-               ctdb_packet_dump(&req.hdr);
-
-               iov[0].iov_base = &req;
-               iov[0].iov_len = offsetof(struct ctdb_req_control, data);
-               iov[1].iov_base = &pid;
-               iov[1].iov_len = sizeof(pid);
-
-               nwritten = write_data_iov(conn->fd, iov, ARRAY_SIZE(iov));
-               if (nwritten == -1) {
-                       DEBUG(10, ("write_data_iov failed: %s\n",
-                                  strerror(errno)));
-                       goto fail;
-               }
-       }
-
-       num_received = 0;
-
-       while (num_received < num_pids) {
-               struct ctdb_req_header *hdr;
-               struct ctdb_reply_control *reply;
-               uint32_t reqid;
-               int ret;
+       uint8_t buf[sizeof(pid)+sizeof(unique_id)];
+       int32_t cstatus = 0;
+       int ret;
 
-               ret = ctdb_read_req(conn, 0, talloc_tos(), &hdr);
+       if (unique_id == SERVERID_UNIQUE_ID_NOT_TO_VERIFY) {
+               ret = ctdbd_control(conn, vnn, CTDB_CONTROL_PROCESS_EXISTS,
+                                   0, 0,
+                                   (TDB_DATA) { .dptr = (uint8_t *)&pid,
+                                                   .dsize = sizeof(pid) },
+                                   NULL, NULL, &cstatus);
                if (ret != 0) {
-                       DEBUG(10, ("ctdb_read_req failed: %s\n",
-                                  strerror(ret)));
-                       goto fail;
-               }
-
-               if (hdr->operation != CTDB_REPLY_CONTROL) {
-                       DEBUG(10, ("Received invalid reply\n"));
-                       goto fail;
+                       return false;
                }
-               reply = (struct ctdb_reply_control *)hdr;
-
-               reqid = reply->hdr.reqid;
+               return (cstatus == 0);
+       }
 
-               DEBUG(10, ("Received reqid %d\n", (int)reqid));
+       memcpy(buf, &pid, sizeof(pid));
+       memcpy(buf+sizeof(pid), &unique_id, sizeof(unique_id));
 
-               for (i=0; i<num_pids; i++) {
-                       if (reqid == reqids[i]) {
-                               break;
-                       }
-               }
-               if (i == num_pids) {
-                       DEBUG(10, ("Received unknown record number %u\n",
-                                  (unsigned)reqid));
-                       goto fail;
-               }
-               results[i] = ((reply->status) == 0);
-               TALLOC_FREE(reply);
-               num_received += 1;
+       ret = ctdbd_control(conn, vnn, CTDB_CONTROL_CHECK_PID_SRVID, 0, 0,
+                           (TDB_DATA) { .dptr = buf, .dsize = sizeof(buf) },
+                           NULL, NULL, &cstatus);
+       if (ret != 0) {
+               return false;
        }
-
-       result = true;
-fail:
-       TALLOC_FREE(frame);
-       return result;
+       return (cstatus == 0);
 }
 
 /*
@@ -864,7 +818,7 @@ fail:
 char *ctdbd_dbpath(struct ctdbd_connection *conn,
                   TALLOC_CTX *mem_ctx, uint32_t db_id)
 {
-       NTSTATUS status;
+       int ret;
        TDB_DATA data;
        TDB_DATA rdata = {0};
        int32_t cstatus = 0;
@@ -872,11 +826,11 @@ char *ctdbd_dbpath(struct ctdbd_connection *conn,
        data.dptr = (uint8_t*)&db_id;
        data.dsize = sizeof(db_id);
 
-       status = ctdbd_control(conn, CTDB_CURRENT_NODE,
-                              CTDB_CONTROL_GETDBPATH, 0, 0, data,
-                              mem_ctx, &rdata, &cstatus);
-       if (!NT_STATUS_IS_OK(status) || cstatus != 0) {
-               DEBUG(0,(__location__ " ctdb_control for getdbpath failed\n"));
+       ret = ctdbd_control_local(conn, CTDB_CONTROL_GETDBPATH, 0, 0, data,
+                                 mem_ctx, &rdata, &cstatus);
+       if ((ret != 0) || cstatus != 0) {
+               DEBUG(0, (__location__ " ctdb_control for getdbpath failed: %s\n",
+                         strerror(ret)));
                return NULL;
        }
 
@@ -886,71 +840,62 @@ char *ctdbd_dbpath(struct ctdbd_connection *conn,
 /*
  * attach to a ctdb database
  */
-NTSTATUS ctdbd_db_attach(struct ctdbd_connection *conn,
-                        const char *name, uint32_t *db_id, int tdb_flags)
+int ctdbd_db_attach(struct ctdbd_connection *conn,
+                   const char *name, uint32_t *db_id, bool persistent)
 {
-       NTSTATUS status;
+       int ret;
        TDB_DATA data;
        int32_t cstatus;
-       bool persistent = (tdb_flags & TDB_CLEAR_IF_FIRST) == 0;
 
        data = string_term_tdb_data(name);
 
-       status = ctdbd_control(conn, CTDB_CURRENT_NODE,
-                              persistent
-                              ? CTDB_CONTROL_DB_ATTACH_PERSISTENT
-                              : CTDB_CONTROL_DB_ATTACH,
-                              tdb_flags, 0, data, NULL, &data, &cstatus);
-       if (!NT_STATUS_IS_OK(status)) {
+       ret = ctdbd_control_local(conn,
+                                 persistent
+                                 ? CTDB_CONTROL_DB_ATTACH_PERSISTENT
+                                 : CTDB_CONTROL_DB_ATTACH,
+                                 0, 0, data, NULL, &data, &cstatus);
+       if (ret != 0) {
                DEBUG(0, (__location__ " ctdb_control for db_attach "
-                         "failed: %s\n", nt_errstr(status)));
-               return status;
+                         "failed: %s\n", strerror(ret)));
+               return ret;
        }
 
        if (cstatus != 0 || data.dsize != sizeof(uint32_t)) {
                DEBUG(0,(__location__ " ctdb_control for db_attach failed\n"));
-               return NT_STATUS_INTERNAL_ERROR;
+               return EIO;
        }
 
        *db_id = *(uint32_t *)data.dptr;
        talloc_free(data.dptr);
 
-       if (!(tdb_flags & TDB_SEQNUM)) {
-               return NT_STATUS_OK;
-       }
-
-       data.dptr = (uint8_t *)db_id;
-       data.dsize = sizeof(*db_id);
-
-       status = ctdbd_control(conn, CTDB_CURRENT_NODE,
-                              CTDB_CONTROL_ENABLE_SEQNUM, 0, 0, data,
-                              NULL, NULL, &cstatus);
-       if (!NT_STATUS_IS_OK(status) || cstatus != 0) {
-               DEBUG(0,(__location__ " ctdb_control for enable seqnum "
-                        "failed\n"));
-               return NT_STATUS_IS_OK(status) ? NT_STATUS_INTERNAL_ERROR :
-                       status;
-       }
-
-       return NT_STATUS_OK;
+       return 0;
 }
 
 /*
  * force the migration of a record to this node
  */
-NTSTATUS ctdbd_migrate(struct ctdbd_connection *conn, uint32_t db_id,
-                      TDB_DATA key)
+int ctdbd_migrate(struct ctdbd_connection *conn, uint32_t db_id, TDB_DATA key)
 {
-       struct ctdb_req_call req;
-       struct ctdb_req_header *hdr;
+       struct ctdb_req_call_old req;
+       struct ctdb_req_header *hdr = NULL;
        struct iovec iov[2];
        ssize_t nwritten;
-       NTSTATUS status;
        int ret;
 
+       if (ctdbd_conn_has_async_reqs(conn)) {
+               /*
+                * Can't use sync call while an async call is in flight. Adding
+                * this check as a safety net. We'll be using different
+                * connections for sync and async requests, so this shouldn't
+                * happen, but who knows...
+                */
+               DBG_ERR("Async ctdb req on sync connection\n");
+               return EINVAL;
+       }
+
        ZERO_STRUCT(req);
 
-       req.hdr.length = offsetof(struct ctdb_req_call, data) + key.dsize;
+       req.hdr.length = offsetof(struct ctdb_req_call_old, data) + key.dsize;
        req.hdr.ctdb_magic   = CTDB_MAGIC;
        req.hdr.ctdb_version = CTDB_PROTOCOL;
        req.hdr.operation    = CTDB_REQ_CALL;
@@ -964,7 +909,7 @@ NTSTATUS ctdbd_migrate(struct ctdbd_connection *conn, uint32_t db_id,
        ctdb_packet_dump(&req.hdr);
 
        iov[0].iov_base = &req;
-       iov[0].iov_len = offsetof(struct ctdb_req_call, data);
+       iov[0].iov_len = offsetof(struct ctdb_req_call_old, data);
        iov[1].iov_base = key.dptr;
        iov[1].iov_len = key.dsize;
 
@@ -977,46 +922,58 @@ NTSTATUS ctdbd_migrate(struct ctdbd_connection *conn, uint32_t db_id,
        ret = ctdb_read_req(conn, req.hdr.reqid, NULL, &hdr);
        if (ret != 0) {
                DEBUG(10, ("ctdb_read_req failed: %s\n", strerror(ret)));
-               status = map_nt_error_from_unix(ret);
                goto fail;
        }
 
        if (hdr->operation != CTDB_REPLY_CALL) {
-               DEBUG(0, ("received invalid reply\n"));
-               status = NT_STATUS_INTERNAL_ERROR;
+               if (hdr->operation == CTDB_REPLY_ERROR) {
+                       DBG_ERR("received error from ctdb\n");
+               } else {
+                       DBG_ERR("received invalid reply\n");
+               }
+               ret = EIO;
                goto fail;
        }
 
-       status = NT_STATUS_OK;
  fail:
 
        TALLOC_FREE(hdr);
-       return status;
+       return ret;
 }
 
 /*
  * Fetch a record and parse it
  */
-NTSTATUS ctdbd_parse(struct ctdbd_connection *conn, uint32_t db_id,
-                    TDB_DATA key, bool local_copy,
-                    void (*parser)(TDB_DATA key, TDB_DATA data,
-                                   void *private_data),
-                    void *private_data)
+int ctdbd_parse(struct ctdbd_connection *conn, uint32_t db_id,
+               TDB_DATA key, bool local_copy,
+               void (*parser)(TDB_DATA key, TDB_DATA data,
+                              void *private_data),
+               void *private_data)
 {
-       struct ctdb_req_call req;
+       struct ctdb_req_call_old req;
        struct ctdb_req_header *hdr = NULL;
-       struct ctdb_reply_call *reply;
+       struct ctdb_reply_call_old *reply;
        struct iovec iov[2];
        ssize_t nwritten;
-       NTSTATUS status;
        uint32_t flags;
        int ret;
 
+       if (ctdbd_conn_has_async_reqs(conn)) {
+               /*
+                * Can't use sync call while an async call is in flight. Adding
+                * this check as a safety net. We'll be using different
+                * connections for sync and async requests, so this shouldn't
+                * happen, but who knows...
+                */
+               DBG_ERR("Async ctdb req on sync connection\n");
+               return EINVAL;
+       }
+
        flags = local_copy ? CTDB_WANT_READONLY : 0;
 
        ZERO_STRUCT(req);
 
-       req.hdr.length = offsetof(struct ctdb_req_call, data) + key.dsize;
+       req.hdr.length = offsetof(struct ctdb_req_call_old, data) + key.dsize;
        req.hdr.ctdb_magic   = CTDB_MAGIC;
        req.hdr.ctdb_version = CTDB_PROTOCOL;
        req.hdr.operation    = CTDB_REQ_CALL;
@@ -1027,7 +984,7 @@ NTSTATUS ctdbd_parse(struct ctdbd_connection *conn, uint32_t db_id,
        req.keylen           = key.dsize;
 
        iov[0].iov_base = &req;
-       iov[0].iov_len = offsetof(struct ctdb_req_call, data);
+       iov[0].iov_len = offsetof(struct ctdb_req_call_old, data);
        iov[1].iov_base = key.dptr;
        iov[1].iov_len = key.dsize;
 
@@ -1040,59 +997,58 @@ NTSTATUS ctdbd_parse(struct ctdbd_connection *conn, uint32_t db_id,
        ret = ctdb_read_req(conn, req.hdr.reqid, NULL, &hdr);
        if (ret != 0) {
                DEBUG(10, ("ctdb_read_req failed: %s\n", strerror(ret)));
-               status = map_nt_error_from_unix(ret);
                goto fail;
        }
 
        if ((hdr == NULL) || (hdr->operation != CTDB_REPLY_CALL)) {
                DEBUG(0, ("received invalid reply\n"));
-               status = NT_STATUS_INTERNAL_ERROR;
+               ret = EIO;
                goto fail;
        }
-       reply = (struct ctdb_reply_call *)hdr;
+       reply = (struct ctdb_reply_call_old *)hdr;
 
        if (reply->datalen == 0) {
                /*
                 * Treat an empty record as non-existing
                 */
-               status = NT_STATUS_NOT_FOUND;
+               ret = ENOENT;
                goto fail;
        }
 
        parser(key, make_tdb_data(&reply->data[0], reply->datalen),
               private_data);
 
-       status = NT_STATUS_OK;
+       ret = 0;
  fail:
        TALLOC_FREE(hdr);
-       return status;
+       return ret;
 }
 
 /*
-  Traverse a ctdb database. This uses a kind-of hackish way to open a second
-  connection to ctdbd to avoid the hairy recursive and async problems with
-  everything in-line.
+  Traverse a ctdb database. "conn" must be an otherwise unused
+  ctdb_connection where no other messages but the traverse ones are
+  expected.
 */
 
-NTSTATUS ctdbd_traverse(uint32_t db_id,
+int ctdbd_traverse(struct ctdbd_connection *conn, uint32_t db_id,
                        void (*fn)(TDB_DATA key, TDB_DATA data,
                                   void *private_data),
                        void *private_data)
 {
-       struct ctdbd_connection *conn;
-       NTSTATUS status;
-
+       int ret;
        TDB_DATA key, data;
        struct ctdb_traverse_start t;
-       int cstatus;
+       int32_t cstatus;
 
-       become_root();
-       status = ctdbd_init_connection(NULL, &conn);
-       unbecome_root();
-       if (!NT_STATUS_IS_OK(status)) {
-               DEBUG(0, ("ctdbd_init_connection failed: %s\n",
-                         nt_errstr(status)));
-               return status;
+       if (ctdbd_conn_has_async_reqs(conn)) {
+               /*
+                * Can't use sync call while an async call is in flight. Adding
+                * this check as a safety net. We'll be using different
+                * connections for sync and async requests, so this shouldn't
+                * happen, but who knows...
+                */
+               DBG_ERR("Async ctdb req on sync connection\n");
+               return EINVAL;
        }
 
        t.db_id = db_id;
@@ -1102,30 +1058,27 @@ NTSTATUS ctdbd_traverse(uint32_t db_id,
        data.dptr = (uint8_t *)&t;
        data.dsize = sizeof(t);
 
-       status = ctdbd_control(conn, CTDB_CURRENT_NODE,
-                              CTDB_CONTROL_TRAVERSE_START, conn->rand_srvid, 0,
-                              data, NULL, NULL, &cstatus);
+       ret = ctdbd_control_local(conn, CTDB_CONTROL_TRAVERSE_START,
+                                 conn->rand_srvid,
+                                 0, data, NULL, NULL, &cstatus);
 
-       if (!NT_STATUS_IS_OK(status) || (cstatus != 0)) {
-
-               DEBUG(0,("ctdbd_control failed: %s, %d\n", nt_errstr(status),
+       if ((ret != 0) || (cstatus != 0)) {
+               DEBUG(0,("ctdbd_control failed: %s, %d\n", strerror(ret),
                         cstatus));
 
-               if (NT_STATUS_IS_OK(status)) {
+               if (ret == 0) {
                        /*
                         * We need a mapping here
                         */
-                       status = NT_STATUS_UNSUCCESSFUL;
+                       ret = EIO;
                }
-               TALLOC_FREE(conn);
-               return status;
+               return ret;
        }
 
-       while (True) {
+       while (true) {
                struct ctdb_req_header *hdr = NULL;
-               struct ctdb_req_message *m;
-               struct ctdb_rec_data *d;
-               int ret;
+               struct ctdb_req_message_old *m;
+               struct ctdb_rec_data_old *d;
 
                ret = ctdb_read_packet(conn->fd, conn->timeout, conn, &hdr);
                if (ret != 0) {
@@ -1137,17 +1090,15 @@ NTSTATUS ctdbd_traverse(uint32_t db_id,
                if (hdr->operation != CTDB_REQ_MESSAGE) {
                        DEBUG(0, ("Got operation %u, expected a message\n",
                                  (unsigned)hdr->operation));
-                       TALLOC_FREE(conn);
-                       return NT_STATUS_UNEXPECTED_IO_ERROR;
+                       return EIO;
                }
 
-               m = (struct ctdb_req_message *)hdr;
-               d = (struct ctdb_rec_data *)&m->data[0];
+               m = (struct ctdb_req_message_old *)hdr;
+               d = (struct ctdb_rec_data_old *)&m->data[0];
                if (m->datalen < sizeof(uint32_t) || m->datalen != d->length) {
                        DEBUG(0, ("Got invalid traverse data of length %d\n",
                                  (int)m->datalen));
-                       TALLOC_FREE(conn);
-                       return NT_STATUS_UNEXPECTED_IO_ERROR;
+                       return EIO;
                }
 
                key.dsize = d->keylen;
@@ -1157,15 +1108,13 @@ NTSTATUS ctdbd_traverse(uint32_t db_id,
 
                if (key.dsize == 0 && data.dsize == 0) {
                        /* end of traverse */
-                       TALLOC_FREE(conn);
-                       return NT_STATUS_OK;
+                       return 0;
                }
 
                if (data.dsize < sizeof(struct ctdb_ltdb_header)) {
                        DEBUG(0, ("Got invalid ltdb header length %d\n",
                                  (int)data.dsize));
-                       TALLOC_FREE(conn);
-                       return NT_STATUS_UNEXPECTED_IO_ERROR;
+                       return EIO;
                }
                data.dsize -= sizeof(struct ctdb_ltdb_header);
                data.dptr += sizeof(struct ctdb_ltdb_header);
@@ -1174,7 +1123,7 @@ NTSTATUS ctdbd_traverse(uint32_t db_id,
                        fn(key, data, private_data);
                }
        }
-       return NT_STATUS_OK;
+       return 0;
 }
 
 /*
@@ -1208,18 +1157,19 @@ static void smbd_ctdb_canonicalize_ip(const struct sockaddr_storage *in,
  * Register us as a server for a particular tcp connection
  */
 
-NTSTATUS ctdbd_register_ips(struct ctdbd_connection *conn,
-                           const struct sockaddr_storage *_server,
-                           const struct sockaddr_storage *_client,
-                           int (*cb)(uint32_t src_vnn, uint32_t dst_vnn,
-                                     uint64_t dst_srvid,
-                                     const uint8_t *msg, size_t msglen,
-                                     void *private_data),
-                           void *private_data)
+int ctdbd_register_ips(struct ctdbd_connection *conn,
+                      const struct sockaddr_storage *_server,
+                      const struct sockaddr_storage *_client,
+                      int (*cb)(struct tevent_context *ev,
+                                uint32_t src_vnn, uint32_t dst_vnn,
+                                uint64_t dst_srvid,
+                                const uint8_t *msg, size_t msglen,
+                                void *private_data),
+                      void *private_data)
 {
-       struct ctdb_control_tcp_addr p;
+       struct ctdb_connection p;
        TDB_DATA data = { .dptr = (uint8_t *)&p, .dsize = sizeof(p) };
-       NTSTATUS status;
+       int ret;
        struct sockaddr_storage client;
        struct sockaddr_storage server;
 
@@ -1232,25 +1182,25 @@ NTSTATUS ctdbd_register_ips(struct ctdbd_connection *conn,
 
        switch (client.ss_family) {
        case AF_INET:
-               memcpy(&p.dest.ip, &server, sizeof(p.dest.ip));
+               memcpy(&p.dst.ip, &server, sizeof(p.dst.ip));
                memcpy(&p.src.ip, &client, sizeof(p.src.ip));
                break;
        case AF_INET6:
-               memcpy(&p.dest.ip6, &server, sizeof(p.dest.ip6));
+               memcpy(&p.dst.ip6, &server, sizeof(p.dst.ip6));
                memcpy(&p.src.ip6, &client, sizeof(p.src.ip6));
                break;
        default:
-               return NT_STATUS_INTERNAL_ERROR;
+               return EIO;
        }
 
        /*
         * We want to be told about IP releases
         */
 
-       status = register_with_ctdbd(conn, CTDB_SRVID_RELEASE_IP,
-                                    cb, private_data);
-       if (!NT_STATUS_IS_OK(status)) {
-               return status;
+       ret = register_with_ctdbd(conn, CTDB_SRVID_RELEASE_IP,
+                                 cb, private_data);
+       if (ret != 0) {
+               return ret;
        }
 
        /*
@@ -1258,81 +1208,784 @@ NTSTATUS ctdbd_register_ips(struct ctdbd_connection *conn,
         * can send an extra ack to trigger a reset for our client, so it
         * immediately reconnects
         */
-       return ctdbd_control(conn, CTDB_CURRENT_NODE,
-                            CTDB_CONTROL_TCP_CLIENT, 0,
-                            CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL, NULL);
+       ret = ctdbd_control(conn, CTDB_CURRENT_NODE,
+                           CTDB_CONTROL_TCP_CLIENT, 0,
+                           CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL,
+                           NULL);
+       if (ret != 0) {
+               return ret;
+       }
+       return 0;
 }
 
 /*
   call a control on the local node
  */
-NTSTATUS ctdbd_control_local(struct ctdbd_connection *conn, uint32_t opcode,
-                            uint64_t srvid, uint32_t flags, TDB_DATA data,
-                            TALLOC_CTX *mem_ctx, TDB_DATA *outdata,
-                            int *cstatus)
+int ctdbd_control_local(struct ctdbd_connection *conn, uint32_t opcode,
+                       uint64_t srvid, uint32_t flags, TDB_DATA data,
+                       TALLOC_CTX *mem_ctx, TDB_DATA *outdata,
+                       int32_t *cstatus)
 {
-       return ctdbd_control(conn, CTDB_CURRENT_NODE, opcode, srvid, flags, data, mem_ctx, outdata, cstatus);
+       return ctdbd_control(conn, CTDB_CURRENT_NODE, opcode, srvid, flags, data,
+                            mem_ctx, outdata, cstatus);
 }
 
-NTSTATUS ctdb_watch_us(struct ctdbd_connection *conn)
+int ctdb_watch_us(struct ctdbd_connection *conn)
 {
-       struct ctdb_client_notify_register reg_data;
+       struct ctdb_notify_data_old reg_data;
        size_t struct_len;
-       NTSTATUS status;
-       int cstatus;
+       int ret;
+       int32_t cstatus;
 
        reg_data.srvid = CTDB_SRVID_SAMBA_NOTIFY;
        reg_data.len = 1;
        reg_data.notify_data[0] = 0;
 
-       struct_len = offsetof(struct ctdb_client_notify_register,
+       struct_len = offsetof(struct ctdb_notify_data_old,
                              notify_data) + reg_data.len;
 
-       status = ctdbd_control_local(
+       ret = ctdbd_control_local(
                conn, CTDB_CONTROL_REGISTER_NOTIFY, conn->rand_srvid, 0,
                make_tdb_data((uint8_t *)&reg_data, struct_len),
                NULL, NULL, &cstatus);
-       if (!NT_STATUS_IS_OK(status)) {
+       if (ret != 0) {
                DEBUG(1, ("ctdbd_control_local failed: %s\n",
-                         nt_errstr(status)));
+                         strerror(ret)));
        }
-       return status;
+       return ret;
 }
 
-NTSTATUS ctdb_unwatch(struct ctdbd_connection *conn)
+int ctdb_unwatch(struct ctdbd_connection *conn)
 {
-       struct ctdb_client_notify_deregister dereg_data;
-       NTSTATUS status;
-       int cstatus;
-
-       dereg_data.srvid = CTDB_SRVID_SAMBA_NOTIFY;
+       uint64_t srvid = CTDB_SRVID_SAMBA_NOTIFY;
+       int ret;
+       int32_t cstatus;
 
-       status = ctdbd_control_local(
+       ret = ctdbd_control_local(
                conn, CTDB_CONTROL_DEREGISTER_NOTIFY, conn->rand_srvid, 0,
-               make_tdb_data((uint8_t *)&dereg_data, sizeof(dereg_data)),
+               make_tdb_data((uint8_t *)&srvid, sizeof(srvid)),
                NULL, NULL, &cstatus);
-       if (!NT_STATUS_IS_OK(status)) {
+       if (ret != 0) {
                DEBUG(1, ("ctdbd_control_local failed: %s\n",
-                         nt_errstr(status)));
+                         strerror(ret)));
        }
-       return status;
+       return ret;
 }
 
-NTSTATUS ctdbd_probe(void)
+int ctdbd_probe(const char *sockname, int timeout)
 {
        /*
         * Do a very early check if ctdbd is around to avoid an abort and core
         * later
         */
        struct ctdbd_connection *conn = NULL;
-       NTSTATUS status;
+       int ret;
 
-       status = ctdbd_messaging_connection(talloc_tos(), &conn);
+       ret = ctdbd_init_connection(talloc_tos(), sockname, timeout,
+                                   &conn);
 
        /*
         * We only care if we can connect.
         */
        TALLOC_FREE(conn);
 
-       return status;
+       return ret;
+}
+
+struct ctdb_pkt_send_state {
+       struct ctdb_pkt_send_state *prev, *next;
+       struct tevent_context *ev;
+       struct ctdbd_connection *conn;
+
+       /* ctdb request id */
+       uint32_t reqid;
+
+       /* the associated tevent request */
+       struct tevent_req *req;
+
+       /* iovec array with data to send */
+       struct iovec _iov;
+       struct iovec *iov;
+       int iovcnt;
+
+       /* Initial packet length */
+       size_t packet_len;
+};
+
+static void ctdb_pkt_send_cleanup(struct tevent_req *req,
+                                 enum tevent_req_state req_state);
+
+/**
+ * Asynchronously send a ctdb packet given as iovec array
+ *
+ * Note: the passed iov array is not const here. Similar
+ * functions in samba take a const array and create a copy
+ * before calling iov_advance() on the array.
+ *
+ * This function will modify the iov array! But
+ * this is a static function and our only caller
+ * ctdb_parse_send/recv is preparared for this to
+ * happen!
+ **/
+static struct tevent_req *ctdb_pkt_send_send(TALLOC_CTX *mem_ctx,
+                                            struct tevent_context *ev,
+                                            struct ctdbd_connection *conn,
+                                            uint32_t reqid,
+                                            struct iovec *iov,
+                                            int iovcnt,
+                                            enum dbwrap_req_state *req_state)
+{
+       struct tevent_req *req = NULL;
+       struct ctdb_pkt_send_state *state = NULL;
+       ssize_t nwritten;
+       bool ok;
+
+       DBG_DEBUG("sending async ctdb reqid [%" PRIu32 "]\n", reqid);
+
+       req = tevent_req_create(mem_ctx, &state, struct ctdb_pkt_send_state);
+       if (req == NULL) {
+               return NULL;
+       }
+
+       *state = (struct ctdb_pkt_send_state) {
+               .ev = ev,
+               .conn = conn,
+               .req = req,
+               .reqid = reqid,
+               .iov = iov,
+               .iovcnt = iovcnt,
+               .packet_len = iov_buflen(iov, iovcnt),
+       };
+
+       tevent_req_set_cleanup_fn(req, ctdb_pkt_send_cleanup);
+
+       *req_state = DBWRAP_REQ_QUEUED;
+
+       if (ctdbd_conn_has_async_sends(conn)) {
+               /*
+                * Can't attempt direct write with messages already queued and
+                * possibly in progress
+                */
+               DLIST_ADD_END(conn->send_list, state);
+               return req;
+       }
+
+       /*
+        * Attempt a direct write. If this returns short, schedule the
+        * remaining data as an async write, otherwise we're already done.
+        */
+
+       nwritten = writev(conn->fd, state->iov, state->iovcnt);
+       if (nwritten == state->packet_len) {
+               DBG_DEBUG("Finished sending reqid [%" PRIu32 "]\n", reqid);
+
+               *req_state = DBWRAP_REQ_DISPATCHED;
+               tevent_req_done(req);
+               return tevent_req_post(req, ev);
+       }
+
+       if (nwritten == -1) {
+               if (errno != EINTR && errno != EAGAIN && errno != EWOULDBLOCK) {
+                       cluster_fatal("cluster write error\n");
+               }
+               nwritten = 0;
+       }
+
+       DBG_DEBUG("Posting async write of reqid [%" PRIu32"]"
+                 "after short write [%zd]\n", reqid, nwritten);
+
+       ok = iov_advance(&state->iov, &state->iovcnt, nwritten);
+       if (!ok) {
+               *req_state = DBWRAP_REQ_ERROR;
+               tevent_req_error(req, EIO);
+               return tevent_req_post(req, ev);
+       }
+
+       /*
+        * As this is the first async write req we post, we must enable
+        * fd-writable events.
+        */
+       TEVENT_FD_WRITEABLE(conn->fde);
+       DLIST_ADD_END(conn->send_list, state);
+       return req;
+}
+
+static int ctdb_pkt_send_state_destructor(struct ctdb_pkt_send_state *state)
+{
+       struct ctdbd_connection *conn = state->conn;
+
+       if (conn == NULL) {
+               return 0;
+       }
+
+       if (state->req == NULL) {
+               DBG_DEBUG("Removing cancelled reqid [%" PRIu32"]\n",
+                         state->reqid);
+               state->conn = NULL;
+               DLIST_REMOVE(conn->send_list, state);
+               return 0;
+       }
+
+       DBG_DEBUG("Reparenting cancelled reqid [%" PRIu32"]\n",
+                 state->reqid);
+
+       talloc_reparent(state->req, conn, state);
+       state->req = NULL;
+       return -1;
+}
+
+static void ctdb_pkt_send_cleanup(struct tevent_req *req,
+                                 enum tevent_req_state req_state)
+{
+       struct ctdb_pkt_send_state *state = tevent_req_data(
+               req, struct ctdb_pkt_send_state);
+       struct ctdbd_connection *conn = state->conn;
+       size_t missing_len = 0;
+
+       if (conn == NULL) {
+               return;
+       }
+
+       missing_len = iov_buflen(state->iov, state->iovcnt);
+       if (state->packet_len == missing_len) {
+               /*
+                * We haven't yet started sending this one, so we can just
+                * remove it from the pending list
+                */
+               missing_len = 0;
+       }
+       if (missing_len != 0) {
+               uint8_t *buf = NULL;
+
+               if (req_state != TEVENT_REQ_RECEIVED) {
+                       /*
+                        * Wait til the req_state is TEVENT_REQ_RECEIVED, as
+                        * that will be the final state when the request state
+                        * is talloc_free'd from tallloc_req_received(). Which
+                        * ensures we only run the following code *ONCE*!
+                        */
+                       return;
+               }
+
+               DBG_DEBUG("Cancelling in-flight reqid [%" PRIu32"]\n",
+                         state->reqid);
+               /*
+                * A request in progress of being sent. Reparent the iov buffer
+                * so we can continue sending the request. See also the comment
+                * in ctdbd_parse_send() when copying the key buffer.
+                */
+
+               buf = iov_concat(state, state->iov, state->iovcnt);
+               if (buf == NULL) {
+                       cluster_fatal("iov_concat error\n");
+                       return;
+               }
+
+               state->iovcnt = 1;
+               state->_iov.iov_base = buf;
+               state->_iov.iov_len = missing_len;
+               state->iov = &state->_iov;
+
+               talloc_set_destructor(state, ctdb_pkt_send_state_destructor);
+               return;
+       }
+
+       DBG_DEBUG("Removing pending reqid [%" PRIu32"]\n", state->reqid);
+
+       state->conn = NULL;
+       DLIST_REMOVE(conn->send_list, state);
+
+       if (!ctdbd_conn_has_async_sends(conn)) {
+               DBG_DEBUG("No more sends, disabling fd-writable events\n");
+               TEVENT_FD_NOT_WRITEABLE(conn->fde);
+       }
+}
+
+static int ctdb_pkt_send_handler(struct ctdbd_connection *conn)
+{
+       struct ctdb_pkt_send_state *state = NULL;
+       ssize_t nwritten;
+       ssize_t iovlen;
+       bool ok;
+
+       DBG_DEBUG("send handler\n");
+
+       if (!ctdbd_conn_has_async_sends(conn)) {
+               DBG_WARNING("Writable fd-event without pending send\n");
+               TEVENT_FD_NOT_WRITEABLE(conn->fde);
+               return 0;
+       }
+
+       state = conn->send_list;
+       iovlen = iov_buflen(state->iov, state->iovcnt);
+
+       nwritten = writev(conn->fd, state->iov, state->iovcnt);
+       if (nwritten == -1) {
+               if (errno != EINTR && errno != EAGAIN && errno != EWOULDBLOCK) {
+                       DBG_ERR("writev failed: %s\n", strerror(errno));
+                       cluster_fatal("cluster write error\n");
+               }
+               DBG_DEBUG("recoverable writev error, retry\n");
+               return 0;
+       }
+
+       if (nwritten < iovlen) {
+               DBG_DEBUG("short write\n");
+
+               ok = iov_advance(&state->iov, &state->iovcnt, nwritten);
+               if (!ok) {
+                       DBG_ERR("iov_advance failed\n");
+                       if (state->req == NULL) {
+                               TALLOC_FREE(state);
+                               return 0;
+                       }
+                       tevent_req_error(state->req, EIO);
+                       return 0;
+               }
+               return 0;
+       }
+
+       if (state->req == NULL) {
+               DBG_DEBUG("Finished sending cancelled reqid [%" PRIu32 "]\n",
+                         state->reqid);
+               TALLOC_FREE(state);
+               return 0;
+       }
+
+       DBG_DEBUG("Finished send request id [%" PRIu32 "]\n", state->reqid);
+
+       tevent_req_done(state->req);
+       return 0;
+}
+
+static int ctdb_pkt_send_recv(struct tevent_req *req)
+{
+       int ret;
+
+       if (tevent_req_is_unix_error(req, &ret)) {
+               tevent_req_received(req);
+               return ret;
+       }
+
+       tevent_req_received(req);
+       return 0;
+}
+
+struct ctdb_pkt_recv_state {
+       struct ctdb_pkt_recv_state *prev, *next;
+       struct tevent_context *ev;
+       struct ctdbd_connection *conn;
+
+       /* ctdb request id */
+       uint32_t reqid;
+
+       /* the associated tevent_req */
+       struct tevent_req *req;
+
+       /* pointer to allocated ctdb packet buffer */
+       struct ctdb_req_header *hdr;
+};
+
+static void ctdb_pkt_recv_cleanup(struct tevent_req *req,
+                                 enum tevent_req_state req_state);
+
+static struct tevent_req *ctdb_pkt_recv_send(TALLOC_CTX *mem_ctx,
+                                            struct tevent_context *ev,
+                                            struct ctdbd_connection *conn,
+                                            uint32_t reqid)
+{
+       struct tevent_req *req = NULL;
+       struct ctdb_pkt_recv_state *state = NULL;
+
+       req = tevent_req_create(mem_ctx, &state, struct ctdb_pkt_recv_state);
+       if (req == NULL) {
+               return NULL;
+       }
+
+       *state = (struct ctdb_pkt_recv_state) {
+               .ev = ev,
+               .conn = conn,
+               .reqid = reqid,
+               .req = req,
+       };
+
+       tevent_req_set_cleanup_fn(req, ctdb_pkt_recv_cleanup);
+
+       /*
+        * fd-readable event is always set for the fde, no need to deal with
+        * that here.
+        */
+
+       DLIST_ADD_END(conn->recv_list, state);
+       DBG_DEBUG("Posted receive reqid [%" PRIu32 "]\n", state->reqid);
+
+       return req;
+}
+
+static void ctdb_pkt_recv_cleanup(struct tevent_req *req,
+                                 enum tevent_req_state req_state)
+{
+       struct ctdb_pkt_recv_state *state = tevent_req_data(
+               req, struct ctdb_pkt_recv_state);
+       struct ctdbd_connection *conn = state->conn;
+
+       if (conn == NULL) {
+               return;
+       }
+       state->conn = NULL;
+       DLIST_REMOVE(conn->recv_list, state);
+}
+
+static int ctdb_pkt_recv_handler(struct ctdbd_connection *conn)
+{
+       struct ctdb_pkt_recv_state *state = NULL;
+       ssize_t nread;
+       ssize_t iovlen;
+       bool ok;
+
+       DBG_DEBUG("receive handler\n");
+
+       if (conn->read_state.iovs == NULL) {
+               conn->read_state.iov.iov_base = &conn->read_state.msglen;
+               conn->read_state.iov.iov_len = sizeof(conn->read_state.msglen);
+               conn->read_state.iovs = &conn->read_state.iov;
+               conn->read_state.iovcnt = 1;
+       }
+
+       iovlen = iov_buflen(conn->read_state.iovs, conn->read_state.iovcnt);
+
+       DBG_DEBUG("iovlen [%zd]\n", iovlen);
+
+       nread = readv(conn->fd, conn->read_state.iovs, conn->read_state.iovcnt);
+       if (nread == 0) {
+               cluster_fatal("cluster read error, peer closed connection\n");
+       }
+       if (nread == -1) {
+               if (errno != EINTR && errno != EAGAIN && errno != EWOULDBLOCK) {
+                       cluster_fatal("cluster read error\n");
+               }
+               DBG_DEBUG("recoverable error from readv, retry\n");
+               return 0;
+       }
+
+       if (nread < iovlen) {
+               DBG_DEBUG("iovlen [%zd] nread [%zd]\n", iovlen, nread);
+               ok = iov_advance(&conn->read_state.iovs,
+                                &conn->read_state.iovcnt,
+                                nread);
+               if (!ok) {
+                       return EIO;
+               }
+               return 0;
+       }
+
+       conn->read_state.iovs = NULL;
+       conn->read_state.iovcnt = 0;
+
+       if (conn->read_state.hdr == NULL) {
+               /*
+                * Going this way after reading the 4 initial byte message
+                * length
+                */
+               uint32_t msglen = conn->read_state.msglen;
+               uint8_t *readbuf = NULL;
+               size_t readlen;
+
+               DBG_DEBUG("msglen: %" PRIu32 "\n", msglen);
+
+               if (msglen < sizeof(struct ctdb_req_header)) {
+                       DBG_ERR("short message %" PRIu32 "\n", msglen);
+                       return EIO;
+               }
+
+               conn->read_state.hdr = talloc_size(conn, msglen);
+               if (conn->read_state.hdr == NULL) {
+                       return ENOMEM;
+               }
+               conn->read_state.hdr->length = msglen;
+               talloc_set_name_const(conn->read_state.hdr,
+                                     "struct ctdb_req_header");
+
+               readbuf = (uint8_t *)conn->read_state.hdr + sizeof(msglen);
+               readlen = msglen - sizeof(msglen);
+
+               conn->read_state.iov.iov_base = readbuf;
+               conn->read_state.iov.iov_len = readlen;
+               conn->read_state.iovs = &conn->read_state.iov;
+               conn->read_state.iovcnt = 1;
+
+               DBG_DEBUG("Scheduled packet read size %zd\n", readlen);
+               return 0;
+       }
+
+       /*
+        * Searching a list here is expected to be cheap, as messages are
+        * exepcted to be coming in more or less ordered and we should find the
+        * waiting request near the beginning of the list.
+        */
+       for (state = conn->recv_list; state != NULL; state = state->next) {
+               if (state->reqid == conn->read_state.hdr->reqid) {
+                       break;
+               }
+       }
+
+       if (state == NULL) {
+               DBG_ERR("Discarding async ctdb reqid %u\n",
+                       conn->read_state.hdr->reqid);
+               TALLOC_FREE(conn->read_state.hdr);
+               ZERO_STRUCT(conn->read_state);
+               return EINVAL;
+       }
+
+       DBG_DEBUG("Got reply for reqid [%" PRIu32 "]\n", state->reqid);
+
+       state->hdr = talloc_move(state, &conn->read_state.hdr);
+       ZERO_STRUCT(conn->read_state);
+       tevent_req_done(state->req);
+       return 0;
+}
+
+static int ctdb_pkt_recv_recv(struct tevent_req *req,
+                             TALLOC_CTX *mem_ctx,
+                             struct ctdb_req_header **_hdr)
+{
+       struct ctdb_pkt_recv_state *state = tevent_req_data(
+               req, struct ctdb_pkt_recv_state);
+       int error;
+
+       if (tevent_req_is_unix_error(req, &error)) {
+               DBG_ERR("ctdb_read_req failed %s\n", strerror(error));
+               tevent_req_received(req);
+               return error;
+       }
+
+       *_hdr = talloc_move(mem_ctx, &state->hdr);
+
+       tevent_req_received(req);
+       return 0;
+}
+
+static int ctdbd_connection_destructor(struct ctdbd_connection *c)
+{
+       TALLOC_FREE(c->fde);
+       if (c->fd != -1) {
+               close(c->fd);
+               c->fd = -1;
+       }
+
+       TALLOC_FREE(c->read_state.hdr);
+       ZERO_STRUCT(c->read_state);
+
+       while (c->send_list != NULL) {
+               struct ctdb_pkt_send_state *send_state = c->send_list;
+               DLIST_REMOVE(c->send_list, send_state);
+               send_state->conn = NULL;
+               tevent_req_defer_callback(send_state->req, send_state->ev);
+               tevent_req_error(send_state->req, EIO);
+       }
+
+       while (c->recv_list != NULL) {
+               struct ctdb_pkt_recv_state *recv_state = c->recv_list;
+               DLIST_REMOVE(c->recv_list, recv_state);
+               recv_state->conn = NULL;
+               tevent_req_defer_callback(recv_state->req, recv_state->ev);
+               tevent_req_error(recv_state->req, EIO);
+       }
+
+       return 0;
+}
+
+struct ctdbd_parse_state {
+       struct tevent_context *ev;
+       struct ctdbd_connection *conn;
+       uint32_t reqid;
+       TDB_DATA key;
+       uint8_t _keybuf[64];
+       struct ctdb_req_call_old ctdb_req;
+       struct iovec iov[2];
+       void (*parser)(TDB_DATA key,
+                      TDB_DATA data,
+                      void *private_data);
+       void *private_data;
+       enum dbwrap_req_state *req_state;
+};
+
+static void ctdbd_parse_pkt_send_done(struct tevent_req *subreq);
+static void ctdbd_parse_done(struct tevent_req *subreq);
+
+struct tevent_req *ctdbd_parse_send(TALLOC_CTX *mem_ctx,
+                                   struct tevent_context *ev,
+                                   struct ctdbd_connection *conn,
+                                   uint32_t db_id,
+                                   TDB_DATA key,
+                                   bool local_copy,
+                                   void (*parser)(TDB_DATA key,
+                                                  TDB_DATA data,
+                                                  void *private_data),
+                                   void *private_data,
+                                   enum dbwrap_req_state *req_state)
+{
+       struct tevent_req *req = NULL;
+       struct ctdbd_parse_state *state = NULL;
+       uint32_t flags;
+       uint32_t packet_length;
+       struct tevent_req *subreq = NULL;
+
+       req = tevent_req_create(mem_ctx, &state, struct ctdbd_parse_state);
+       if (req == NULL) {
+               *req_state = DBWRAP_REQ_ERROR;
+               return NULL;
+       }
+
+       *state = (struct ctdbd_parse_state) {
+               .ev = ev,
+               .conn = conn,
+               .reqid = ctdbd_next_reqid(conn),
+               .parser = parser,
+               .private_data = private_data,
+               .req_state = req_state,
+       };
+
+       flags = local_copy ? CTDB_WANT_READONLY : 0;
+       packet_length = offsetof(struct ctdb_req_call_old, data) + key.dsize;
+
+       /*
+        * Copy the key into our state, as ctdb_pkt_send_cleanup() requires that
+        * all passed iov elements have a lifetime longer that the tevent_req
+        * returned by ctdb_pkt_send_send(). This is required continue sending a
+        * the low level request into the ctdb socket, if a higher level
+        * ('this') request is canceled (or talloc free'd) by the application
+        * layer, without sending invalid packets to ctdb.
+        */
+       if (key.dsize > sizeof(state->_keybuf)) {
+               state->key.dptr = talloc_memdup(state, key.dptr, key.dsize);
+               if (tevent_req_nomem(state->key.dptr, req)) {
+                       return tevent_req_post(req, ev);
+               }
+       } else {
+               memcpy(state->_keybuf, key.dptr, key.dsize);
+               state->key.dptr = state->_keybuf;
+       }
+       state->key.dsize = key.dsize;
+
+       state->ctdb_req.hdr.length       = packet_length;
+       state->ctdb_req.hdr.ctdb_magic   = CTDB_MAGIC;
+       state->ctdb_req.hdr.ctdb_version = CTDB_PROTOCOL;
+       state->ctdb_req.hdr.operation    = CTDB_REQ_CALL;
+       state->ctdb_req.hdr.reqid        = state->reqid;
+       state->ctdb_req.flags            = flags;
+       state->ctdb_req.callid           = CTDB_FETCH_FUNC;
+       state->ctdb_req.db_id            = db_id;
+       state->ctdb_req.keylen           = state->key.dsize;
+
+       state->iov[0].iov_base = &state->ctdb_req;
+       state->iov[0].iov_len = offsetof(struct ctdb_req_call_old, data);
+       state->iov[1].iov_base = state->key.dptr;
+       state->iov[1].iov_len = state->key.dsize;
+
+       /*
+        * Note that ctdb_pkt_send_send()
+        * will modify state->iov using
+        * iov_advance() without making a copy.
+        */
+       subreq = ctdb_pkt_send_send(state,
+                                   ev,
+                                   conn,
+                                   state->reqid,
+                                   state->iov,
+                                   ARRAY_SIZE(state->iov),
+                                   req_state);
+       if (tevent_req_nomem(subreq, req)) {
+               *req_state = DBWRAP_REQ_ERROR;
+               return tevent_req_post(req, ev);
+       }
+       tevent_req_set_callback(subreq, ctdbd_parse_pkt_send_done, req);
+
+       return req;
+}
+
+static void ctdbd_parse_pkt_send_done(struct tevent_req *subreq)
+{
+       struct tevent_req *req = tevent_req_callback_data(
+               subreq, struct tevent_req);
+       struct ctdbd_parse_state *state = tevent_req_data(
+               req, struct ctdbd_parse_state);
+       int ret;
+
+       ret = ctdb_pkt_send_recv(subreq);
+       TALLOC_FREE(subreq);
+       if (tevent_req_error(req, ret)) {
+               DBG_DEBUG("ctdb_pkt_send_recv failed %s\n", strerror(ret));
+               return;
+       }
+
+       subreq = ctdb_pkt_recv_send(state,
+                                   state->ev,
+                                   state->conn,
+                                   state->reqid);
+       if (tevent_req_nomem(subreq, req)) {
+               return;
+       }
+
+       *state->req_state = DBWRAP_REQ_DISPATCHED;
+       tevent_req_set_callback(subreq, ctdbd_parse_done, req);
+       return;
+}
+
+static void ctdbd_parse_done(struct tevent_req *subreq)
+{
+       struct tevent_req *req = tevent_req_callback_data(
+               subreq, struct tevent_req);
+       struct ctdbd_parse_state *state = tevent_req_data(
+               req, struct ctdbd_parse_state);
+       struct ctdb_req_header *hdr = NULL;
+       struct ctdb_reply_call_old *reply = NULL;
+       int ret;
+
+       DBG_DEBUG("async parse request finished\n");
+
+       ret = ctdb_pkt_recv_recv(subreq, state, &hdr);
+       TALLOC_FREE(subreq);
+       if (tevent_req_error(req, ret)) {
+               DBG_ERR("ctdb_pkt_recv_recv returned %s\n", strerror(ret));
+               return;
+       }
+
+       if (hdr->operation != CTDB_REPLY_CALL) {
+               DBG_ERR("received invalid reply\n");
+               ctdb_packet_dump(hdr);
+               tevent_req_error(req, EIO);
+               return;
+       }
+
+       reply = (struct ctdb_reply_call_old *)hdr;
+
+       if (reply->datalen == 0) {
+               /*
+                * Treat an empty record as non-existing
+                */
+               tevent_req_error(req, ENOENT);
+               return;
+       }
+
+       state->parser(state->key,
+                     make_tdb_data(&reply->data[0], reply->datalen),
+                     state->private_data);
+
+       tevent_req_done(req);
+       return;
+}
+
+int ctdbd_parse_recv(struct tevent_req *req)
+{
+       int error;
+
+       if (tevent_req_is_unix_error(req, &error)) {
+               DBG_DEBUG("async parse returned %s\n", strerror(error));
+               tevent_req_received(req);
+               return error;
+       }
+
+       tevent_req_received(req);
+       return 0;
 }