Revert "s3:messages: allow messaging_filtered_read_send() to use wrapper tevent_context"
[samba.git] / source3 / lib / messages.c
index ef5d679f6a356a74c039c732241a009cd6859400..864d758fb13ec4b1f72e4fa02cb0abb1e70e6d27 100644 (file)
@@ -46,6 +46,7 @@
 */
 
 #include "includes.h"
+#include "lib/util/server_id.h"
 #include "dbwrap/dbwrap.h"
 #include "serverid.h"
 #include "messages.h"
 #include "lib/util/iov_buf.h"
 #include "lib/util/server_id_db.h"
 #include "lib/messages_dgm_ref.h"
+#include "lib/messages_ctdb.h"
+#include "lib/messages_ctdb_ref.h"
 #include "lib/messages_util.h"
+#include "cluster_support.h"
+#include "ctdbd_conn.h"
+#include "ctdb_srvids.h"
+
+#ifdef CLUSTER_SUPPORT
+#include "ctdb_protocol.h"
+#endif
 
 struct messaging_callback {
        struct messaging_callback *prev, *next;
@@ -66,24 +76,42 @@ struct messaging_callback {
        void *private_data;
 };
 
+struct messaging_registered_ev {
+       struct tevent_context *ev;
+       struct tevent_immediate *im;
+       size_t refcount;
+};
+
 struct messaging_context {
        struct server_id id;
        struct tevent_context *event_ctx;
        struct messaging_callback *callbacks;
 
+       struct messaging_rec *posted_msgs;
+
+       struct messaging_registered_ev *event_contexts;
+
        struct tevent_req **new_waiters;
-       unsigned num_new_waiters;
+       size_t num_new_waiters;
 
        struct tevent_req **waiters;
-       unsigned num_waiters;
+       size_t num_waiters;
 
        void *msg_dgm_ref;
-       struct messaging_backend *remote;
+       void *msg_ctdb_ref;
 
        struct server_id_db *names_db;
 };
 
+static struct messaging_rec *messaging_rec_dup(TALLOC_CTX *mem_ctx,
+                                              struct messaging_rec *rec);
+static bool messaging_dispatch_classic(struct messaging_context *msg_ctx,
+                                      struct messaging_rec *rec);
+static bool messaging_dispatch_waiters(struct messaging_context *msg_ctx,
+                                      struct tevent_context *ev,
+                                      struct messaging_rec *rec);
 static void messaging_dispatch_rec(struct messaging_context *msg_ctx,
+                                  struct tevent_context *ev,
                                   struct messaging_rec *rec);
 
 /****************************************************************************
@@ -105,7 +133,263 @@ static void ping_message(struct messaging_context *msg_ctx,
        messaging_send(msg_ctx, src, MSG_PONG, data);
 }
 
-static void messaging_recv_cb(const uint8_t *msg, size_t msg_len,
+struct messaging_rec *messaging_rec_create(
+       TALLOC_CTX *mem_ctx, struct server_id src, struct server_id dst,
+       uint32_t msg_type, const struct iovec *iov, int iovlen,
+       const int *fds, size_t num_fds)
+{
+       ssize_t buflen;
+       uint8_t *buf;
+       struct messaging_rec *result;
+
+       if (num_fds > INT8_MAX) {
+               return NULL;
+       }
+
+       buflen = iov_buflen(iov, iovlen);
+       if (buflen == -1) {
+               return NULL;
+       }
+       buf = talloc_array(mem_ctx, uint8_t, buflen);
+       if (buf == NULL) {
+               return NULL;
+       }
+       iov_buf(iov, iovlen, buf, buflen);
+
+       {
+               struct messaging_rec rec;
+               int64_t fds64[num_fds];
+               size_t i;
+
+               for (i=0; i<num_fds; i++) {
+                       fds64[i] = fds[i];
+               }
+
+               rec = (struct messaging_rec) {
+                       .msg_version = MESSAGE_VERSION, .msg_type = msg_type,
+                       .src = src, .dest = dst,
+                       .buf.data = buf, .buf.length = buflen,
+                       .num_fds = num_fds, .fds = fds64,
+               };
+
+               result = messaging_rec_dup(mem_ctx, &rec);
+       }
+
+       TALLOC_FREE(buf);
+
+       return result;
+}
+
+static bool messaging_register_event_context(struct messaging_context *ctx,
+                                            struct tevent_context *ev)
+{
+       size_t i, num_event_contexts;
+       struct messaging_registered_ev *free_reg = NULL;
+       struct messaging_registered_ev *tmp;
+
+       num_event_contexts = talloc_array_length(ctx->event_contexts);
+
+       for (i=0; i<num_event_contexts; i++) {
+               struct messaging_registered_ev *reg = &ctx->event_contexts[i];
+
+               if (reg->refcount == 0) {
+                       if (reg->ev != NULL) {
+                               abort();
+                       }
+                       free_reg = reg;
+                       /*
+                        * We continue here and may find another
+                        * free_req, but the important thing is
+                        * that we continue to search for an
+                        * existing registration in the loop.
+                        */
+                       continue;
+               }
+
+               if (reg->ev == ev) {
+                       reg->refcount += 1;
+                       return true;
+               }
+       }
+
+       if (free_reg == NULL) {
+               struct tevent_immediate *im = NULL;
+
+               im = tevent_create_immediate(ctx);
+               if (im == NULL) {
+                       return false;
+               }
+
+               tmp = talloc_realloc(ctx, ctx->event_contexts,
+                                    struct messaging_registered_ev,
+                                    num_event_contexts+1);
+               if (tmp == NULL) {
+                       return false;
+               }
+               ctx->event_contexts = tmp;
+
+               free_reg = &ctx->event_contexts[num_event_contexts];
+               free_reg->im = talloc_move(ctx->event_contexts, &im);
+       }
+
+       /*
+        * free_reg->im might be cached
+        */
+       free_reg->ev = ev;
+       free_reg->refcount = 1;
+
+       return true;
+}
+
+static bool messaging_deregister_event_context(struct messaging_context *ctx,
+                                              struct tevent_context *ev)
+{
+       size_t i, num_event_contexts;
+
+       num_event_contexts = talloc_array_length(ctx->event_contexts);
+
+       for (i=0; i<num_event_contexts; i++) {
+               struct messaging_registered_ev *reg = &ctx->event_contexts[i];
+
+               if (reg->refcount == 0) {
+                       continue;
+               }
+
+               if (reg->ev == ev) {
+                       reg->refcount -= 1;
+
+                       if (reg->refcount == 0) {
+                               /*
+                                * The primary event context
+                                * is never unregistered using
+                                * messaging_deregister_event_context()
+                                * it's only registered using
+                                * messaging_register_event_context().
+                                */
+                               SMB_ASSERT(ev != ctx->event_ctx);
+                               SMB_ASSERT(reg->ev != ctx->event_ctx);
+
+                               /*
+                                * Not strictly necessary, just
+                                * paranoia
+                                */
+                               reg->ev = NULL;
+
+                               /*
+                                * Do not talloc_free(reg->im),
+                                * recycle immediate events.
+                                *
+                                * We just invalidate it using
+                                * the primary event context,
+                                * which is never unregistered.
+                                */
+                               tevent_schedule_immediate(reg->im,
+                                                         ctx->event_ctx,
+                                                         NULL, NULL);
+                       }
+                       return true;
+               }
+       }
+       return false;
+}
+
+static void messaging_post_main_event_context(struct tevent_context *ev,
+                                             struct tevent_immediate *im,
+                                             void *private_data)
+{
+       struct messaging_context *ctx = talloc_get_type_abort(
+               private_data, struct messaging_context);
+
+       while (ctx->posted_msgs != NULL) {
+               struct messaging_rec *rec = ctx->posted_msgs;
+               bool consumed;
+
+               DLIST_REMOVE(ctx->posted_msgs, rec);
+
+               consumed = messaging_dispatch_classic(ctx, rec);
+               if (!consumed) {
+                       consumed = messaging_dispatch_waiters(
+                               ctx, ctx->event_ctx, rec);
+               }
+
+               if (!consumed) {
+                       uint8_t i;
+
+                       for (i=0; i<rec->num_fds; i++) {
+                               close(rec->fds[i]);
+                       }
+               }
+
+               TALLOC_FREE(rec);
+       }
+}
+
+static void messaging_post_sub_event_context(struct tevent_context *ev,
+                                            struct tevent_immediate *im,
+                                            void *private_data)
+{
+       struct messaging_context *ctx = talloc_get_type_abort(
+               private_data, struct messaging_context);
+       struct messaging_rec *rec, *next;
+
+       for (rec = ctx->posted_msgs; rec != NULL; rec = next) {
+               bool consumed;
+
+               next = rec->next;
+
+               consumed = messaging_dispatch_waiters(ctx, ev, rec);
+               if (consumed) {
+                       DLIST_REMOVE(ctx->posted_msgs, rec);
+                       TALLOC_FREE(rec);
+               }
+       }
+}
+
+static bool messaging_alert_event_contexts(struct messaging_context *ctx)
+{
+       size_t i, num_event_contexts;
+
+       num_event_contexts = talloc_array_length(ctx->event_contexts);
+
+       for (i=0; i<num_event_contexts; i++) {
+               struct messaging_registered_ev *reg = &ctx->event_contexts[i];
+
+               if (reg->refcount == 0) {
+                       continue;
+               }
+
+               /*
+                * We depend on schedule_immediate to work
+                * multiple times. Might be a bit inefficient,
+                * but this needs to be proven in tests. The
+                * alternative would be to track whether the
+                * immediate has already been scheduled. For
+                * now, avoid that complexity here.
+                *
+                * reg->ev and ctx->event_ctx can't
+                * be wrapper tevent_context pointers
+                * so we don't need to use
+                * tevent_context_same_loop().
+                */
+
+               if (reg->ev == ctx->event_ctx) {
+                       tevent_schedule_immediate(
+                               reg->im, reg->ev,
+                               messaging_post_main_event_context,
+                               ctx);
+               } else {
+                       tevent_schedule_immediate(
+                               reg->im, reg->ev,
+                               messaging_post_sub_event_context,
+                               ctx);
+               }
+
+       }
+       return true;
+}
+
+static void messaging_recv_cb(struct tevent_context *ev,
+                             const uint8_t *msg, size_t msg_len,
                              int *fds, size_t num_fds,
                              void *private_data)
 {
@@ -149,7 +433,12 @@ static void messaging_recv_cb(const uint8_t *msg, size_t msg_len,
                  (unsigned)rec.msg_type, rec.buf.length, num_fds,
                  server_id_str_buf(rec.src, &idbuf));
 
-       messaging_dispatch_rec(msg_ctx, &rec);
+       if (server_id_same_process(&rec.src, &msg_ctx->id)) {
+               DBG_DEBUG("Ignoring self-send\n");
+               goto close_fail;
+       }
+
+       messaging_dispatch_rec(msg_ctx, ev, &rec);
        return;
 
 close_fail:
@@ -160,7 +449,7 @@ close_fail:
 
 static int messaging_context_destructor(struct messaging_context *ctx)
 {
-       unsigned i;
+       size_t i;
 
        for (i=0; i<ctx->num_new_waiters; i++) {
                if (ctx->new_waiters[i] != NULL) {
@@ -175,6 +464,13 @@ static int messaging_context_destructor(struct messaging_context *ctx)
                }
        }
 
+       /*
+        * The immediates from messaging_alert_event_contexts
+        * reference "ctx". Don't let them outlive the
+        * messaging_context we're destroying here.
+        */
+       TALLOC_FREE(ctx->event_contexts);
+
        return 0;
 }
 
@@ -183,88 +479,122 @@ static const char *private_path(const char *name)
        return talloc_asprintf(talloc_tos(), "%s/%s", lp_private_dir(), name);
 }
 
-struct messaging_context *messaging_init(TALLOC_CTX *mem_ctx, 
-                                        struct tevent_context *ev)
+static NTSTATUS messaging_init_internal(TALLOC_CTX *mem_ctx,
+                                       struct tevent_context *ev,
+                                       struct messaging_context **pmsg_ctx)
 {
+       TALLOC_CTX *frame;
        struct messaging_context *ctx;
+       NTSTATUS status = NT_STATUS_UNSUCCESSFUL;
        int ret;
        const char *lck_path;
        const char *priv_path;
        bool ok;
 
-       if (!(ctx = talloc_zero(mem_ctx, struct messaging_context))) {
-               return NULL;
-       }
-
-       ctx->id = (struct server_id) {
-               .pid = getpid(), .vnn = NONCLUSTER_VNN
-       };
-
-       ctx->event_ctx = ev;
+       /*
+        * sec_init() *must* be called before any other
+        * functions that use sec_XXX(). e.g. sec_initial_uid().
+        */
 
        sec_init();
 
-       lck_path = lock_path("msg.lock");
+       if (tevent_context_is_wrapper(ev)) {
+               /* This is really a programmer error! */
+               DBG_ERR("Should not be used with a wrapper tevent context\n");
+               return NT_STATUS_INVALID_PARAMETER;
+       }
+
+       lck_path = lock_path(talloc_tos(), "msg.lock");
        if (lck_path == NULL) {
-               TALLOC_FREE(ctx);
-               return NULL;
+               return NT_STATUS_NO_MEMORY;
        }
 
-       ok = directory_create_or_exist_strict(lck_path, sec_initial_uid(),
+       ok = directory_create_or_exist_strict(lck_path,
+                                             sec_initial_uid(),
                                              0755);
        if (!ok) {
-               DEBUG(10, ("%s: Could not create lock directory: %s\n",
-                          __func__, strerror(errno)));
-               TALLOC_FREE(ctx);
-               return NULL;
+               DBG_DEBUG("Could not create lock directory: %s\n",
+                         strerror(errno));
+               return NT_STATUS_ACCESS_DENIED;
        }
 
        priv_path = private_path("msg.sock");
        if (priv_path == NULL) {
-               TALLOC_FREE(ctx);
-               return NULL;
+               return NT_STATUS_NO_MEMORY;
        }
 
        ok = directory_create_or_exist_strict(priv_path, sec_initial_uid(),
                                              0700);
        if (!ok) {
-               DEBUG(10, ("%s: Could not create msg directory: %s\n",
-                          __func__, strerror(errno)));
-               TALLOC_FREE(ctx);
-               return NULL;
+               DBG_DEBUG("Could not create msg directory: %s\n",
+                         strerror(errno));
+               return NT_STATUS_ACCESS_DENIED;
+       }
+
+       frame = talloc_stackframe();
+       if (frame == NULL) {
+               return NT_STATUS_NO_MEMORY;
        }
 
-       ctx->msg_dgm_ref = messaging_dgm_ref(
-               ctx, ctx->event_ctx, &ctx->id.unique_id,
-               priv_path, lck_path, messaging_recv_cb, ctx, &ret);
+       ctx = talloc_zero(frame, struct messaging_context);
+       if (ctx == NULL) {
+               status = NT_STATUS_NO_MEMORY;
+               goto done;
+       }
+
+       ctx->id = (struct server_id) {
+               .pid = getpid(), .vnn = NONCLUSTER_VNN
+       };
+
+       ctx->event_ctx = ev;
+
+       ok = messaging_register_event_context(ctx, ev);
+       if (!ok) {
+               status = NT_STATUS_NO_MEMORY;
+               goto done;
+       }
 
+       ctx->msg_dgm_ref = messaging_dgm_ref(ctx,
+                                            ctx->event_ctx,
+                                            &ctx->id.unique_id,
+                                            priv_path,
+                                            lck_path,
+                                            messaging_recv_cb,
+                                            ctx,
+                                            &ret);
        if (ctx->msg_dgm_ref == NULL) {
                DEBUG(2, ("messaging_dgm_ref failed: %s\n", strerror(ret)));
-               TALLOC_FREE(ctx);
-               return NULL;
+               status = map_nt_error_from_unix(ret);
+               goto done;
        }
-
        talloc_set_destructor(ctx, messaging_context_destructor);
 
+#ifdef CLUSTER_SUPPORT
        if (lp_clustering()) {
-               ret = messaging_ctdbd_init(ctx, ctx, &ctx->remote);
-
-               if (ret != 0) {
-                       DEBUG(2, ("messaging_ctdbd_init failed: %s\n",
-                                 strerror(ret)));
-                       TALLOC_FREE(ctx);
-                       return NULL;
+               ctx->msg_ctdb_ref = messaging_ctdb_ref(
+                       ctx, ctx->event_ctx,
+                       lp_ctdbd_socket(), lp_ctdb_timeout(),
+                       ctx->id.unique_id, messaging_recv_cb, ctx, &ret);
+               if (ctx->msg_ctdb_ref == NULL) {
+                       DBG_NOTICE("messaging_ctdb_ref failed: %s\n",
+                                  strerror(ret));
+                       status = map_nt_error_from_unix(ret);
+                       goto done;
                }
        }
+#endif
+
        ctx->id.vnn = get_my_vnn();
 
-       ctx->names_db = server_id_db_init(
-               ctx, ctx->id, lp_lock_directory(), 0,
-               TDB_INCOMPATIBLE_HASH|TDB_CLEAR_IF_FIRST);
+       ctx->names_db = server_id_db_init(ctx,
+                                         ctx->id,
+                                         lp_lock_directory(),
+                                         0,
+                                         TDB_INCOMPATIBLE_HASH|TDB_CLEAR_IF_FIRST);
        if (ctx->names_db == NULL) {
-               DEBUG(10, ("%s: server_id_db_init failed\n", __func__));
-               TALLOC_FREE(ctx);
-               return NULL;
+               DBG_DEBUG("server_id_db_init failed\n");
+               status = NT_STATUS_NO_MEMORY;
+               goto done;
        }
 
        messaging_register(ctx, NULL, MSG_PING, ping_message);
@@ -280,6 +610,28 @@ struct messaging_context *messaging_init(TALLOC_CTX *mem_ctx,
                DBG_DEBUG("my id: %s\n", server_id_str_buf(ctx->id, &tmp));
        }
 
+       *pmsg_ctx = talloc_steal(mem_ctx, ctx);
+
+       status = NT_STATUS_OK;
+done:
+       TALLOC_FREE(frame);
+
+       return status;
+}
+
+struct messaging_context *messaging_init(TALLOC_CTX *mem_ctx,
+                                        struct tevent_context *ev)
+{
+       struct messaging_context *ctx = NULL;
+       NTSTATUS status;
+
+       status = messaging_init_internal(mem_ctx,
+                                        ev,
+                                        &ctx);
+       if (!NT_STATUS_IS_OK(status)) {
+               return NULL;
+       }
+
        return ctx;
 }
 
@@ -297,12 +649,13 @@ NTSTATUS messaging_reinit(struct messaging_context *msg_ctx)
        char *lck_path;
 
        TALLOC_FREE(msg_ctx->msg_dgm_ref);
+       TALLOC_FREE(msg_ctx->msg_ctdb_ref);
 
        msg_ctx->id = (struct server_id) {
                .pid = getpid(), .vnn = msg_ctx->id.vnn
        };
 
-       lck_path = lock_path("msg.lock");
+       lck_path = lock_path(talloc_tos(), "msg.lock");
        if (lck_path == NULL) {
                return NT_STATUS_NO_MEMORY;
        }
@@ -318,12 +671,14 @@ NTSTATUS messaging_reinit(struct messaging_context *msg_ctx)
        }
 
        if (lp_clustering()) {
-               ret = messaging_ctdbd_reinit(msg_ctx, msg_ctx,
-                                            msg_ctx->remote);
-
-               if (ret != 0) {
-                       DEBUG(1, ("messaging_ctdbd_init failed: %s\n",
-                                 strerror(ret)));
+               msg_ctx->msg_ctdb_ref = messaging_ctdb_ref(
+                       msg_ctx, msg_ctx->event_ctx,
+                       lp_ctdbd_socket(), lp_ctdb_timeout(),
+                       msg_ctx->id.unique_id, messaging_recv_cb, msg_ctx,
+                       &ret);
+               if (msg_ctx->msg_ctdb_ref == NULL) {
+                       DBG_NOTICE("messaging_ctdb_ref failed: %s\n",
+                                  strerror(ret));
                        return map_nt_error_from_unix(ret);
                }
        }
@@ -430,6 +785,32 @@ NTSTATUS messaging_send_buf(struct messaging_context *msg_ctx,
        return messaging_send(msg_ctx, server, msg_type, &blob);
 }
 
+static int messaging_post_self(struct messaging_context *msg_ctx,
+                              struct server_id src, struct server_id dst,
+                              uint32_t msg_type,
+                              const struct iovec *iov, int iovlen,
+                              const int *fds, size_t num_fds)
+{
+       struct messaging_rec *rec;
+       bool ok;
+
+       rec = messaging_rec_create(
+               msg_ctx, src, dst, msg_type, iov, iovlen, fds, num_fds);
+       if (rec == NULL) {
+               return ENOMEM;
+       }
+
+       ok = messaging_alert_event_contexts(msg_ctx);
+       if (!ok) {
+               TALLOC_FREE(rec);
+               return ENOMEM;
+       }
+
+       DLIST_ADD_END(msg_ctx->posted_msgs, rec);
+
+       return 0;
+}
+
 int messaging_send_iov_from(struct messaging_context *msg_ctx,
                            struct server_id src, struct server_id dst,
                            uint32_t msg_type,
@@ -448,22 +829,25 @@ int messaging_send_iov_from(struct messaging_context *msg_ctx,
                return EINVAL;
        }
 
+       if (server_id_equal(&dst, &msg_ctx->id)) {
+               ret = messaging_post_self(msg_ctx, src, dst, msg_type,
+                                         iov, iovlen, fds, num_fds);
+               return ret;
+       }
+
+       message_hdr_put(hdr, msg_type, src, dst);
+       iov2[0] = (struct iovec){ .iov_base = hdr, .iov_len = sizeof(hdr) };
+       memcpy(&iov2[1], iov, iovlen * sizeof(*iov));
+
        if (dst.vnn != msg_ctx->id.vnn) {
                if (num_fds > 0) {
                        return ENOSYS;
                }
 
-               ret = msg_ctx->remote->send_fn(src, dst,
-                                              msg_type, iov, iovlen,
-                                              NULL, 0,
-                                              msg_ctx->remote);
+               ret = messaging_ctdb_send(dst.vnn, dst.pid, iov2, iovlen+1);
                return ret;
        }
 
-       message_hdr_put(hdr, msg_type, src, dst);
-       iov2[0] = (struct iovec){ .iov_base = hdr, .iov_len = sizeof(hdr) };
-       memcpy(&iov2[1], iov, iovlen * sizeof(*iov));
-
        ret = messaging_dgm_send(dst.pid, iov2, iovlen+1, fds, num_fds);
 
        if (ret == EACCES) {
@@ -473,6 +857,17 @@ int messaging_send_iov_from(struct messaging_context *msg_ctx,
                unbecome_root();
        }
 
+       if (ret == ECONNREFUSED) {
+               /*
+                * Linux returns this when a socket exists in the file
+                * system without a listening process. This is not
+                * documented in susv4 or the linux manpages, but it's
+                * easily testable. For the higher levels this is the
+                * same as "destination does not exist"
+                */
+               ret = ENOENT;
+       }
+
        return ret;
 }
 
@@ -491,14 +886,91 @@ NTSTATUS messaging_send_iov(struct messaging_context *msg_ctx,
        return NT_STATUS_OK;
 }
 
+struct send_all_state {
+       struct messaging_context *msg_ctx;
+       int msg_type;
+       const void *buf;
+       size_t len;
+};
+
+static int send_all_fn(pid_t pid, void *private_data)
+{
+       struct send_all_state *state = private_data;
+       NTSTATUS status;
+
+       if (pid == getpid()) {
+               DBG_DEBUG("Skip ourselves in messaging_send_all\n");
+               return 0;
+       }
+
+       status = messaging_send_buf(state->msg_ctx, pid_to_procid(pid),
+                                   state->msg_type, state->buf, state->len);
+       if (!NT_STATUS_IS_OK(status)) {
+               DBG_WARNING("messaging_send_buf to %ju failed: %s\n",
+                           (uintmax_t)pid, nt_errstr(status));
+       }
+
+       return 0;
+}
+
+void messaging_send_all(struct messaging_context *msg_ctx,
+                       int msg_type, const void *buf, size_t len)
+{
+       struct send_all_state state = {
+               .msg_ctx = msg_ctx, .msg_type = msg_type,
+               .buf = buf, .len = len
+       };
+       int ret;
+
+#ifdef CLUSTER_SUPPORT
+       if (lp_clustering()) {
+               struct ctdbd_connection *conn = messaging_ctdb_connection();
+               uint8_t msghdr[MESSAGE_HDR_LENGTH];
+               struct iovec iov[] = {
+                       { .iov_base = msghdr,
+                         .iov_len = sizeof(msghdr) },
+                       { .iov_base = discard_const_p(void, buf),
+                         .iov_len = len }
+               };
+
+               message_hdr_put(msghdr, msg_type, messaging_server_id(msg_ctx),
+                               (struct server_id) {0});
+
+               ret = ctdbd_messaging_send_iov(
+                       conn, CTDB_BROADCAST_CONNECTED,
+                       CTDB_SRVID_SAMBA_PROCESS,
+                       iov, ARRAY_SIZE(iov));
+               if (ret != 0) {
+                       DBG_WARNING("ctdbd_messaging_send_iov failed: %s\n",
+                                   strerror(ret));
+               }
+
+               return;
+       }
+#endif
+
+       ret = messaging_dgm_forall(send_all_fn, &state);
+       if (ret != 0) {
+               DBG_WARNING("messaging_dgm_forall failed: %s\n",
+                           strerror(ret));
+       }
+}
+
 static struct messaging_rec *messaging_rec_dup(TALLOC_CTX *mem_ctx,
                                               struct messaging_rec *rec)
 {
        struct messaging_rec *result;
        size_t fds_size = sizeof(int64_t) * rec->num_fds;
+       size_t payload_len;
+
+       payload_len = rec->buf.length + fds_size;
+       if (payload_len < rec->buf.length) {
+               /* overflow */
+               return NULL;
+       }
 
        result = talloc_pooled_object(mem_ctx, struct messaging_rec, 2,
-                                     rec->buf.length + fds_size);
+                                     payload_len);
        if (result == NULL) {
                return NULL;
        }
@@ -520,7 +992,8 @@ static struct messaging_rec *messaging_rec_dup(TALLOC_CTX *mem_ctx,
 struct messaging_filtered_read_state {
        struct tevent_context *ev;
        struct messaging_context *msg_ctx;
-       void *tevent_handle;
+       struct messaging_dgm_fde *fde;
+       struct messaging_ctdb_fde *cluster_fde;
 
        bool (*filter)(struct messaging_rec *rec, void *private_data);
        void *private_data;
@@ -540,6 +1013,7 @@ struct tevent_req *messaging_filtered_read_send(
        struct tevent_req *req;
        struct messaging_filtered_read_state *state;
        size_t new_waiters_len;
+       bool ok;
 
        req = tevent_req_create(mem_ctx, &state,
                                struct messaging_filtered_read_state);
@@ -551,18 +1025,32 @@ struct tevent_req *messaging_filtered_read_send(
        state->filter = filter;
        state->private_data = private_data;
 
+       if (tevent_context_is_wrapper(ev)) {
+               /* This is really a programmer error! */
+               DBG_ERR("Wrapper tevent context doesn't use main context.\n");
+               tevent_req_error(req, EINVAL);
+               return tevent_req_post(req, ev);
+       }
+
        /*
         * We have to defer the callback here, as we might be called from
         * within a different tevent_context than state->ev
         */
        tevent_req_defer_callback(req, state->ev);
 
-       state->tevent_handle = messaging_dgm_register_tevent_context(
-               state, ev);
-       if (tevent_req_nomem(state->tevent_handle, req)) {
+       state->fde = messaging_dgm_register_tevent_context(state, ev);
+       if (tevent_req_nomem(state->fde, req)) {
                return tevent_req_post(req, ev);
        }
 
+       if (lp_clustering()) {
+               state->cluster_fde =
+                       messaging_ctdb_register_tevent_context(state, ev);
+               if (tevent_req_nomem(state->cluster_fde, req)) {
+                       return tevent_req_post(req, ev);
+               }
+       }
+
        /*
         * We add ourselves to the "new_waiters" array, not the "waiters"
         * array. If we are called from within messaging_read_done,
@@ -588,6 +1076,12 @@ struct tevent_req *messaging_filtered_read_send(
        msg_ctx->num_new_waiters += 1;
        tevent_req_set_cleanup_fn(req, messaging_filtered_read_cleanup);
 
+       ok = messaging_register_event_context(msg_ctx, ev);
+       if (!ok) {
+               tevent_req_oom(req);
+               return tevent_req_post(req, ev);
+       }
+
        return req;
 }
 
@@ -597,11 +1091,18 @@ static void messaging_filtered_read_cleanup(struct tevent_req *req,
        struct messaging_filtered_read_state *state = tevent_req_data(
                req, struct messaging_filtered_read_state);
        struct messaging_context *msg_ctx = state->msg_ctx;
-       unsigned i;
+       size_t i;
+       bool ok;
 
        tevent_req_set_cleanup_fn(req, NULL);
 
-       TALLOC_FREE(state->tevent_handle);
+       TALLOC_FREE(state->fde);
+       TALLOC_FREE(state->cluster_fde);
+
+       ok = messaging_deregister_event_context(msg_ctx, state->ev);
+       if (!ok) {
+               abort();
+       }
 
        /*
         * Just set the [new_]waiters entry to NULL, be careful not to mess
@@ -734,87 +1235,6 @@ int messaging_read_recv(struct tevent_req *req, TALLOC_CTX *mem_ctx,
        return 0;
 }
 
-struct messaging_handler_state {
-       struct tevent_context *ev;
-       struct messaging_context *msg_ctx;
-       uint32_t msg_type;
-       bool (*handler)(struct messaging_context *msg_ctx,
-                       struct messaging_rec **rec, void *private_data);
-       void *private_data;
-};
-
-static void messaging_handler_got_msg(struct tevent_req *subreq);
-
-struct tevent_req *messaging_handler_send(
-       TALLOC_CTX *mem_ctx, struct tevent_context *ev,
-       struct messaging_context *msg_ctx, uint32_t msg_type,
-       bool (*handler)(struct messaging_context *msg_ctx,
-                       struct messaging_rec **rec, void *private_data),
-       void *private_data)
-{
-       struct tevent_req *req, *subreq;
-       struct messaging_handler_state *state;
-
-       req = tevent_req_create(mem_ctx, &state,
-                               struct messaging_handler_state);
-       if (req == NULL) {
-               return NULL;
-       }
-       state->ev = ev;
-       state->msg_ctx = msg_ctx;
-       state->msg_type = msg_type;
-       state->handler = handler;
-       state->private_data = private_data;
-
-       subreq = messaging_read_send(state, state->ev, state->msg_ctx,
-                                    state->msg_type);
-       if (tevent_req_nomem(subreq, req)) {
-               return tevent_req_post(req, ev);
-       }
-       tevent_req_set_callback(subreq, messaging_handler_got_msg, req);
-       return req;
-}
-
-static void messaging_handler_got_msg(struct tevent_req *subreq)
-{
-       struct tevent_req *req = tevent_req_callback_data(
-               subreq, struct tevent_req);
-       struct messaging_handler_state *state = tevent_req_data(
-               req, struct messaging_handler_state);
-       struct messaging_rec *rec;
-       int ret;
-       bool ok;
-
-       ret = messaging_read_recv(subreq, state, &rec);
-       TALLOC_FREE(subreq);
-       if (tevent_req_error(req, ret)) {
-               return;
-       }
-
-       subreq = messaging_read_send(state, state->ev, state->msg_ctx,
-                                    state->msg_type);
-       if (tevent_req_nomem(subreq, req)) {
-               return;
-       }
-       tevent_req_set_callback(subreq, messaging_handler_got_msg, req);
-
-       ok = state->handler(state->msg_ctx, &rec, state->private_data);
-       TALLOC_FREE(rec);
-       if (ok) {
-               /*
-                * Next round
-                */
-               return;
-       }
-       TALLOC_FREE(subreq);
-       tevent_req_done(req);
-}
-
-int messaging_handler_recv(struct tevent_req *req)
-{
-       return tevent_req_simple_recv_unix(req);
-}
-
 static bool messaging_append_new_waiters(struct messaging_context *msg_ctx)
 {
        if (msg_ctx->num_new_waiters == 0) {
@@ -843,17 +1263,14 @@ static bool messaging_append_new_waiters(struct messaging_context *msg_ctx)
        return true;
 }
 
-/*
-  Dispatch one messaging_rec
-*/
-static void messaging_dispatch_rec(struct messaging_context *msg_ctx,
-                                  struct messaging_rec *rec)
+static bool messaging_dispatch_classic(struct messaging_context *msg_ctx,
+                                      struct messaging_rec *rec)
 {
        struct messaging_callback *cb, *next;
-       unsigned i;
-       size_t j;
 
        for (cb = msg_ctx->callbacks; cb != NULL; cb = next) {
+               size_t j;
+
                next = cb->next;
                if (cb->msg_type != rec->msg_type) {
                        continue;
@@ -872,22 +1289,20 @@ static void messaging_dispatch_rec(struct messaging_context *msg_ctx,
                cb->fn(msg_ctx, cb->private_data, rec->msg_type,
                       rec->src, &rec->buf);
 
-               /*
-                * we continue looking for matching messages after finding
-                * one. This matters for subsystems like the internal notify
-                * code which register more than one handler for the same
-                * message type
-                */
+               return true;
        }
 
+       return false;
+}
+
+static bool messaging_dispatch_waiters(struct messaging_context *msg_ctx,
+                                      struct tevent_context *ev,
+                                      struct messaging_rec *rec)
+{
+       size_t i;
+
        if (!messaging_append_new_waiters(msg_ctx)) {
-               for (j=0; j < rec->num_fds; j++) {
-                       int fd = rec->fds[j];
-                       close(fd);
-               }
-               rec->num_fds = 0;
-               rec->fds = NULL;
-               return;
+               return false;
        }
 
        i = 0;
@@ -915,24 +1330,76 @@ static void messaging_dispatch_rec(struct messaging_context *msg_ctx,
 
                state = tevent_req_data(
                        req, struct messaging_filtered_read_state);
-               if (state->filter(rec, state->private_data)) {
+               if ((ev == state->ev) &&
+                   state->filter(rec, state->private_data)) {
                        messaging_filtered_read_done(req, rec);
-
-                       /*
-                        * Only the first one gets the fd-array
-                        */
-                       rec->num_fds = 0;
-                       rec->fds = NULL;
+                       return true;
                }
 
                i += 1;
        }
 
+       return false;
+}
+
+/*
+  Dispatch one messaging_rec
+*/
+static void messaging_dispatch_rec(struct messaging_context *msg_ctx,
+                                  struct tevent_context *ev,
+                                  struct messaging_rec *rec)
+{
+       bool consumed;
+       size_t i;
+
+       /*
+        * ev and msg_ctx->event_ctx can't be wrapper tevent_context pointers
+        * so we don't need to use tevent_context_same_loop().
+        */
+
+       if (ev == msg_ctx->event_ctx) {
+               consumed = messaging_dispatch_classic(msg_ctx, rec);
+               if (consumed) {
+                       return;
+               }
+       }
+
+       consumed = messaging_dispatch_waiters(msg_ctx, ev, rec);
+       if (consumed) {
+               return;
+       }
+
+       if (ev != msg_ctx->event_ctx) {
+               struct iovec iov;
+               int fds[rec->num_fds];
+               int ret;
+
+               /*
+                * We've been listening on a nested event
+                * context. Messages need to be handled in the main
+                * event context, so post to ourselves
+                */
+
+               iov.iov_base = rec->buf.data;
+               iov.iov_len = rec->buf.length;
+
+               for (i=0; i<rec->num_fds; i++) {
+                       fds[i] = rec->fds[i];
+               }
+
+               ret = messaging_post_self(
+                       msg_ctx, rec->src, rec->dest, rec->msg_type,
+                       &iov, 1, fds, rec->num_fds);
+               if (ret == 0) {
+                       return;
+               }
+       }
+
        /*
         * If the fd-array isn't used, just close it.
         */
-       for (j=0; j < rec->num_fds; j++) {
-               int fd = rec->fds[j];
+       for (i=0; i < rec->num_fds; i++) {
+               int fd = rec->fds[i];
                close(fd);
        }
        rec->num_fds = 0;
@@ -952,6 +1419,7 @@ bool messaging_parent_dgm_cleanup_init(struct messaging_context *msg)
                            60*15),
                mess_parent_dgm_cleanup, msg);
        if (req == NULL) {
+               DBG_WARNING("background_job_send failed\n");
                return false;
        }
        tevent_req_set_callback(req, mess_parent_dgm_cleanup_done, msg);