s3:ctdb_conn: add ctdbd_conn_get_fd() to get the fd out of the ctdb connection
[amitay/samba.git] / source3 / lib / ctdbd_conn.c
index 10a65c5bccafd50154d3751bad9ca449c03ef380..6b50009e003252917dc1e79d9d349825cbd8fea7 100644 (file)
@@ -3,17 +3,17 @@
    Samba internal messaging functions
    Copyright (C) 2007 by Volker Lendecke
    Copyright (C) 2007 by Andrew Tridgell
-   
+
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 3 of the License, or
    (at your option) any later version.
-   
+
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
-   
+
    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
@@ -36,7 +36,7 @@ struct ctdbd_connection {
        uint64 rand_srvid;
        struct packet_context *pkt;
        struct fd_event *fde;
-       
+
        void (*release_ip_handler)(const char *ip_addr, void *private_data);
        void *release_ip_priv;
 };
@@ -155,17 +155,17 @@ static NTSTATUS ctdbd_connect(TALLOC_CTX *mem_ctx,
  * Do we have a complete ctdb packet in the queue?
  */
 
-static bool ctdb_req_complete(const DATA_BLOB *data,
+static bool ctdb_req_complete(const uint8_t *buf, size_t available,
                              size_t *length,
                              void *private_data)
 {
        uint32 msglen;
 
-       if (data->length < sizeof(msglen)) {
+       if (available < sizeof(msglen)) {
                return False;
        }
 
-       msglen = *((uint32 *)data->data);
+       msglen = *((uint32 *)buf);
 
        DEBUG(10, ("msglen = %d\n", msglen));
 
@@ -176,12 +176,12 @@ static bool ctdb_req_complete(const DATA_BLOB *data,
                cluster_fatal("ctdbd protocol error\n");
        }
 
-       if (data->length >= msglen) {
-               *length = msglen;
-               return True;
+       if (available < msglen) {
+               return false;
        }
 
-       return False;
+       *length = msglen;
+       return true;
 }
 
 /*
@@ -200,7 +200,7 @@ struct deferred_msg_state {
 
 static void deferred_message_dispatch(struct event_context *event_ctx,
                                      struct timed_event *te,
-                                     const struct timeval *now,
+                                     struct timeval now,
                                      void *private_data)
 {
        struct deferred_msg_state *state = talloc_get_type_abort(
@@ -220,16 +220,13 @@ struct req_pull_state {
  * Pull a ctdb request out of the incoming packet queue
  */
 
-static NTSTATUS ctdb_req_pull(const DATA_BLOB *data,
+static NTSTATUS ctdb_req_pull(uint8_t *buf, size_t length,
                              void *private_data)
 {
        struct req_pull_state *state = (struct req_pull_state *)private_data;
 
-       state->req = data_blob_talloc(state->mem_ctx, data->data,
-                                     data->length);
-       if (state->req.data == NULL) {
-               return NT_STATUS_NO_MEMORY;
-       }
+       state->req.data = talloc_move(state->mem_ctx, &buf);
+       state->req.length = length;
        return NT_STATUS_OK;
 }
 
@@ -278,6 +275,17 @@ static struct messaging_rec *ctdb_pull_messaging_rec(TALLOC_CTX *mem_ctx,
        return result;
 }
 
+static NTSTATUS ctdb_packet_fd_read_sync(struct packet_context *ctx)
+{
+       struct timeval timeout;
+       struct timeval *ptimeout;
+
+       timeout = timeval_set(lp_ctdb_timeout(), 0);
+       ptimeout = (timeout.tv_sec != 0) ? &timeout : NULL;
+
+       return packet_fd_read_sync(ctx, ptimeout);
+}
+
 /*
  * Read a full ctdbd request. If we have a messaging context, defer incoming
  * messages that might come in between.
@@ -292,7 +300,7 @@ static NTSTATUS ctdb_read_req(struct ctdbd_connection *conn, uint32 reqid,
 
  again:
 
-       status = packet_fd_read_sync(conn->pkt);
+       status = ctdb_packet_fd_read_sync(conn->pkt);
 
        if (NT_STATUS_EQUAL(status, NT_STATUS_NETWORK_BUSY)) {
                /* EAGAIN */
@@ -342,7 +350,7 @@ static NTSTATUS ctdb_read_req(struct ctdbd_connection *conn, uint32 reqid,
                                  (long long unsigned)msg->srvid));
                        goto next_pkt;
                }
-               
+
                if ((conn->release_ip_handler != NULL)
                    && (msg->srvid == CTDB_SRVID_RELEASE_IP)) {
                        /* must be dispatched immediately */
@@ -353,15 +361,23 @@ static NTSTATUS ctdb_read_req(struct ctdbd_connection *conn, uint32 reqid,
                        goto next_pkt;
                }
 
-               if (msg->srvid == CTDB_SRVID_RECONFIGURE) {
-                       DEBUG(0,("Got cluster reconfigure message in ctdb_read_req\n"));
+               if ((msg->srvid == CTDB_SRVID_RECONFIGURE)
+                   || (msg->srvid == CTDB_SRVID_SAMBA_NOTIFY)) {
+
+                       DEBUG(1, ("ctdb_read_req: Got %s message\n",
+                                 (msg->srvid == CTDB_SRVID_RECONFIGURE)
+                                 ? "cluster reconfigure" : "SAMBA_NOTIFY"));
+
                        messaging_send(conn->msg_ctx, procid_self(),
                                       MSG_SMB_BRL_VALIDATE, &data_blob_null);
+                       messaging_send(conn->msg_ctx, procid_self(),
+                                      MSG_DBWRAP_G_LOCK_RETRY,
+                                      &data_blob_null);
                        TALLOC_FREE(hdr);
                        goto next_pkt;
                }
 
-               if (!(msg_state = TALLOC_P(NULL, struct deferred_msg_state))) {
+               if (!(msg_state = TALLOC_P(talloc_autofree_context(), struct deferred_msg_state))) {
                        DEBUG(0, ("talloc failed\n"));
                        TALLOC_FREE(hdr);
                        goto next_pkt;
@@ -378,7 +394,7 @@ static NTSTATUS ctdb_read_req(struct ctdbd_connection *conn, uint32 reqid,
                TALLOC_FREE(hdr);
 
                msg_state->msg_ctx = conn->msg_ctx;
-               
+
                /*
                 * We're waiting for a call reply, but an async message has
                 * crossed. Defer dispatching to the toplevel event loop.
@@ -386,7 +402,6 @@ static NTSTATUS ctdb_read_req(struct ctdbd_connection *conn, uint32 reqid,
                evt = event_add_timed(conn->msg_ctx->event_ctx,
                                      conn->msg_ctx->event_ctx,
                                      timeval_zero(),
-                                     "deferred_message_dispatch",
                                      deferred_message_dispatch,
                                      msg_state);
                if (evt == NULL) {
@@ -395,7 +410,7 @@ static NTSTATUS ctdb_read_req(struct ctdbd_connection *conn, uint32 reqid,
                        TALLOC_FREE(hdr);
                        goto next_pkt;
                }
-               
+
                goto next_pkt;
        }
 
@@ -486,6 +501,11 @@ NTSTATUS ctdbd_messaging_connection(TALLOC_CTX *mem_ctx,
                goto fail;
        }
 
+       status = register_with_ctdbd(conn, CTDB_SRVID_SAMBA_NOTIFY);
+       if (!NT_STATUS_IS_OK(status)) {
+               goto fail;
+       }
+
        *pconn = conn;
        return NT_STATUS_OK;
 
@@ -494,10 +514,20 @@ NTSTATUS ctdbd_messaging_connection(TALLOC_CTX *mem_ctx,
        return status;
 }
 
+struct messaging_context *ctdb_conn_msg_ctx(struct ctdbd_connection *conn)
+{
+       return conn->msg_ctx;
+}
+
+int ctdbd_conn_get_fd(struct ctdbd_connection *conn)
+{
+       return packet_get_fd(conn->pkt);
+}
+
 /*
  * Packet handler to receive and handle a ctdb message
  */
-static NTSTATUS ctdb_handle_message(const DATA_BLOB *data,
+static NTSTATUS ctdb_handle_message(uint8_t *buf, size_t length,
                                    void *private_data)
 {
        struct ctdbd_connection *conn = talloc_get_type_abort(
@@ -505,11 +535,12 @@ static NTSTATUS ctdb_handle_message(const DATA_BLOB *data,
        struct ctdb_req_message *msg;
        struct messaging_rec *msg_rec;
 
-       msg = (struct ctdb_req_message *)data->data;
+       msg = (struct ctdb_req_message *)buf;
 
        if (msg->hdr.operation != CTDB_REQ_MESSAGE) {
                DEBUG(0, ("Received async msg of type %u, discarding\n",
                          msg->hdr.operation));
+               TALLOC_FREE(buf);
                return NT_STATUS_INVALID_PARAMETER;
        }
 
@@ -519,46 +550,49 @@ static NTSTATUS ctdb_handle_message(const DATA_BLOB *data,
                DEBUG(10, ("received CTDB_SRVID_RELEASE_IP\n"));
                conn->release_ip_handler((const char *)msg->data,
                                         conn->release_ip_priv);
+               TALLOC_FREE(buf);
                return NT_STATUS_OK;
        }
 
        SMB_ASSERT(conn->msg_ctx != NULL);
 
-       if (msg->srvid == CTDB_SRVID_RECONFIGURE) {
+       if ((msg->srvid == CTDB_SRVID_RECONFIGURE)
+           || (msg->srvid == CTDB_SRVID_SAMBA_NOTIFY)){
                DEBUG(0,("Got cluster reconfigure message\n"));
                /*
-                * when the cluster is reconfigured, we need to clean the brl
-                * database
+                * when the cluster is reconfigured or another process
+                * of the Samba family has died (SAMBA_NOTIFY), we need
+                * to clean the brl database
                 */
                messaging_send(conn->msg_ctx, procid_self(),
                               MSG_SMB_BRL_VALIDATE, &data_blob_null);
 
-               /*
-                * it's possible that we have just rejoined the cluster after
-                * an outage. In that case our pending locks could have been
-                * removed from the lockdb, so retry them once more
-                */
-               message_send_all(conn->msg_ctx, MSG_SMB_UNLOCK, NULL, 0, NULL);
+               messaging_send(conn->msg_ctx, procid_self(),
+                              MSG_DBWRAP_G_LOCK_RETRY,
+                              &data_blob_null);
 
+               TALLOC_FREE(buf);
                return NT_STATUS_OK;
-               
        }
 
        /* only messages to our pid or the broadcast are valid here */
        if (msg->srvid != sys_getpid() && msg->srvid != MSG_SRVID_SAMBA) {
                DEBUG(0,("Got unexpected message with srvid=%llu\n", 
                         (unsigned long long)msg->srvid));
+               TALLOC_FREE(buf);
                return NT_STATUS_OK;
        }
 
-       if (!(msg_rec = ctdb_pull_messaging_rec(NULL, data->length, msg))) {
+       if (!(msg_rec = ctdb_pull_messaging_rec(NULL, length, msg))) {
                DEBUG(10, ("ctdb_pull_messaging_rec failed\n"));
+               TALLOC_FREE(buf);
                return NT_STATUS_NO_MEMORY;
        }
 
        messaging_dispatch_rec(conn->msg_ctx, msg_rec);
 
        TALLOC_FREE(msg_rec);
+       TALLOC_FREE(buf);
        return NT_STATUS_OK;
 }
 
@@ -892,7 +926,7 @@ NTSTATUS ctdbd_migrate(struct ctdbd_connection *conn, uint32 db_id,
        NTSTATUS status;
 
        ZERO_STRUCT(req);
-       
+
        req.hdr.length = offsetof(struct ctdb_req_call, data) + key.dsize;
        req.hdr.ctdb_magic   = CTDB_MAGIC;
        req.hdr.ctdb_version = CTDB_VERSION;
@@ -954,7 +988,7 @@ NTSTATUS ctdbd_fetch(struct ctdbd_connection *conn, uint32 db_id,
        NTSTATUS status;
 
        ZERO_STRUCT(req);
-       
+
        req.hdr.length = offsetof(struct ctdb_req_call, data) + key.dsize;
        req.hdr.ctdb_magic   = CTDB_MAGIC;
        req.hdr.ctdb_version = CTDB_VERSION;
@@ -1025,7 +1059,7 @@ struct ctdbd_traverse_state {
  * Handle a traverse record coming in on the ctdbd connection
  */
 
-static NTSTATUS ctdb_traverse_handler(const DATA_BLOB *blob,
+static NTSTATUS ctdb_traverse_handler(uint8_t *buf, size_t length,
                                      void *private_data)
 {
        struct ctdbd_traverse_state *state =
@@ -1035,11 +1069,11 @@ static NTSTATUS ctdb_traverse_handler(const DATA_BLOB *blob,
        struct ctdb_rec_data *d;
        TDB_DATA key, data;
 
-       m = (struct ctdb_req_message *)blob->data;
+       m = (struct ctdb_req_message *)buf;
 
-       if (blob->length < sizeof(*m) || m->hdr.length != blob->length) {
-               DEBUG(0, ("Got invalid message of length %d\n",
-                         (int)blob->length));
+       if (length < sizeof(*m) || m->hdr.length != length) {
+               DEBUG(0, ("Got invalid message of length %d\n", (int)length));
+               TALLOC_FREE(buf);
                return NT_STATUS_UNEXPECTED_IO_ERROR;
        }
 
@@ -1047,6 +1081,7 @@ static NTSTATUS ctdb_traverse_handler(const DATA_BLOB *blob,
        if (m->datalen < sizeof(uint32_t) || m->datalen != d->length) {
                DEBUG(0, ("Got invalid traverse data of length %d\n",
                          (int)m->datalen));
+               TALLOC_FREE(buf);
                return NT_STATUS_UNEXPECTED_IO_ERROR;
        }
 
@@ -1063,6 +1098,7 @@ static NTSTATUS ctdb_traverse_handler(const DATA_BLOB *blob,
        if (data.dsize < sizeof(struct ctdb_ltdb_header)) {
                DEBUG(0, ("Got invalid ltdb header length %d\n",
                          (int)data.dsize));
+               TALLOC_FREE(buf);
                return NT_STATUS_UNEXPECTED_IO_ERROR;
        }
        data.dsize -= sizeof(struct ctdb_ltdb_header);
@@ -1072,6 +1108,7 @@ static NTSTATUS ctdb_traverse_handler(const DATA_BLOB *blob,
                state->fn(key, data, state->private_data);
        }
 
+       TALLOC_FREE(buf);
        return NT_STATUS_OK;
 }
 
@@ -1095,6 +1132,11 @@ NTSTATUS ctdbd_traverse(uint32 db_id,
        struct ctdbd_traverse_state state;
 
        status = ctdbd_init_connection(NULL, &conn);
+       if (!NT_STATUS_IS_OK(status)) {
+               DEBUG(0, ("ctdbd_init_connection failed: %s\n",
+                         nt_errstr(status)));
+               return status;
+       }
 
        t.db_id = db_id;
        t.srvid = conn->rand_srvid;
@@ -1146,7 +1188,7 @@ NTSTATUS ctdbd_traverse(uint32 db_id,
                        break;
                }
 
-               status = packet_fd_read_sync(conn->pkt);
+               status = ctdb_packet_fd_read_sync(conn->pkt);
 
                if (NT_STATUS_EQUAL(status, NT_STATUS_RETRY)) {
                        /*
@@ -1157,6 +1199,7 @@ NTSTATUS ctdbd_traverse(uint32 db_id,
 
                if (NT_STATUS_EQUAL(status, NT_STATUS_END_OF_FILE)) {
                        status = NT_STATUS_OK;
+                       break;
                }
 
                if (!NT_STATUS_IS_OK(status)) {
@@ -1170,26 +1213,84 @@ NTSTATUS ctdbd_traverse(uint32 db_id,
        return status;
 }
 
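+/*
+   This is used to canonicalize a socket address held in a
+   struct sockaddr_storage: an IPv4-mapped IPv6 address
+   (::ffff:a.b.c.d) is rewritten as a plain AF_INET address with the
+   same port; any other address is copied through unchanged.
+*/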
+static void smbd_ctdb_canonicalize_ip(const struct sockaddr_storage *in,
+                                     struct sockaddr_storage *out)
+{
+       memcpy(out, in, sizeof (*out));
+
+#ifdef HAVE_IPV6
+       if (in->ss_family == AF_INET6) {
+               const char prefix[12] = { 0,0,0,0,0,0,0,0,0,0,0xff,0xff };
+               const struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)in;
+               struct sockaddr_in *out4 = (struct sockaddr_in *)out;
+               if (memcmp(&in6->sin6_addr, prefix, 12) == 0) {
+                       memset(out, 0, sizeof(*out));
+#ifdef HAVE_SOCK_SIN_LEN
+                       out4->sin_len = sizeof(*out);
+#endif
+                       out4->sin_family = AF_INET;
+                       out4->sin_port   = in6->sin6_port;
+                       memcpy(&out4->sin_addr, &in6->sin6_addr.s6_addr32[3], 4);
+               }
+       }
+#endif
+}
+
 /*
  * Register us as a server for a particular tcp connection
  */
 
 NTSTATUS ctdbd_register_ips(struct ctdbd_connection *conn,
-                           const struct sockaddr_in *server,
-                           const struct sockaddr_in *client,
+                           const struct sockaddr_storage *_server,
+                           const struct sockaddr_storage *_client,
                            void (*release_ip_handler)(const char *ip_addr,
                                                       void *private_data),
                            void *private_data)
 {
-       struct ctdb_control_tcp p;
+       /*
+        * we still use ctdb_control_tcp for ipv4
+        * because we want to work against older ctdb
+        * versions at runtime
+        */
+       struct ctdb_control_tcp p4;
+#ifdef HAVE_STRUCT_CTDB_CONTROL_TCP_ADDR
+       struct ctdb_control_tcp_addr p;
+#endif
        TDB_DATA data;
        NTSTATUS status;
+       struct sockaddr_storage client;
+       struct sockaddr_storage server;
 
        /*
         * Only one connection so far
         */
        SMB_ASSERT(conn->release_ip_handler == NULL);
 
+       smbd_ctdb_canonicalize_ip(_client, &client);
+       smbd_ctdb_canonicalize_ip(_server, &server);
+
+       switch (client.ss_family) {
+       case AF_INET:
+               p4.dest = *(struct sockaddr_in *)&server;
+               p4.src = *(struct sockaddr_in *)&client;
+               data.dptr = (uint8_t *)&p4;
+               data.dsize = sizeof(p4);
+               break;
+#ifdef HAVE_STRUCT_CTDB_CONTROL_TCP_ADDR
+       case AF_INET6:
+               p.dest.ip6 = *(struct sockaddr_in6 *)&server;
+               p.src.ip6 = *(struct sockaddr_in6 *)&client;
+               data.dptr = (uint8_t *)&p;
+               data.dsize = sizeof(p);
+               break;
+#endif
+       default:
+               return NT_STATUS_INTERNAL_ERROR;
+       }
+
        conn->release_ip_handler = release_ip_handler;
 
        /*
@@ -1201,17 +1302,11 @@ NTSTATUS ctdbd_register_ips(struct ctdbd_connection *conn,
                return status;
        }
 
-       p.dest = *server;
-       p.src = *client;
-
        /*
         * inform ctdb of our tcp connection, so if IP takeover happens ctdb
         * can send an extra ack to trigger a reset for our client, so it
         * immediately reconnects
         */
-       data.dptr = (uint8_t *)&p;
-       data.dsize = sizeof(p);
-
        return ctdbd_control(conn, CTDB_CURRENT_NODE, 
                             CTDB_CONTROL_TCP_CLIENT, 0,
                             CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL, NULL);
@@ -1236,6 +1331,50 @@ NTSTATUS ctdbd_control_local(struct ctdbd_connection *conn, uint32 opcode,
        return ctdbd_control(conn, CTDB_CURRENT_NODE, opcode, srvid, flags, data, mem_ctx, outdata, cstatus);
 }
 
+NTSTATUS ctdb_watch_us(struct ctdbd_connection *conn)
+{
+       struct ctdb_client_notify_register reg_data;
+       size_t struct_len;
+       NTSTATUS status;
+       int cstatus;
+
+       reg_data.srvid = CTDB_SRVID_SAMBA_NOTIFY;
+       reg_data.len = 1;
+       reg_data.notify_data[0] = 0;
+
+       struct_len = offsetof(struct ctdb_client_notify_register,
+                             notify_data) + reg_data.len;
+
+       status = ctdbd_control_local(
+               conn, CTDB_CONTROL_REGISTER_NOTIFY, conn->rand_srvid, 0,
+               make_tdb_data((uint8_t *)&reg_data, struct_len),
+               NULL, NULL, &cstatus);
+       if (!NT_STATUS_IS_OK(status)) {
+               DEBUG(1, ("ctdbd_control_local failed: %s\n",
+                         nt_errstr(status)));
+       }
+       return status;
+}
+
+NTSTATUS ctdb_unwatch(struct ctdbd_connection *conn)
+{
+       struct ctdb_client_notify_deregister dereg_data;
+       NTSTATUS status;
+       int cstatus;
+
+       dereg_data.srvid = CTDB_SRVID_SAMBA_NOTIFY;
+
+       status = ctdbd_control_local(
+               conn, CTDB_CONTROL_DEREGISTER_NOTIFY, conn->rand_srvid, 0,
+               make_tdb_data((uint8_t *)&dereg_data, sizeof(dereg_data)),
+               NULL, NULL, &cstatus);
+       if (!NT_STATUS_IS_OK(status)) {
+               DEBUG(1, ("ctdbd_control_local failed: %s\n",
+                         nt_errstr(status)));
+       }
+       return status;
+}
+
 #else
 
 NTSTATUS ctdbd_init_connection(TALLOC_CTX *mem_ctx,