s3-messages: only include messages.h where needed.
[kai/samba.git] / source3 / lib / g_lock.c
index 8a44b22e11fb78e5b37d93fbbf7ffb71d8e86274..c9e16d0f772d7508865a7c69cca067eba963ace5 100644 (file)
 */
 
 #include "includes.h"
+#include "system/filesys.h"
 #include "g_lock.h"
+#include "librpc/gen_ndr/messaging.h"
+#include "ctdbd_conn.h"
+#include "../lib/util/select.h"
+#include "system/select.h"
+#include "messages.h"
 
 static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name,
                                    struct server_id pid);
@@ -51,7 +57,7 @@ struct g_lock_ctx *g_lock_ctx_init(TALLOC_CTX *mem_ctx,
        result->msg = msg;
 
        result->db = db_open(result, lock_path("g_lock.tdb"), 0,
-                            TDB_CLEAR_IF_FIRST, O_RDWR|O_CREAT, 0700);
+                            TDB_CLEAR_IF_FIRST|TDB_INCOMPATIBLE_HASH, O_RDWR|O_CREAT, 0700);
        if (result->db == NULL) {
                DEBUG(1, ("g_lock_init: Could not open g_lock.tdb"));
                TALLOC_FREE(result);
@@ -108,6 +114,34 @@ static bool g_lock_parse(TALLOC_CTX *mem_ctx, TDB_DATA data,
                              (locks[i].lock_type & G_LOCK_PENDING) ?
                              "(pending)" : "(owner)"));
 
+               if (((locks[i].lock_type & G_LOCK_PENDING) == 0)
+                   && !process_exists(locks[i].pid)) {
+
+                       DEBUGADD(10, ("lock owner %s died -- discarding\n",
+                                     procid_str(talloc_tos(),
+                                                &locks[i].pid)));
+
+                       if (i < (num_locks-1)) {
+                               locks[i] = locks[num_locks-1];
+                       }
+                       num_locks -= 1;
+               }
+       }
+
+       *plocks = locks;
+       *pnum_locks = num_locks;
+       return true;
+}
+
+static void g_lock_cleanup(int *pnum_locks, struct g_lock_rec *locks)
+{
+       int i, num_locks;
+
+       num_locks = *pnum_locks;
+
+       DEBUG(10, ("g_lock_cleanup: %d locks\n", num_locks));
+
+       for (i=0; i<num_locks; i++) {
                if (process_exists(locks[i].pid)) {
                        continue;
                }
@@ -119,19 +153,18 @@ static bool g_lock_parse(TALLOC_CTX *mem_ctx, TDB_DATA data,
                }
                num_locks -= 1;
        }
-
-       *plocks = locks;
        *pnum_locks = num_locks;
-       return true;
+       return;
 }
 
 static struct g_lock_rec *g_lock_addrec(TALLOC_CTX *mem_ctx,
                                        struct g_lock_rec *locks,
-                                       int num_locks,
+                                       int *pnum_locks,
                                        const struct server_id pid,
                                        enum g_lock_type lock_type)
 {
        struct g_lock_rec *result;
+       int num_locks = *pnum_locks;
 
        result = talloc_realloc(mem_ctx, locks, struct g_lock_rec,
                                num_locks+1);
@@ -141,6 +174,7 @@ static struct g_lock_rec *g_lock_addrec(TALLOC_CTX *mem_ctx,
 
        result[num_locks].pid = pid;
        result[num_locks].lock_type = lock_type;
+       *pnum_locks += 1;
        return result;
 }
 
@@ -149,10 +183,6 @@ static void g_lock_got_retry(struct messaging_context *msg,
                             uint32_t msg_type,
                             struct server_id server_id,
                             DATA_BLOB *data);
-static void g_lock_timedout(struct tevent_context *ev,
-                           struct tevent_timer *te,
-                           struct timeval current_time,
-                           void *private_data);
 
 static NTSTATUS g_lock_trylock(struct g_lock_ctx *ctx, const char *name,
                               enum g_lock_type lock_type)
@@ -181,7 +211,7 @@ again:
                goto done;
        }
 
-       self = procid_self();
+       self = messaging_server_id(ctx->msg);
        our_index = -1;
 
        for (i=0; i<num_locks; i++) {
@@ -225,7 +255,7 @@ again:
        if (our_index == -1) {
                /* First round, add ourself */
 
-               locks = g_lock_addrec(talloc_tos(), locks, num_locks,
+               locks = g_lock_addrec(talloc_tos(), locks, &num_locks,
                                      self, lock_type);
                if (locks == NULL) {
                        DEBUG(10, ("g_lock_addrec failed\n"));
@@ -241,7 +271,14 @@ again:
                locks[our_index].lock_type = lock_type;
        }
 
-       data = make_tdb_data((uint8_t *)locks, talloc_get_size(locks));
+       if (NT_STATUS_IS_OK(status) && ((lock_type & G_LOCK_PENDING) == 0)) {
+               /*
+                * Walk through the list of locks, search for dead entries
+                */
+               g_lock_cleanup(&num_locks, locks);
+       }
+
+       data = make_tdb_data((uint8_t *)locks, num_locks * sizeof(*locks));
        store_status = rec->store(rec, data, 0);
        if (!NT_STATUS_IS_OK(store_status)) {
                DEBUG(1, ("rec->store failed: %s\n",
@@ -267,7 +304,6 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name,
        NTSTATUS status;
        bool retry = false;
        struct timeval timeout_end;
-       struct timeval timeout_remaining;
        struct timeval time_now;
 
        DEBUG(10, ("Trying to acquire lock %d for %s\n", (int)lock_type,
@@ -302,10 +338,11 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name,
        timeout_end = timeval_sum(&time_now, &timeout);
 
        while (true) {
-               fd_set _r_fds;
-               fd_set *r_fds = NULL;
-               int max_fd = 0;
+               struct pollfd *pollfds;
+               int num_pollfds;
+               int saved_errno;
                int ret;
+               struct timeval timeout_remaining, select_timeout;
 
                status = g_lock_trylock(ctx, name, lock_type);
                if (NT_STATUS_IS_OK(status)) {
@@ -340,7 +377,7 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name,
                 * for writing and some other process already holds a lock
                 * on the registry.tdb.
                 *
-                * So as a quick fix, we act a little corasely here: we do
+                * So as a quick fix, we act a little coarsely here: we do
                 * a select on the ctdb connection fd and when it is readable
                 * or we get EINTR, then we retry without actually parsing
                 * any ctdb packages or dispatching messages. This means that
@@ -351,22 +388,47 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name,
                 * events here but have to handcode a timeout.
                 */
 
+               /*
+                * We allocate 2 entries here. One is needed anyway for
+                * sys_poll and in the clustering case we might have to add
+                * the ctdb fd. This avoids the realloc then.
+                */
+               pollfds = TALLOC_ARRAY(talloc_tos(), struct pollfd, 2);
+               if (pollfds == NULL) {
+                       status = NT_STATUS_NO_MEMORY;
+                       break;
+               }
+               num_pollfds = 1;
+
 #ifdef CLUSTER_SUPPORT
                if (lp_clustering()) {
-                       struct ctdbd_connection *conn = messaging_ctdbd_connection();
+                       struct ctdbd_connection *conn;
+                       conn = messaging_ctdbd_connection();
 
-                       r_fds = &_r_fds;
-                       FD_ZERO(r_fds);
-                       max_fd = ctdbd_conn_get_fd(conn);
-                       FD_SET(max_fd, r_fds);
+                       pollfds[0].fd = ctdbd_conn_get_fd(conn);
+                       pollfds[0].events = POLLIN|POLLHUP;
+
+                       num_pollfds += 1;
                }
 #endif
 
                time_now = timeval_current();
                timeout_remaining = timeval_until(&time_now, &timeout_end);
+               select_timeout = timeval_set(60, 0);
+
+               select_timeout = timeval_min(&select_timeout,
+                                            &timeout_remaining);
+
+               ret = sys_poll(pollfds, num_pollfds,
+                              timeval_to_msec(select_timeout));
 
-               ret = sys_select(max_fd + 1, r_fds, NULL, NULL,
-                                &timeout_remaining);
+               /*
+                * We're not really interested in the actual flags. We just
+                * need to retry this whole thing.
+                */
+               saved_errno = errno;
+               TALLOC_FREE(pollfds);
+               errno = saved_errno;
 
                if (ret == -1) {
                        if (errno != EINTR) {
@@ -388,7 +450,7 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name,
                                break;
                        } else {
                                DEBUG(10, ("select returned 0 but timeout not "
-                                          "not expired: strange - retrying\n"));
+                                          "not expired, retrying\n"));
                        }
                } else if (ret != 1) {
                        DEBUG(1, ("invalid return code of select: %d\n", ret));
@@ -404,7 +466,9 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name,
                 */
        }
 
+#ifdef CLUSTER_SUPPORT
 done:
+#endif
 
        if (!NT_STATUS_IS_OK(status)) {
                NTSTATUS unlock_status;
@@ -437,16 +501,6 @@ static void g_lock_got_retry(struct messaging_context *msg,
        *pretry = true;
 }
 
-static void g_lock_timedout(struct tevent_context *ev,
-                           struct tevent_timer *te,
-                           struct timeval current_time,
-                           void *private_data)
-{
-       bool *ptimedout = (bool *)private_data;
-       *ptimedout = true;
-       TALLOC_FREE(te);
-}
-
 static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name,
                                    struct server_id pid)
 {
@@ -504,14 +558,26 @@ static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name,
                goto done;
        }
 
+       TALLOC_FREE(rec);
+
        if ((lock_type & G_LOCK_PENDING) == 0) {
+               int num_wakeups = 0;
+
                /*
-                * We've been the lock holder. Tell all others to retry.
+                * We've been the lock holder. Wake up a few pending
+                * waiters so they can retry, but don't tell all of them
+                * to avoid a thundering herd. In case this leads to a
+                * complete stall because we miss some processes, the
+                * loop in g_lock_lock retries at least once a minute.
                 */
+
                for (i=0; i<num_locks; i++) {
                        if ((locks[i].lock_type & G_LOCK_PENDING) == 0) {
                                continue;
                        }
+                       if (!process_exists(locks[i].pid)) {
+                               continue;
+                       }
 
                        /*
                         * Ping all waiters to retry
@@ -524,13 +590,22 @@ static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name,
                                          procid_str(talloc_tos(),
                                                     &locks[i].pid),
                                          nt_errstr(status)));
+                       } else {
+                               num_wakeups += 1;
+                       }
+                       if (num_wakeups > 5) {
+                               break;
                        }
                }
        }
 done:
+       /*
+        * For the error path, TALLOC_FREE(rec) as well. In the good
+        * path we have already freed it.
+        */
+       TALLOC_FREE(rec);
 
        TALLOC_FREE(locks);
-       TALLOC_FREE(rec);
        return status;
 }
 
@@ -538,7 +613,7 @@ NTSTATUS g_lock_unlock(struct g_lock_ctx *ctx, const char *name)
 {
        NTSTATUS status;
 
-       status = g_lock_force_unlock(ctx, name, procid_self());
+       status = g_lock_force_unlock(ctx, name, messaging_server_id(ctx->msg));
 
 #ifdef CLUSTER_SUPPORT
        if (lp_clustering()) {
@@ -651,3 +726,68 @@ NTSTATUS g_lock_get(struct g_lock_ctx *ctx, const char *name,
        }
        return NT_STATUS_OK;
 }
+
+static bool g_lock_init_all(TALLOC_CTX *mem_ctx,
+                           struct tevent_context **pev,
+                           struct messaging_context **pmsg,
+                           const struct server_id self,
+                           struct g_lock_ctx **pg_ctx)
+{
+       struct tevent_context *ev = NULL;
+       struct messaging_context *msg = NULL;
+       struct g_lock_ctx *g_ctx = NULL;
+
+       ev = tevent_context_init(mem_ctx);
+       if (ev == NULL) {
+               d_fprintf(stderr, "ERROR: could not init event context\n");
+               goto fail;
+       }
+       msg = messaging_init(mem_ctx, self, ev);
+       if (msg == NULL) {
+               d_fprintf(stderr, "ERROR: could not init messaging context\n");
+               goto fail;
+       }
+       g_ctx = g_lock_ctx_init(mem_ctx, msg);
+       if (g_ctx == NULL) {
+               d_fprintf(stderr, "ERROR: could not init g_lock context\n");
+               goto fail;
+       }
+
+       *pev = ev;
+       *pmsg = msg;
+       *pg_ctx = g_ctx;
+       return true;
+fail:
+       TALLOC_FREE(g_ctx);
+       TALLOC_FREE(msg);
+       TALLOC_FREE(ev);
+       return false;
+}
+
+NTSTATUS g_lock_do(const char *name, enum g_lock_type lock_type,
+                  struct timeval timeout, const struct server_id self,
+                  void (*fn)(void *private_data), void *private_data)
+{
+       struct tevent_context *ev = NULL;
+       struct messaging_context *msg = NULL;
+       struct g_lock_ctx *g_ctx = NULL;
+       NTSTATUS status;
+
+       if (!g_lock_init_all(talloc_tos(), &ev, &msg, self, &g_ctx)) {
+               status = NT_STATUS_ACCESS_DENIED;
+               goto done;
+       }
+
+       status = g_lock_lock(g_ctx, name, lock_type, timeout);
+       if (!NT_STATUS_IS_OK(status)) {
+               goto done;
+       }
+       fn(private_data);
+       g_lock_unlock(g_ctx, name);
+
+done:
+       TALLOC_FREE(g_ctx);
+       TALLOC_FREE(msg);
+       TALLOC_FREE(ev);
+       return status;
+}