ctdb-daemon: Rename EventScriptTimeoutCount to MonitorTimeoutCount
[obnox/samba/samba-obnox.git] / ctdb / server / ctdb_monitor.c
index edaae48ff8ed0a0b186aa39ef3623b0aed7465a0..4f998d3d1f569f2009a1670770f7da604fd90872 100644 (file)
    along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
 
-#include "includes.h"
+#include "replace.h"
 #include "system/filesys.h"
+#include "system/network.h"
 #include "system/wait.h"
-#include "../include/ctdb_private.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/debug.h"
+#include "lib/util/samba_util.h"
+
+#include "ctdb_private.h"
+
+#include "common/system.h"
+#include "common/common.h"
+#include "common/logging.h"
 
 struct ctdb_monitor_state {
        uint32_t monitoring_mode;
        TALLOC_CTX *monitor_context;
        uint32_t next_interval;
+       uint32_t event_script_timeouts;
 };
 
-static void ctdb_check_health(struct event_context *ev, struct timed_event *te, 
+static void ctdb_check_health(struct tevent_context *ev,
+                             struct tevent_timer *te,
                              struct timeval t, void *private_data);
 
 /*
@@ -113,7 +127,7 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
        uint32_t next_interval;
        int ret;
        TDB_DATA rddata;
-       struct srvid_request rd;
+       struct ctdb_srvid_message rd;
        const char *state_str = NULL;
 
        c.pnn = ctdb->pnn;
@@ -131,14 +145,20 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
        }
 
        if (status == -ETIME) {
-               ctdb->event_script_timeouts++;
+               ctdb->monitor->event_script_timeouts++;
 
-               if (ctdb->event_script_timeouts >= ctdb->tunable.script_timeout_count) {
-                       DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Making node unhealthy\n", ctdb->tunable.script_timeout_count));
+               if (ctdb->monitor->event_script_timeouts >=
+                   ctdb->tunable.monitor_timeout_count) {
+                       DEBUG(DEBUG_ERR,
+                             ("Maximum monitor timeout count %u reached."
+                              " Making node unhealthy\n",
+                              ctdb->tunable.monitor_timeout_count));
                } else {
                        /* We pretend this is OK. */
                        goto after_change_status;
                }
+       } else {
+               ctdb->monitor->event_script_timeouts = 0;
        }
 
        if (status != 0 && !(node->flags & NODE_FLAGS_UNHEALTHY)) {
@@ -163,9 +183,9 @@ after_change_status:
                ctdb->monitor->next_interval = ctdb->tunable.monitor_interval;
        }
 
-       event_add_timed(ctdb->ev, ctdb->monitor->monitor_context, 
-                               timeval_current_ofs(next_interval, 0), 
-                               ctdb_check_health, ctdb);
+       tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
+                        timeval_current_ofs(next_interval, 0),
+                        ctdb_check_health, ctdb);
 
        if (c.old_flags == node->flags) {
                return;
@@ -199,7 +219,8 @@ after_change_status:
 }
 
 
-static void ctdb_run_startup(struct event_context *ev, struct timed_event *te,
+static void ctdb_run_startup(struct tevent_context *ev,
+                            struct tevent_timer *te,
                             struct timeval t, void *private_data);
 /*
   called when the startup event script finishes
@@ -208,9 +229,9 @@ static void ctdb_startup_callback(struct ctdb_context *ctdb, int status, void *p
 {
        if (status != 0) {
                DEBUG(DEBUG_ERR,("startup event failed\n"));
-               event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
-                               timeval_current_ofs(5, 0),
-                               ctdb_run_startup, ctdb);
+               tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
+                                timeval_current_ofs(5, 0),
+                                ctdb_run_startup, ctdb);
                return;
        }
 
@@ -221,12 +242,13 @@ static void ctdb_startup_callback(struct ctdb_context *ctdb, int status, void *p
 
        ctdb->monitor->monitoring_mode = CTDB_MONITORING_ACTIVE;
 
-       event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
-                       timeval_current_ofs(ctdb->monitor->next_interval, 0),
-                       ctdb_check_health, ctdb);
+       tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
+                        timeval_current_ofs(ctdb->monitor->next_interval, 0),
+                        ctdb_check_health, ctdb);
 }
 
-static void ctdb_run_startup(struct event_context *ev, struct timed_event *te,
+static void ctdb_run_startup(struct tevent_context *ev,
+                            struct tevent_timer *te,
                             struct timeval t, void *private_data)
 {
        struct ctdb_context *ctdb = talloc_get_type(private_data,
@@ -241,9 +263,9 @@ static void ctdb_run_startup(struct event_context *ev, struct timed_event *te,
        if (ctdb->runstate < CTDB_RUNSTATE_STARTUP) {
                DEBUG(DEBUG_NOTICE,
                      ("Not yet in startup runstate. Wait one more second\n"));
-               event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
-                               timeval_current_ofs(1, 0),
-                               ctdb_run_startup, ctdb);
+               tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
+                                timeval_current_ofs(1, 0),
+                                ctdb_run_startup, ctdb);
                return;
        }
 
@@ -258,9 +280,9 @@ static void ctdb_run_startup(struct event_context *ev, struct timed_event *te,
 
        if (ret != 0) {
                DEBUG(DEBUG_ERR,("Unable to launch startup event script\n"));
-               event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
-                               timeval_current_ofs(5, 0),
-                               ctdb_run_startup, ctdb);
+               tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
+                                timeval_current_ofs(5, 0),
+                                ctdb_run_startup, ctdb);
        }
 }
 
@@ -268,8 +290,9 @@ static void ctdb_run_startup(struct event_context *ev, struct timed_event *te,
   wait until we have finished initial recoveries before we start the
   monitoring events
  */
-static void ctdb_wait_until_recovered(struct event_context *ev, struct timed_event *te, 
-                             struct timeval t, void *private_data)
+static void ctdb_wait_until_recovered(struct tevent_context *ev,
+                                     struct tevent_timer *te,
+                                     struct timeval t, void *private_data)
 {
        struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
        int ret;
@@ -287,9 +310,9 @@ static void ctdb_wait_until_recovered(struct event_context *ev, struct timed_eve
        if (ctdb->vnn_map->generation == INVALID_GENERATION) {
                ctdb->db_persistent_startup_generation = INVALID_GENERATION;
 
-               event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
-                                    timeval_current_ofs(1, 0), 
-                                    ctdb_wait_until_recovered, ctdb);
+               tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
+                                timeval_current_ofs(1, 0),
+                                ctdb_wait_until_recovered, ctdb);
                return;
        }
 
@@ -297,9 +320,9 @@ static void ctdb_wait_until_recovered(struct event_context *ev, struct timed_eve
                ctdb->db_persistent_startup_generation = INVALID_GENERATION;
 
                DEBUG(DEBUG_NOTICE,(__location__ " in recovery. Wait one more second\n"));
-               event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
-                                    timeval_current_ofs(1, 0), 
-                                    ctdb_wait_until_recovered, ctdb);
+               tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
+                                timeval_current_ofs(1, 0),
+                                ctdb_wait_until_recovered, ctdb);
                return;
        }
 
@@ -309,18 +332,18 @@ static void ctdb_wait_until_recovered(struct event_context *ev, struct timed_eve
 
                DEBUG(DEBUG_NOTICE,(__location__ " wait for pending recoveries to end. Wait one more second.\n"));
 
-               event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
-                                    timeval_current_ofs(1, 0), 
-                                    ctdb_wait_until_recovered, ctdb);
+               tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
+                                timeval_current_ofs(1, 0),
+                                ctdb_wait_until_recovered, ctdb);
                return;
        }
 
        if (ctdb->vnn_map->generation == ctdb->db_persistent_startup_generation) {
                DEBUG(DEBUG_INFO,(__location__ " skip ctdb_recheck_persistent_health() "
                                  "until the next recovery\n"));
-               event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
-                                    timeval_current_ofs(1, 0),
-                                    ctdb_wait_until_recovered, ctdb);
+               tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
+                                timeval_current_ofs(1, 0),
+                                ctdb_wait_until_recovered, ctdb);
                return;
        }
 
@@ -334,10 +357,10 @@ static void ctdb_wait_until_recovered(struct event_context *ev, struct timed_eve
                              "failed (%llu of %llu times) - retry later\n",
                              (unsigned long long)ctdb->db_persistent_check_errors,
                              (unsigned long long)ctdb->max_persistent_check_errors));
-                       event_add_timed(ctdb->ev,
-                                       ctdb->monitor->monitor_context,
-                                       timeval_current_ofs(1, 0),
-                                       ctdb_wait_until_recovered, ctdb);
+                       tevent_add_timer(ctdb->ev,
+                                        ctdb->monitor->monitor_context,
+                                        timeval_current_ofs(1, 0),
+                                        ctdb_wait_until_recovered, ctdb);
                        return;
                }
                DEBUG(DEBUG_ALERT,(__location__
@@ -349,15 +372,16 @@ static void ctdb_wait_until_recovered(struct event_context *ev, struct timed_eve
        }
        ctdb->db_persistent_check_errors = 0;
 
-       event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
-                       timeval_current(), ctdb_run_startup, ctdb);
+       tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
+                        timeval_current(), ctdb_run_startup, ctdb);
 }
 
 
 /*
   see if the event scripts think we are healthy
  */
-static void ctdb_check_health(struct event_context *ev, struct timed_event *te, 
+static void ctdb_check_health(struct tevent_context *ev,
+                             struct tevent_timer *te,
                              struct timeval t, void *private_data)
 {
        struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
@@ -376,9 +400,9 @@ static void ctdb_check_health(struct event_context *ev, struct timed_event *te,
        }
 
        if (skip_monitoring) {
-               event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
-                               timeval_current_ofs(ctdb->monitor->next_interval, 0),
-                               ctdb_check_health, ctdb);
+               tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
+                                timeval_current_ofs(ctdb->monitor->next_interval, 0),
+                                ctdb_check_health, ctdb);
                return;
        }
 
@@ -389,9 +413,9 @@ static void ctdb_check_health(struct event_context *ev, struct timed_event *te,
        if (ret != 0) {
                DEBUG(DEBUG_ERR,("Unable to launch monitor event script\n"));
                ctdb->monitor->next_interval = 5;
-               event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
-                               timeval_current_ofs(5, 0),
-                               ctdb_check_health, ctdb);
+               tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
+                                timeval_current_ofs(5, 0),
+                                ctdb_check_health, ctdb);
        }
 }
 
@@ -441,9 +465,9 @@ void ctdb_wait_for_first_recovery(struct ctdb_context *ctdb)
        ctdb->monitor->monitor_context = talloc_new(ctdb->monitor);
        CTDB_NO_MEMORY_FATAL(ctdb, ctdb->monitor->monitor_context);
 
-       event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
-                       timeval_current_ofs(1, 0),
-                       ctdb_wait_until_recovered, ctdb);
+       tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
+                        timeval_current_ofs(1, 0),
+                        ctdb_wait_until_recovered, ctdb);
 }
 
 
@@ -469,7 +493,7 @@ int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)
        node->flags   = c->new_flags & ~NODE_FLAGS_DISCONNECTED;
        node->flags  |= (c->old_flags & NODE_FLAGS_DISCONNECTED);
 
-       /* we dont let other nodes modify our STOPPED status */
+       /* we don't let other nodes modify our STOPPED status */
        if (c->pnn == ctdb->pnn) {
                node->flags &= ~NODE_FLAGS_STOPPED;
                if (old_flags & NODE_FLAGS_STOPPED) {
@@ -477,7 +501,7 @@ int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)
                }
        }
 
-       /* we dont let other nodes modify our BANNED status */
+       /* we don't let other nodes modify our BANNED status */
        if (c->pnn == ctdb->pnn) {
                node->flags &= ~NODE_FLAGS_BANNED;
                if (old_flags & NODE_FLAGS_BANNED) {