Fixes for various issues found by Coverity
[nivanova/samba-autobuild/.git] / ctdb / server / eventscript.c
index 722ebec6a9e475c9cecc9871da20ad27d39bb5c1..10d426fba7f912264d1bd9ec645230a6f188a255 100644 (file)
@@ -24,8 +24,8 @@
 #include "system/dir.h"
 #include "system/locale.h"
 #include "../include/ctdb_private.h"
-#include "lib/tevent/tevent.h"
 #include "../common/rb_tree.h"
+#include "lib/util/dlinklist.h"
 
 static void ctdb_event_script_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p);
 
@@ -34,14 +34,22 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
  */
 static void sigterm(int sig)
 {
+       pid_t pid; /* process group id as reported by getpgrp() */
+
        /* all the child processes will be running in the same process group */
-       kill(-getpgrp(), SIGKILL);
+       pid = getpgrp();
+       if (pid == -1) { /* no usable group id: signal -getpid() instead */
+               kill(-getpid(), SIGKILL);
+       } else { /* negative pid asks kill() to signal the whole group */
+               kill(-pid, SIGKILL);
+       }
        _exit(1);
 }
 
 /* This is attached to the event script state. */
 struct event_script_callback {
-       struct ctdb_event_script_state *state;
+       struct event_script_callback *next, *prev;
+       struct ctdb_context *ctdb;
 
        /* Warning: this can free us! */
        void (*fn)(struct ctdb_context *, int, void *);
@@ -210,6 +218,7 @@ static struct ctdb_scripts_wire *ctdb_get_script_list(struct ctdb_context *ctdb,
                tree_item = talloc(tree, struct ctdb_script_tree_item);
                if (tree_item == NULL) {
                        DEBUG(DEBUG_ERR, (__location__ " Failed to allocate new tree item\n"));
+                       closedir(dir);
                        talloc_free(tmp_ctx);
                        return NULL;
                }
@@ -222,6 +231,7 @@ static struct ctdb_scripts_wire *ctdb_get_script_list(struct ctdb_context *ctdb,
                tree_item->name = talloc_strdup(tree_item, de->d_name);
                if (tree_item->name == NULL) {
                        DEBUG(DEBUG_ERR,(__location__ " Failed to allocate script name.\n"));
+                       closedir(dir);
                        talloc_free(tmp_ctx);
                        return NULL;
                }
@@ -399,6 +409,7 @@ static int fork_child_for_script(struct ctdb_context *ctdb,
                                              ctdb_eventscript_call_names[state->call]);
                close(state->fd[0]);
                set_close_on_exec(state->fd[1]);
+               ctdb_set_process_name("ctdb_eventscript");
 
                rt = child_run_script(ctdb, state->from_user, state->call, state->options, current);
                /* We must be able to write PIPEBUF bytes at least; if this
@@ -472,7 +483,7 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event
        /* valgrind gets overloaded if we run next script as it's still doing
         * post-execution analysis, so kill finished child here. */
        if (ctdb->valgrinding) {
-               kill(state->child, SIGKILL);
+               ctdb_kill(ctdb, state->child, SIGKILL);
        }
 
        state->child = 0;
@@ -502,15 +513,14 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event
        }
 }
 
-static void debug_timeout(struct ctdb_event_script_state *state)
+static void ctdb_run_debug_hung_script(struct ctdb_context *ctdb, struct ctdb_event_script_state *state)
 {
        struct ctdb_script_wire *current = get_current_script(state);
        char *cmd;
        pid_t pid;
-       time_t t;
-       char tbuf[100], buf[200];
+       const char * debug_hung_script = ETCDIR "/ctdb/debug-hung-script.sh";
 
-       cmd = child_command_string(state->ctdb, state,
+       cmd = child_command_string(ctdb, state,
                                   state->from_user, current->name,
                                   state->call, state->options);
        CTDB_NO_MEMORY_VOID(state->ctdb, cmd);
@@ -519,26 +529,37 @@ static void debug_timeout(struct ctdb_event_script_state *state)
                         cmd, timeval_elapsed(&current->start), state->child));
        talloc_free(cmd);
 
-       t = time(NULL);
-       strftime(tbuf, sizeof(tbuf)-1, "%Y%m%d%H%M%S",  localtime(&t));
-       sprintf(buf, "{ pstree -p; cat /proc/locks; ls -li /var/ctdb/ /var/ctdb/persistent; }"
-                       " >/tmp/ctdb.event.%s.%d", tbuf, getpid());
-
-       pid = ctdb_fork(state->ctdb);
-       if (pid == 0) {
-               system(buf);
-               /* Now we can kill the child */
-               kill(state->child, SIGTERM);
-               exit(0);
+       if (!ctdb_fork_with_logging(ctdb, ctdb, "Hung script", NULL, NULL, &pid)) {
+               DEBUG(DEBUG_ERR,("Failed to fork a child process with logging to track hung event script\n"));
+               ctdb_kill(state->ctdb, state->child, SIGTERM);
+               return;
        }
        if (pid == -1) {
                DEBUG(DEBUG_ERR,("Fork for debug script failed : %s\n",
                                 strerror(errno)));
-       } else {
-               DEBUG(DEBUG_ERR,("Logged timedout eventscript : %s\n", buf));
-               /* Don't kill child until timeout done. */
-               state->child = 0;
+               ctdb_kill(state->ctdb, state->child, SIGTERM);
+               return;
+       }
+       if (pid == 0) {
+               char *buf;
+
+               ctdb_set_process_name("ctdb_debug_hung_script");
+               if (getenv("CTDB_DEBUG_HUNG_SCRIPT") != NULL) {
+                       debug_hung_script = getenv("CTDB_DEBUG_HUNG_SCRIPT");
+               }
+
+               buf = talloc_asprintf(NULL, "%s %d",
+                                     debug_hung_script, state->child);
+               system(buf);
+               talloc_free(buf);
+
+               /* Now we can kill the child */
+               ctdb_kill(state->ctdb, state->child, SIGTERM);
+               _exit(0);
        }
+
+       /* Don't kill child until timeout done. */
+       state->child = 0;
 }
 
 /* called when child times out */
@@ -558,15 +579,14 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
        case CTDB_EVENT_RECOVERED:
        case CTDB_EVENT_TAKE_IP:
        case CTDB_EVENT_RELEASE_IP:
-       case CTDB_EVENT_STOPPED:
-       case CTDB_EVENT_MONITOR:
        case CTDB_EVENT_STATUS:
                state->scripts->scripts[state->current].status = 0;
                DEBUG(DEBUG_ERR,("Ignoring hung script for %s call %d\n", state->options, state->call));
+               ctdb_run_debug_hung_script(ctdb, state);
                break;
         default:
                state->scripts->scripts[state->current].status = -ETIME;
-               debug_timeout(state);
+               ctdb_run_debug_hung_script(ctdb, state);
        }
 
        talloc_free(state);
@@ -583,7 +603,7 @@ static int event_script_destructor(struct ctdb_event_script_state *state)
        if (state->child) {
                DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));
 
-               if (kill(state->child, SIGTERM) != 0) {
+               if (ctdb_kill(state->ctdb, state->child, SIGTERM) != 0) {
                        DEBUG(DEBUG_ERR,("Failed to kill child process for eventscript, errno %s(%d)\n", strerror(errno), errno));
                }
        }
@@ -611,8 +631,18 @@ static int event_script_destructor(struct ctdb_event_script_state *state)
        }
 
        /* This is allowed to free us; talloc will prevent double free anyway,
-        * but beware if you call this outside the destructor! */
-       callback = state->callback;
+        * but beware if you call this outside the destructor!
+        * The callback hangs off a different context, so we walk the list
+        * of "active" callbacks until we find the one state points to.
+        * If we can't find it, the callback has already been removed.
+        */
+       for (callback = state->ctdb->script_callbacks; callback != NULL; callback = callback->next) {
+               if (callback == state->callback) {
+                       break;
+               }
+       }
+       
+       state->callback = NULL;
 
        if (callback) {
                /* Make sure destructor doesn't free itself! */
@@ -646,7 +676,6 @@ static bool check_options(enum ctdb_eventscript_call call, const char *options)
        case CTDB_EVENT_STARTUP:
        case CTDB_EVENT_START_RECOVERY:
        case CTDB_EVENT_RECOVERED:
-       case CTDB_EVENT_STOPPED:
        case CTDB_EVENT_MONITOR:
        case CTDB_EVENT_STATUS:
        case CTDB_EVENT_SHUTDOWN:
@@ -669,8 +698,7 @@ static bool check_options(enum ctdb_eventscript_call call, const char *options)
 
 static int remove_callback(struct event_script_callback *callback)
 {
-       /* Detach ourselves from the running script state */
-       callback->state->callback = NULL;
+       DLIST_REMOVE(callback->ctdb->script_callbacks, callback); /* unlink from ctdb's list of active callbacks */
        return 0;
 }
 
@@ -688,35 +716,6 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
 {
        struct ctdb_event_script_state *state;
 
-       state = talloc(ctdb->event_script_ctx, struct ctdb_event_script_state);
-       CTDB_NO_MEMORY(ctdb, state);
-
-       /* The callback isn't done if the context is freed. */
-       state->callback = talloc(mem_ctx, struct event_script_callback);
-       CTDB_NO_MEMORY(ctdb, state->callback);
-       talloc_set_destructor(state->callback, remove_callback);
-       state->callback->state = state;
-       state->callback->fn = callback;
-       state->callback->private_data = private_data;
-
-       state->ctdb = ctdb;
-       state->from_user = from_user;
-       state->call = call;
-       state->options = talloc_vasprintf(state, fmt, ap);
-       state->timeout = timeval_set(ctdb->tunable.script_timeout, 0);
-       state->scripts = NULL;
-       if (state->options == NULL) {
-               DEBUG(DEBUG_ERR, (__location__ " could not allocate state->options\n"));
-               talloc_free(state);
-               return -1;
-       }
-       if (!check_options(state->call, state->options)) {
-               DEBUG(DEBUG_ERR, ("Bad eventscript options '%s' for %s\n",
-                                 ctdb_eventscript_call_names[state->call], state->options));
-               talloc_free(state);
-               return -1;
-       }
-
        if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
                /* we guarantee that only some specifically allowed event scripts are run
                   while in recovery */
@@ -726,7 +725,7 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
                        CTDB_EVENT_START_RECOVERY,
                        CTDB_EVENT_SHUTDOWN,
                        CTDB_EVENT_RELEASE_IP,
-                       CTDB_EVENT_STOPPED
+                       CTDB_EVENT_IPREALLOCATED,
                };
                int i;
                for (i=0;i<ARRAY_SIZE(allowed_calls);i++) {
@@ -735,7 +734,6 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
                if (i == ARRAY_SIZE(allowed_calls)) {
                        DEBUG(DEBUG_ERR,("Refusing to run event scripts call '%s' while in recovery\n",
                                 ctdb_eventscript_call_names[call]));
-                       talloc_free(state);
                        return -1;
                }
        }
@@ -744,8 +742,9 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
        if (ctdb->current_monitor) {
                struct ctdb_event_script_state *ms = talloc_get_type(ctdb->current_monitor, struct ctdb_event_script_state);
 
-               /* cancel it */
-               if (ms->callback != NULL) {
+               /* Cancel current monitor callback state only if monitoring
+                * context ctdb->monitor->monitor_context has not been freed */
+               if (ms->callback != NULL && !ctdb_stopped_monitoring(ctdb)) {
                        ms->callback->fn(ctdb, -ECANCELED, ms->callback->private_data);
                        talloc_free(ms->callback);
                }
@@ -757,6 +756,36 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
                ctdb->current_monitor = NULL;
        }
 
+       state = talloc(ctdb->event_script_ctx, struct ctdb_event_script_state);
+       CTDB_NO_MEMORY(ctdb, state);
+
+       /* The callback isn't done if the context is freed. */
+       state->callback = talloc(mem_ctx, struct event_script_callback);
+       CTDB_NO_MEMORY(ctdb, state->callback);
+       DLIST_ADD(ctdb->script_callbacks, state->callback);
+       talloc_set_destructor(state->callback, remove_callback);
+       state->callback->ctdb         = ctdb;
+       state->callback->fn           = callback;
+       state->callback->private_data = private_data;
+
+       state->ctdb = ctdb;
+       state->from_user = from_user;
+       state->call = call;
+       state->options = talloc_vasprintf(state, fmt, ap);
+       state->timeout = timeval_set(ctdb->tunable.script_timeout, 0);
+       state->scripts = NULL;
+       if (state->options == NULL) {
+               DEBUG(DEBUG_ERR, (__location__ " could not allocate state->options\n"));
+               talloc_free(state);
+               return -1;
+       }
+       if (!check_options(state->call, state->options)) {
+               DEBUG(DEBUG_ERR, ("Bad eventscript options '%s' for %s\n",
+                                 ctdb_eventscript_call_names[state->call], state->options));
+               talloc_free(state);
+               return -1;
+       }
+
        DEBUG(DEBUG_INFO,(__location__ " Starting eventscript %s %s\n",
                          ctdb_eventscript_call_names[state->call],
                          state->options));
@@ -853,10 +882,10 @@ int ctdb_event_script_args(struct ctdb_context *ctdb, enum ctdb_eventscript_call
        va_start(ap, fmt);
        ret = ctdb_event_script_callback_v(ctdb, ctdb,
                        event_script_callback, &status, false, call, fmt, ap);
+       va_end(ap);
        if (ret != 0) {
                return ret;
        }
-       va_end(ap);
 
        status.status = -1;
        status.done = false;
@@ -868,7 +897,11 @@ int ctdb_event_script_args(struct ctdb_context *ctdb, enum ctdb_eventscript_call
                                  " Immediately banning ourself for %d seconds\n",
                                  ctdb_eventscript_call_names[call],
                                  ctdb->tunable.recovery_ban_period));
-               ctdb_ban_self(ctdb);
+
+               /* Don't ban self if CTDB is starting up or shutting down */
+               if (call != CTDB_EVENT_INIT && call != CTDB_EVENT_SHUTDOWN) {
+                       ctdb_ban_self(ctdb);
+               }
        }
 
        return status.status;
@@ -896,7 +929,7 @@ static void run_eventscripts_callback(struct ctdb_context *ctdb, int status,
        ctdb_enable_monitoring(ctdb);
 
        if (status != 0) {
-               DEBUG(DEBUG_ERR,(__location__ " Failed to forcibly run eventscripts\n"));
+               DEBUG(DEBUG_ERR,(__location__ " Failed to run eventscripts\n"));
        }
 
        ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
@@ -942,7 +975,7 @@ int32_t ctdb_run_eventscripts(struct ctdb_context *ctdb,
        /* Figure out what call they want. */
        options = get_call((const char *)indata.dptr, &call);
        if (!options) {
-               DEBUG(DEBUG_ERR, (__location__ " Invalid forced \"%s\"\n", (const char *)indata.dptr));
+               DEBUG(DEBUG_ERR, (__location__ " Invalid event name \"%s\"\n", (const char *)indata.dptr));
                return -1;
        }
 
@@ -956,7 +989,7 @@ int32_t ctdb_run_eventscripts(struct ctdb_context *ctdb,
 
        state->c = talloc_steal(state, c);
 
-       DEBUG(DEBUG_NOTICE,("Forced running of eventscripts with arguments %s\n", indata.dptr));
+       DEBUG(DEBUG_NOTICE,("Running eventscripts with arguments %s\n", indata.dptr));
 
        ctdb_disable_monitoring(ctdb);