X-Git-Url: http://git.samba.org/?a=blobdiff_plain;f=server%2Feventscript.c;h=9ba3a5d9ed802bdb2fe7c1f52bd506f6b7d4f080;hb=ebbebeec3b511ffb66bca506abc1bb867911ae95;hp=803ac1daa004e14f5b23ffc578ee3eaaca5ffd70;hpb=2ee86cc1f311d7b7504c7b14d142b9c4f6f4b469;p=sahlberg%2Fctdb.git diff --git a/server/eventscript.c b/server/eventscript.c index 803ac1da..9ba3a5d9 100644 --- a/server/eventscript.c +++ b/server/eventscript.c @@ -24,36 +24,16 @@ #include "system/dir.h" #include "system/locale.h" #include "../include/ctdb_private.h" -#include "lib/events/events.h" +#include "lib/tevent/tevent.h" #include "../common/rb_tree.h" -static struct { - struct timeval start; - const char *script_running; -} child_state; - static void ctdb_event_script_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p); /* - ctdbd sends us a SIGTERM when we should time out the current script + ctdbd sends us a SIGTERM when we should die. */ static void sigterm(int sig) { - char tbuf[100], buf[200]; - time_t t; - - DEBUG(DEBUG_ERR,("Timed out running script '%s' after %.1f seconds pid :%d\n", - child_state.script_running, timeval_elapsed(&child_state.start), getpid())); - - t = time(NULL); - - strftime(tbuf, sizeof(tbuf)-1, "%Y%m%d%H%M%S", localtime(&t)); - sprintf(buf, "{ pstree -p; cat /proc/locks; ls -li /var/ctdb/ /var/ctdb/persistent; }" - " >/tmp/ctdb.event.%s.%d", tbuf, getpid()); - system(buf); - - DEBUG(DEBUG_ERR,("Logged timedout eventscript : %s\n", buf)); - /* all the child processes will be running in the same process group */ kill(-getpgrp(), SIGKILL); _exit(1); @@ -64,7 +44,6 @@ struct ctdb_event_script_state { pid_t child; /* Warning: this can free us! */ void (*callback)(struct ctdb_context *, int, void *); - int cb_status; int fd[2]; void *private_data; bool from_user; @@ -88,9 +67,16 @@ static void log_event_script_output(const char *str, uint16_t len, void *p) { struct ctdb_event_script_state *state = talloc_get_type(p, struct ctdb_event_script_state); - struct ctdb_script_wire *current = get_current_script(state); + struct ctdb_script_wire *current; unsigned int slen, min; + /* We may have been aborted to run something else. Discard */ + if (state->scripts == NULL) { + return; + } + + current = get_current_script(state); + /* Append, but don't overfill buffer. It starts zero-filled. */ slen = strlen(current->output); min = MIN(len, sizeof(current->output) - slen - 1); @@ -139,7 +125,7 @@ static bool check_executable(const char *dir, const char *name) } if (!(st.st_mode & S_IXUSR)) { - DEBUG(DEBUG_INFO,("Event script %s is not executable. Ignoring this event script\n", full)); + DEBUG(DEBUG_DEBUG,("Event script %s is not executable. Ignoring this event script\n", full)); errno = ENOEXEC; talloc_free(full); return false; @@ -349,7 +335,6 @@ static int child_run_script(struct ctdb_context *ctdb, int ret; TALLOC_CTX *tmp_ctx = talloc_new(ctdb); - child_state.start = timeval_current(); ret = child_setup(ctdb); if (ret != 0) goto out; @@ -357,9 +342,8 @@ static int child_run_script(struct ctdb_context *ctdb, cmdstr = child_command_string(ctdb, tmp_ctx, from_user, current->name, call, options); CTDB_NO_MEMORY(ctdb, cmdstr); - child_state.script_running = cmdstr; - DEBUG(DEBUG_INFO,("Executing event script %s\n",cmdstr)); + DEBUG(DEBUG_DEBUG,("Executing event script %s\n",cmdstr)); if (current->status) { ret = current->status; @@ -379,6 +363,7 @@ static int fork_child_for_script(struct ctdb_context *ctdb, struct ctdb_event_script_state *state) { int r; + struct tevent_fd *fde; struct ctdb_script_wire *current = get_current_script(state); current->start = timeval_current(); @@ -401,6 +386,9 @@ static int fork_child_for_script(struct ctdb_context *ctdb, if (state->child == 0) { int rt; + debug_extra = talloc_asprintf(NULL, "eventscript-%s-%s:", + current->name, + ctdb_eventscript_call_names[state->call]); close(state->fd[0]); set_close_on_exec(state->fd[1]); @@ -418,8 +406,35 @@ static int fork_child_for_script(struct ctdb_context *ctdb, DEBUG(DEBUG_DEBUG, (__location__ " Created PIPE FD:%d to child eventscript process\n", state->fd[0])); /* Set ourselves up to be called when that's done. */ - event_add_fd(ctdb->ev, state, state->fd[0], EVENT_FD_READ|EVENT_FD_AUTOCLOSE, - ctdb_event_script_handler, state); + fde = event_add_fd(ctdb->ev, state, state->fd[0], EVENT_FD_READ, + ctdb_event_script_handler, state); + tevent_fd_set_auto_close(fde); + + return 0; +} + +/* + Summarize status of this run of scripts. + */ +static int script_status(struct ctdb_scripts_wire *scripts) +{ + unsigned int i; + + for (i = 0; i < scripts->num_scripts; i++) { + switch (scripts->scripts[i].status) { + case -ENOENT: + case -ENOEXEC: + /* Disabled or missing; that's OK. */ + break; + case 0: + /* No problem. */ + break; + default: + return scripts->scripts[i].status; + } + } + + /* All OK! */ return 0; } @@ -431,7 +446,7 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event talloc_get_type(p, struct ctdb_event_script_state); struct ctdb_script_wire *current = get_current_script(state); struct ctdb_context *ctdb = state->ctdb; - int r; + int r, status; r = read(state->fd[0], ¤t->status, sizeof(current->status)); if (r < 0) { @@ -441,15 +456,6 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event } current->finished = timeval_current(); - - /* update overall status based on this script. */ - state->cb_status = current->status; - - /* don't stop just because it vanished or was disabled. */ - if (current->status == -ENOENT || current->status == -ENOEXEC) { - state->cb_status = 0; - } - /* valgrind gets overloaded if we run next script as it's still doing * post-execution analysis, so kill finished child here. */ if (ctdb->valgrinding) { @@ -458,10 +464,12 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event state->child = 0; + status = script_status(state->scripts); + /* Aborted or finished all scripts? We're done. */ - if (state->cb_status != 0 || state->current+1 == state->scripts->num_scripts) { + if (status != 0 || state->current+1 == state->scripts->num_scripts) { DEBUG(DEBUG_INFO,(__location__ " Eventscript %s %s finished with state %d\n", - ctdb_eventscript_call_names[state->call], state->options, state->cb_status)); + ctdb_eventscript_call_names[state->call], state->options, status)); ctdb->event_script_timeouts = 0; talloc_free(state); @@ -473,32 +481,81 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event /* Next script! */ state->current++; - state->cb_status = fork_child_for_script(ctdb, state); - if (state->cb_status != 0) { + current++; + current->status = fork_child_for_script(ctdb, state); + if (current->status != 0) { /* This calls the callback. */ talloc_free(state); } } +static void debug_timeout(struct ctdb_event_script_state *state) +{ + struct ctdb_script_wire *current = get_current_script(state); + char *cmd; + pid_t pid; + time_t t; + char tbuf[100], buf[200]; + + cmd = child_command_string(state->ctdb, state, + state->from_user, current->name, + state->call, state->options); + CTDB_NO_MEMORY_VOID(state->ctdb, cmd); + + DEBUG(DEBUG_ERR,("Timed out running script '%s' after %.1f seconds pid :%d\n", + cmd, timeval_elapsed(¤t->start), state->child)); + talloc_free(cmd); + + t = time(NULL); + strftime(tbuf, sizeof(tbuf)-1, "%Y%m%d%H%M%S", localtime(&t)); + sprintf(buf, "{ pstree -p; cat /proc/locks; ls -li /var/ctdb/ /var/ctdb/persistent; }" + " >/tmp/ctdb.event.%s.%d", tbuf, getpid()); + + pid = ctdb_fork(state->ctdb); + if (pid == 0) { + system(buf); + /* Now we can kill the child */ + kill(state->child, SIGTERM); + exit(0); + } + if (pid == -1) { + DEBUG(DEBUG_ERR,("Fork for debug script failed : %s\n", + strerror(errno))); + } else { + DEBUG(DEBUG_ERR,("Logged timedout eventscript : %s\n", buf)); + /* Don't kill child until timeout done. */ + state->child = 0; + } +} + /* called when child times out */ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p) { struct ctdb_event_script_state *state = talloc_get_type(p, struct ctdb_event_script_state); struct ctdb_context *ctdb = state->ctdb; + struct ctdb_script_wire *current = get_current_script(state); - DEBUG(DEBUG_ERR,("Event script timed out : %s %s count : %u pid : %d\n", - ctdb_eventscript_call_names[state->call], state->options, ctdb->event_script_timeouts, state->child)); - - state->cb_status = -ETIME; + DEBUG(DEBUG_ERR,("Event script timed out : %s %s %s count : %u pid : %d\n", + current->name, ctdb_eventscript_call_names[state->call], state->options, ctdb->event_script_timeouts, state->child)); - if (kill(state->child, 0) != 0) { - DEBUG(DEBUG_ERR,("Event script child process already dead, errno %s(%d)\n", strerror(errno), errno)); - state->child = 0; + /* ignore timeouts for these events */ + switch (state->call) { + case CTDB_EVENT_START_RECOVERY: + case CTDB_EVENT_RECOVERED: + case CTDB_EVENT_TAKE_IP: + case CTDB_EVENT_RELEASE_IP: + case CTDB_EVENT_STOPPED: + case CTDB_EVENT_MONITOR: + case CTDB_EVENT_STATUS: + state->scripts->scripts[state->current].status = 0; + DEBUG(DEBUG_ERR,("Ignoring hung script for %s call %d\n", state->options, state->call)); + break; + default: + state->scripts->scripts[state->current].status = -ETIME; + debug_timeout(state); } - state->scripts->scripts[state->current].status = state->cb_status; - talloc_free(state); } @@ -507,6 +564,8 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve */ static int event_script_destructor(struct ctdb_event_script_state *state) { + int status; + if (state->child) { DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child)); @@ -520,7 +579,8 @@ static int event_script_destructor(struct ctdb_event_script_state *state) state->ctdb->current_monitor = NULL; } - /* Save our scripts as the last executed status, if we have them. */ + /* Save our scripts as the last executed status, if we have them. + * See ctdb_event_script_callback_v where we abort monitor event. */ if (state->scripts) { talloc_free(state->ctdb->last_status[state->call]); state->ctdb->last_status[state->call] = state->scripts; @@ -529,10 +589,17 @@ static int event_script_destructor(struct ctdb_event_script_state *state) } } + /* Use last status as result, or "OK" if none. */ + if (state->ctdb->last_status[state->call]) { + status = script_status(state->ctdb->last_status[state->call]); + } else { + status = 0; + } + /* This is allowed to free us; talloc will prevent double free anyway, * but beware if you call this outside the destructor! */ if (state->callback) { - state->callback(state->ctdb, state->cb_status, state->private_data); + state->callback(state->ctdb, status, state->private_data); } return 0; @@ -555,6 +622,8 @@ static bool check_options(enum ctdb_eventscript_call call, const char *options) { switch (call) { /* These all take no arguments. */ + case CTDB_EVENT_INIT: + case CTDB_EVENT_SETUP: case CTDB_EVENT_STARTUP: case CTDB_EVENT_START_RECOVERY: case CTDB_EVENT_RECOVERED: @@ -563,12 +632,16 @@ static bool check_options(enum ctdb_eventscript_call call, const char *options) case CTDB_EVENT_STATUS: case CTDB_EVENT_SHUTDOWN: case CTDB_EVENT_RELOAD: + case CTDB_EVENT_IPREALLOCATED: return count_words(options) == 0; case CTDB_EVENT_TAKE_IP: /* interface, IP address, netmask bits. */ case CTDB_EVENT_RELEASE_IP: return count_words(options) == 3; + case CTDB_EVENT_UPDATE_IP: /* old interface, new interface, IP address, netmask bits. */ + return count_words(options) == 4; + default: DEBUG(DEBUG_ERR,(__location__ "Unknown ctdb_eventscript_call %u\n", call)); return false; @@ -587,7 +660,6 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb, const char *fmt, va_list ap) { struct ctdb_event_script_state *state; - int ret; state = talloc(ctdb->event_script_ctx, struct ctdb_event_script_state); CTDB_NO_MEMORY(ctdb, state); @@ -616,7 +688,13 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb, /* we guarantee that only some specifically allowed event scripts are run while in recovery */ const enum ctdb_eventscript_call allowed_calls[] = { - CTDB_EVENT_START_RECOVERY, CTDB_EVENT_SHUTDOWN, CTDB_EVENT_RELEASE_IP, CTDB_EVENT_STOPPED }; + CTDB_EVENT_INIT, + CTDB_EVENT_SETUP, + CTDB_EVENT_START_RECOVERY, + CTDB_EVENT_SHUTDOWN, + CTDB_EVENT_RELEASE_IP, + CTDB_EVENT_STOPPED + }; int i; for (i=0;icurrent_monitor = NULL; } - if (!from_user && (call == CTDB_EVENT_MONITOR || call == CTDB_EVENT_STATUS)) { - ctdb->current_monitor = state; - } - DEBUG(DEBUG_INFO,(__location__ " Starting eventscript %s %s\n", ctdb_eventscript_call_names[state->call], state->options)); @@ -653,22 +727,27 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb, return -1; } state->current = 0; + state->child = 0; + + if (!from_user && (call == CTDB_EVENT_MONITOR || call == CTDB_EVENT_STATUS)) { + ctdb->current_monitor = state; + } + + talloc_set_destructor(state, event_script_destructor); /* Nothing to do? */ if (state->scripts->num_scripts == 0) { - ctdb->event_script_timeouts = 0; talloc_free(state); return 0; } - ret = fork_child_for_script(ctdb, state); - if (ret != 0) { - talloc_free(state->scripts); - talloc_free(state); - return -1; - } + state->scripts->scripts[0].status = fork_child_for_script(ctdb, state); + if (state->scripts->scripts[0].status != 0) { + /* Callback is called from destructor, with fail result. */ + talloc_free(state); + return 0; + } - talloc_set_destructor(state, event_script_destructor); if (!timeval_is_zero(&state->timeout)) { event_add_timed(ctdb->ev, state, timeval_current_ofs(state->timeout.tv_sec, state->timeout.tv_usec), ctdb_event_script_timeout, state); } else {