#include "system/dir.h"
#include "system/locale.h"
#include "../include/ctdb_private.h"
-#include "lib/events/events.h"
+#include "lib/tevent/tevent.h"
#include "../common/rb_tree.h"
-static struct {
- struct timeval start;
- const char *script_running;
-} child_state;
-
static void ctdb_event_script_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p);
/*
- ctdbd sends us a SIGTERM when we should time out the current script
+ ctdbd sends us a SIGTERM when we should die.
*/
static void sigterm(int sig)
{
- char tbuf[100], buf[200];
- time_t t;
-
- DEBUG(DEBUG_ERR,("Timed out running script '%s' after %.1f seconds pid :%d\n",
- child_state.script_running, timeval_elapsed(&child_state.start), getpid()));
-
- t = time(NULL);
-
- strftime(tbuf, sizeof(tbuf)-1, "%Y%m%d%H%M%S", localtime(&t));
- sprintf(buf, "{ pstree -p; cat /proc/locks; ls -li /var/ctdb/ /var/ctdb/persistent; }"
- " >/tmp/ctdb.event.%s.%d", tbuf, getpid());
- system(buf);
-
- DEBUG(DEBUG_ERR,("Logged timedout eventscript : %s\n", buf));
-
/* all the child processes will be running in the same process group */
kill(-getpgrp(), SIGKILL);
_exit(1);
pid_t child;
/* Warning: this can free us! */
void (*callback)(struct ctdb_context *, int, void *);
- int cb_status;
int fd[2];
void *private_data;
bool from_user;
{
struct ctdb_event_script_state *state
= talloc_get_type(p, struct ctdb_event_script_state);
- struct ctdb_script_wire *current = get_current_script(state);
+ struct ctdb_script_wire *current;
unsigned int slen, min;
+ /* We may have been aborted to run something else. Discard */
+ if (state->scripts == NULL) {
+ return;
+ }
+
+ current = get_current_script(state);
+
/* Append, but don't overfill buffer. It starts zero-filled. */
slen = strlen(current->output);
min = MIN(len, sizeof(current->output) - slen - 1);
}
if (!(st.st_mode & S_IXUSR)) {
- DEBUG(DEBUG_INFO,("Event script %s is not executable. Ignoring this event script\n", full));
+ DEBUG(DEBUG_DEBUG,("Event script %s is not executable. Ignoring this event script\n", full));
errno = ENOEXEC;
talloc_free(full);
return false;
int ret;
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
- child_state.start = timeval_current();
ret = child_setup(ctdb);
if (ret != 0)
goto out;
cmdstr = child_command_string(ctdb, tmp_ctx, from_user,
current->name, call, options);
CTDB_NO_MEMORY(ctdb, cmdstr);
- child_state.script_running = cmdstr;
- DEBUG(DEBUG_INFO,("Executing event script %s\n",cmdstr));
+ DEBUG(DEBUG_DEBUG,("Executing event script %s\n",cmdstr));
if (current->status) {
ret = current->status;
struct ctdb_event_script_state *state)
{
int r;
+ struct tevent_fd *fde;
struct ctdb_script_wire *current = get_current_script(state);
current->start = timeval_current();
if (state->child == 0) {
int rt;
+ debug_extra = talloc_asprintf(NULL, "eventscript-%s-%s:",
+ current->name,
+ ctdb_eventscript_call_names[state->call]);
close(state->fd[0]);
set_close_on_exec(state->fd[1]);
DEBUG(DEBUG_DEBUG, (__location__ " Created PIPE FD:%d to child eventscript process\n", state->fd[0]));
/* Set ourselves up to be called when that's done. */
- event_add_fd(ctdb->ev, state, state->fd[0], EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
- ctdb_event_script_handler, state);
+ fde = event_add_fd(ctdb->ev, state, state->fd[0], EVENT_FD_READ,
+ ctdb_event_script_handler, state);
+ tevent_fd_set_auto_close(fde);
+
+ return 0;
+}
+
+/*
+ Summarize status of this run of scripts.
+ */
+static int script_status(struct ctdb_scripts_wire *scripts)
+{
+ unsigned int i;
+
+ for (i = 0; i < scripts->num_scripts; i++) {
+ switch (scripts->scripts[i].status) {
+ case -ENOENT:
+ case -ENOEXEC:
+ /* Disabled or missing; that's OK. */
+ break;
+ case 0:
+ /* No problem. */
+ break;
+ default:
+ return scripts->scripts[i].status;
+ }
+ }
+
+ /* All OK! */
return 0;
}
talloc_get_type(p, struct ctdb_event_script_state);
struct ctdb_script_wire *current = get_current_script(state);
struct ctdb_context *ctdb = state->ctdb;
- int r;
+ int r, status;
r = read(state->fd[0], ¤t->status, sizeof(current->status));
if (r < 0) {
}
current->finished = timeval_current();
-
- /* update overall status based on this script. */
- state->cb_status = current->status;
-
- /* don't stop just because it vanished or was disabled. */
- if (current->status == -ENOENT || current->status == -ENOEXEC) {
- state->cb_status = 0;
+ /* valgrind gets overloaded if we run next script as it's still doing
+ * post-execution analysis, so kill finished child here. */
+ if (ctdb->valgrinding) {
+ kill(state->child, SIGKILL);
}
state->child = 0;
+ status = script_status(state->scripts);
+
/* Aborted or finished all scripts? We're done. */
- if (state->cb_status != 0 || state->current+1 == state->scripts->num_scripts) {
+ if (status != 0 || state->current+1 == state->scripts->num_scripts) {
DEBUG(DEBUG_INFO,(__location__ " Eventscript %s %s finished with state %d\n",
- ctdb_eventscript_call_names[state->call], state->options, state->cb_status));
+ ctdb_eventscript_call_names[state->call], state->options, status));
ctdb->event_script_timeouts = 0;
talloc_free(state);
/* Next script! */
state->current++;
- state->cb_status = fork_child_for_script(ctdb, state);
- if (state->cb_status != 0) {
+ current++;
+ current->status = fork_child_for_script(ctdb, state);
+ if (current->status != 0) {
/* This calls the callback. */
talloc_free(state);
}
}
+static void debug_timeout(struct ctdb_event_script_state *state)
+{
+ struct ctdb_script_wire *current = get_current_script(state);
+ char *cmd;
+ pid_t pid;
+ time_t t;
+ char tbuf[100], buf[200];
+
+ cmd = child_command_string(state->ctdb, state,
+ state->from_user, current->name,
+ state->call, state->options);
+ CTDB_NO_MEMORY_VOID(state->ctdb, cmd);
+
+ DEBUG(DEBUG_ERR,("Timed out running script '%s' after %.1f seconds pid :%d\n",
+ cmd, timeval_elapsed(¤t->start), state->child));
+ talloc_free(cmd);
+
+ t = time(NULL);
+ strftime(tbuf, sizeof(tbuf)-1, "%Y%m%d%H%M%S", localtime(&t));
+ sprintf(buf, "{ pstree -p; cat /proc/locks; ls -li /var/ctdb/ /var/ctdb/persistent; }"
+ " >/tmp/ctdb.event.%s.%d", tbuf, getpid());
+
+ pid = ctdb_fork(state->ctdb);
+ if (pid == 0) {
+ system(buf);
+ /* Now we can kill the child */
+ kill(state->child, SIGTERM);
+ exit(0);
+ }
+ if (pid == -1) {
+ DEBUG(DEBUG_ERR,("Fork for debug script failed : %s\n",
+ strerror(errno)));
+ } else {
+ DEBUG(DEBUG_ERR,("Logged timedout eventscript : %s\n", buf));
+ /* Don't kill child until timeout done. */
+ state->child = 0;
+ }
+}
+
/* called when child times out */
static void ctdb_event_script_timeout(struct event_context *ev, struct timed_event *te,
struct timeval t, void *p)
{
struct ctdb_event_script_state *state = talloc_get_type(p, struct ctdb_event_script_state);
struct ctdb_context *ctdb = state->ctdb;
+ struct ctdb_script_wire *current = get_current_script(state);
- DEBUG(DEBUG_ERR,("Event script timed out : %s %s count : %u pid : %d\n",
- ctdb_eventscript_call_names[state->call], state->options, ctdb->event_script_timeouts, state->child));
-
- state->cb_status = -ETIME;
+ DEBUG(DEBUG_ERR,("Event script timed out : %s %s %s count : %u pid : %d\n",
+ current->name, ctdb_eventscript_call_names[state->call], state->options, ctdb->event_script_timeouts, state->child));
- if (kill(state->child, 0) != 0) {
- DEBUG(DEBUG_ERR,("Event script child process already dead, errno %s(%d)\n", strerror(errno), errno));
- state->child = 0;
+ /* ignore timeouts for these events */
+ switch (state->call) {
+ case CTDB_EVENT_START_RECOVERY:
+ case CTDB_EVENT_RECOVERED:
+ case CTDB_EVENT_TAKE_IP:
+ case CTDB_EVENT_RELEASE_IP:
+ case CTDB_EVENT_STOPPED:
+ case CTDB_EVENT_MONITOR:
+ case CTDB_EVENT_STATUS:
+ state->scripts->scripts[state->current].status = 0;
+ DEBUG(DEBUG_ERR,("Ignoring hung script for %s call %d\n", state->options, state->call));
+ break;
+ default:
+ state->scripts->scripts[state->current].status = -ETIME;
+ debug_timeout(state);
}
- state->scripts->scripts[state->current].status = state->cb_status;
-
talloc_free(state);
}
*/
static int event_script_destructor(struct ctdb_event_script_state *state)
{
+ int status;
+
if (state->child) {
DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));
state->ctdb->current_monitor = NULL;
}
- /* Save our scripts as the last executed status, if we have them. */
+ /* Save our scripts as the last executed status, if we have them.
+ * See ctdb_event_script_callback_v where we abort monitor event. */
if (state->scripts) {
talloc_free(state->ctdb->last_status[state->call]);
state->ctdb->last_status[state->call] = state->scripts;
}
}
+ /* Use last status as result, or "OK" if none. */
+ if (state->ctdb->last_status[state->call]) {
+ status = script_status(state->ctdb->last_status[state->call]);
+ } else {
+ status = 0;
+ }
+
/* This is allowed to free us; talloc will prevent double free anyway,
* but beware if you call this outside the destructor! */
if (state->callback) {
- state->callback(state->ctdb, state->cb_status, state->private_data);
+ state->callback(state->ctdb, status, state->private_data);
}
return 0;
{
switch (call) {
/* These all take no arguments. */
+ case CTDB_EVENT_INIT:
+ case CTDB_EVENT_SETUP:
case CTDB_EVENT_STARTUP:
case CTDB_EVENT_START_RECOVERY:
case CTDB_EVENT_RECOVERED:
case CTDB_EVENT_STATUS:
case CTDB_EVENT_SHUTDOWN:
case CTDB_EVENT_RELOAD:
+ case CTDB_EVENT_IPREALLOCATED:
return count_words(options) == 0;
case CTDB_EVENT_TAKE_IP: /* interface, IP address, netmask bits. */
case CTDB_EVENT_RELEASE_IP:
return count_words(options) == 3;
+ case CTDB_EVENT_UPDATE_IP: /* old interface, new interface, IP address, netmask bits. */
+ return count_words(options) == 4;
+
default:
DEBUG(DEBUG_ERR,(__location__ "Unknown ctdb_eventscript_call %u\n", call));
return false;
const char *fmt, va_list ap)
{
struct ctdb_event_script_state *state;
- int ret;
state = talloc(ctdb->event_script_ctx, struct ctdb_event_script_state);
CTDB_NO_MEMORY(ctdb, state);
/* we guarantee that only some specifically allowed event scripts are run
while in recovery */
const enum ctdb_eventscript_call allowed_calls[] = {
- CTDB_EVENT_START_RECOVERY, CTDB_EVENT_SHUTDOWN, CTDB_EVENT_RELEASE_IP, CTDB_EVENT_STOPPED };
+ CTDB_EVENT_INIT,
+ CTDB_EVENT_SETUP,
+ CTDB_EVENT_START_RECOVERY,
+ CTDB_EVENT_SHUTDOWN,
+ CTDB_EVENT_RELEASE_IP,
+ CTDB_EVENT_STOPPED
+ };
int i;
for (i=0;i<ARRAY_SIZE(allowed_calls);i++) {
if (call == allowed_calls[i]) break;
ctdb->current_monitor = NULL;
}
- if (!from_user && (call == CTDB_EVENT_MONITOR || call == CTDB_EVENT_STATUS)) {
- ctdb->current_monitor = state;
- }
-
DEBUG(DEBUG_INFO,(__location__ " Starting eventscript %s %s\n",
ctdb_eventscript_call_names[state->call],
state->options));
return -1;
}
state->current = 0;
+ state->child = 0;
+
+ if (!from_user && (call == CTDB_EVENT_MONITOR || call == CTDB_EVENT_STATUS)) {
+ ctdb->current_monitor = state;
+ }
+
+ talloc_set_destructor(state, event_script_destructor);
/* Nothing to do? */
if (state->scripts->num_scripts == 0) {
- ctdb->event_script_timeouts = 0;
talloc_free(state);
return 0;
}
- ret = fork_child_for_script(ctdb, state);
- if (ret != 0) {
- talloc_free(state->scripts);
- talloc_free(state);
- return -1;
- }
+ state->scripts->scripts[0].status = fork_child_for_script(ctdb, state);
+ if (state->scripts->scripts[0].status != 0) {
+ /* Callback is called from destructor, with fail result. */
+ talloc_free(state);
+ return 0;
+ }
- talloc_set_destructor(state, event_script_destructor);
if (!timeval_is_zero(&state->timeout)) {
event_add_timed(ctdb->ev, state, timeval_current_ofs(state->timeout.tv_sec, state->timeout.tv_usec), ctdb_event_script_timeout, state);
} else {