#include "system/dir.h"
#include "system/locale.h"
#include "../include/ctdb_private.h"
-#include "lib/events/events.h"
+#include "lib/tevent/tevent.h"
#include "../common/rb_tree.h"
-static struct {
- struct timeval start;
- const char *script_running;
-} child_state;
-
-static const char *call_names[] = {
- "startup",
- "startrecovery",
- "recovered",
- "takeip",
- "releaseip",
- "stopped",
- "monitor",
- "status",
- "shutdown",
- "reload"
-};
-
static void ctdb_event_script_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p);
/*
- ctdbd sends us a SIGTERM when we should time out the current script
+ ctdbd sends us a SIGTERM when we should die.
*/
static void sigterm(int sig)
{
- char tbuf[100], buf[200];
- time_t t;
-
- DEBUG(DEBUG_ERR,("Timed out running script '%s' after %.1f seconds pid :%d\n",
- child_state.script_running, timeval_elapsed(&child_state.start), getpid()));
-
- t = time(NULL);
-
- strftime(tbuf, sizeof(tbuf)-1, "%Y%m%d%H%M%S", localtime(&t));
- sprintf(buf, "pstree -p >/tmp/ctdb.event.%s.%d", tbuf, getpid());
- system(buf);
-
- DEBUG(DEBUG_ERR,("Logged timedout eventscript : %s\n", buf));
-
/* all the child processes will be running in the same process group */
kill(-getpgrp(), SIGKILL);
_exit(1);
pid_t child;
/* Warning: this can free us! */
void (*callback)(struct ctdb_context *, int, void *);
- int cb_status;
int fd[2];
void *private_data;
bool from_user;
{
struct ctdb_event_script_state *state
= talloc_get_type(p, struct ctdb_event_script_state);
- struct ctdb_script_wire *current = get_current_script(state);
+ struct ctdb_script_wire *current;
unsigned int slen, min;
+ /* We may have been aborted to run something else. Discard */
+ if (state->scripts == NULL) {
+ return;
+ }
+
+ current = get_current_script(state);
+
/* Append, but don't overfill buffer. It starts zero-filled. */
slen = strlen(current->output);
min = MIN(len, sizeof(current->output) - slen - 1);
memcpy(current->output + slen, str, min);
}
-int32_t ctdb_control_get_event_script_status(struct ctdb_context *ctdb, TDB_DATA *outdata)
+/*
+ Hand back the saved script results of the last run of event call_type.
+
+ Returns -1 when call_type is not below CTDB_EVENT_MAX, otherwise 0.
+ On success outdata refers directly to ctdb->last_status[call_type]
+ (no copy is made); if that event has no saved status, outdata is
+ empty (dsize 0, dptr NULL).
+ */
+int32_t ctdb_control_get_event_script_status(struct ctdb_context *ctdb,
+ uint32_t call_type,
+ TDB_DATA *outdata)
{
- struct ctdb_scripts_wire *monitoring_scripts = ctdb->last_status[CTDB_EVENT_MONITOR];
-
- if (monitoring_scripts == NULL) {
- DEBUG(DEBUG_ERR,(__location__ " last_monitor_status_ctx is NULL when reading status\n"));
+ if (call_type >= CTDB_EVENT_MAX) {
return -1;
}
- outdata->dsize = talloc_get_size(monitoring_scripts);
- outdata->dptr = (uint8_t *)monitoring_scripts;
-
+ if (ctdb->last_status[call_type] == NULL) {
+ /* If it's never been run, return nothing so they can tell. */
+ outdata->dsize = 0;
+ /* Don't leave a stale pointer paired with the zero length. */
+ outdata->dptr = NULL;
+ } else {
+ outdata->dsize = talloc_get_size(ctdb->last_status[call_type]);
+ outdata->dptr = (uint8_t *)ctdb->last_status[call_type];
+ }
return 0;
}
}
if (!(st.st_mode & S_IXUSR)) {
- DEBUG(DEBUG_INFO,("Event script %s is not executable. Ignoring this event script\n", full));
+ DEBUG(DEBUG_DEBUG,("Event script %s is not executable. Ignoring this event script\n", full));
errno = ENOEXEC;
talloc_free(full);
return false;
return talloc_asprintf(ctx, "%s%s/%s %s %s",
str,
ctdb->event_script_dir,
- scriptname, call_names[call], options);
+ scriptname,
+ ctdb_eventscript_call_names[call],
+ options);
}
}
int ret;
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
- child_state.start = timeval_current();
ret = child_setup(ctdb);
if (ret != 0)
goto out;
cmdstr = child_command_string(ctdb, tmp_ctx, from_user,
current->name, call, options);
CTDB_NO_MEMORY(ctdb, cmdstr);
- child_state.script_running = cmdstr;
- DEBUG(DEBUG_INFO,("Executing event script %s\n",cmdstr));
+ DEBUG(DEBUG_DEBUG,("Executing event script %s\n",cmdstr));
if (current->status) {
ret = current->status;
struct ctdb_event_script_state *state)
{
int r;
+ struct tevent_fd *fde;
struct ctdb_script_wire *current = get_current_script(state);
current->start = timeval_current();
if (state->child == 0) {
int rt;
+ debug_extra = talloc_asprintf(NULL, "eventscript-%s-%s:",
+ current->name,
+ ctdb_eventscript_call_names[state->call]);
close(state->fd[0]);
set_close_on_exec(state->fd[1]);
DEBUG(DEBUG_DEBUG, (__location__ " Created PIPE FD:%d to child eventscript process\n", state->fd[0]));
/* Set ourselves up to be called when that's done. */
- event_add_fd(ctdb->ev, state, state->fd[0], EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
- ctdb_event_script_handler, state);
+ fde = event_add_fd(ctdb->ev, state, state->fd[0], EVENT_FD_READ,
+ ctdb_event_script_handler, state);
+ tevent_fd_set_auto_close(fde);
+
+ return 0;
+}
+
+/*
+ Fold the per-script results of one run into a single status: the
+ first script status that is not 0, -ENOENT or -ENOEXEC, else 0.
+ */
+static int script_status(struct ctdb_scripts_wire *scripts)
+{
+	unsigned int idx = 0;
+
+	while (idx < scripts->num_scripts) {
+		int result = scripts->scripts[idx].status;
+
+		/* A clean exit, or a disabled or missing script, is fine. */
+		if (result != 0 && result != -ENOENT && result != -ENOEXEC) {
+			return result;
+		}
+		idx++;
+	}
+
+	/* Nothing failed. */
return 0;
}
talloc_get_type(p, struct ctdb_event_script_state);
struct ctdb_script_wire *current = get_current_script(state);
struct ctdb_context *ctdb = state->ctdb;
- int r;
+ int r, status;
r = read(state->fd[0], &current->status, sizeof(current->status));
if (r < 0) {
}
current->finished = timeval_current();
-
- /* update overall status based on this script. */
- state->cb_status = current->status;
-
- /* don't stop just because it vanished or was disabled. */
- if (current->status == -ENOENT || current->status == -ENOEXEC) {
- state->cb_status = 0;
+ /* valgrind gets overloaded if we run next script as it's still doing
+ * post-execution analysis, so kill finished child here. */
+ if (ctdb->valgrinding) {
+ kill(state->child, SIGKILL);
}
state->child = 0;
+ status = script_status(state->scripts);
+
/* Aborted or finished all scripts? We're done. */
- if (state->cb_status != 0 || state->current+1 == state->scripts->num_scripts) {
+ if (status != 0 || state->current+1 == state->scripts->num_scripts) {
DEBUG(DEBUG_INFO,(__location__ " Eventscript %s %s finished with state %d\n",
- call_names[state->call], state->options, state->cb_status));
+ ctdb_eventscript_call_names[state->call], state->options, status));
ctdb->event_script_timeouts = 0;
talloc_free(state);
/* Next script! */
state->current++;
- state->cb_status = fork_child_for_script(ctdb, state);
- if (state->cb_status != 0) {
+ current++;
+ current->status = fork_child_for_script(ctdb, state);
+ if (current->status != 0) {
/* This calls the callback. */
talloc_free(state);
}
}
+/*
+ Log a timed-out event script and capture diagnostics for it.
+
+ Logs the command line, elapsed time and pid of the current script,
+ then forks a helper which dumps pstree, /proc/locks and a listing of
+ /var/ctdb/ and /var/ctdb/persistent to
+ /tmp/ctdb.event.<timestamp>.<pid>, and sends SIGTERM to the script
+ child once the dump is done.  When the fork succeeds state->child is
+ cleared so the script is not killed before the dump has finished.
+ */
+static void debug_timeout(struct ctdb_event_script_state *state)
+{
+	struct ctdb_script_wire *current = get_current_script(state);
+	char *cmd;
+	pid_t pid;
+	time_t t;
+	char tbuf[100], buf[200];
+
+	cmd = child_command_string(state->ctdb, state,
+				   state->from_user, current->name,
+				   state->call, state->options);
+	CTDB_NO_MEMORY_VOID(state->ctdb, cmd);
+
+	DEBUG(DEBUG_ERR,("Timed out running script '%s' after %.1f seconds pid :%d\n",
+			 cmd, timeval_elapsed(&current->start), state->child));
+	talloc_free(cmd);
+
+	t = time(NULL);
+	strftime(tbuf, sizeof(tbuf)-1, "%Y%m%d%H%M%S", localtime(&t));
+	/* Bounded write: the shell command must never overrun buf. */
+	snprintf(buf, sizeof(buf),
+		 "{ pstree -p; cat /proc/locks; ls -li /var/ctdb/ /var/ctdb/persistent; }"
+		 " >/tmp/ctdb.event.%s.%d", tbuf, getpid());
+
+	pid = ctdb_fork(state->ctdb);
+	if (pid == 0) {
+		/* Helper process: take the snapshot while the script is still alive. */
+		system(buf);
+		/* Now we can kill the child */
+		kill(state->child, SIGTERM);
+		exit(0);
+	}
+	if (pid == -1) {
+		DEBUG(DEBUG_ERR,("Fork for debug script failed : %s\n",
+				 strerror(errno)));
+	} else {
+		DEBUG(DEBUG_ERR,("Logged timedout eventscript : %s\n", buf));
+		/* Don't kill child until timeout done. */
+		state->child = 0;
+	}
+}
+
/* called when child times out */
static void ctdb_event_script_timeout(struct event_context *ev, struct timed_event *te,
struct timeval t, void *p)
{
struct ctdb_event_script_state *state = talloc_get_type(p, struct ctdb_event_script_state);
struct ctdb_context *ctdb = state->ctdb;
+ struct ctdb_script_wire *current = get_current_script(state);
- DEBUG(DEBUG_ERR,("Event script timed out : %s %s count : %u pid : %d\n",
- call_names[state->call], state->options, ctdb->event_script_timeouts, state->child));
-
- state->cb_status = -ETIME;
+ DEBUG(DEBUG_ERR,("Event script timed out : %s %s %s count : %u pid : %d\n",
+ current->name, ctdb_eventscript_call_names[state->call], state->options, ctdb->event_script_timeouts, state->child));
- if (kill(state->child, 0) != 0) {
- DEBUG(DEBUG_ERR,("Event script child process already dead, errno %s(%d)\n", strerror(errno), errno));
- state->child = 0;
+ /* ignore timeouts for these events: the hung script is recorded
+  * with status 0 and only a log message is emitted.  Any other
+  * event records -ETIME for the script and runs debug_timeout().
+  * Either way the state is freed afterwards. */
+ switch (state->call) {
+ case CTDB_EVENT_START_RECOVERY:
+ case CTDB_EVENT_RECOVERED:
+ case CTDB_EVENT_TAKE_IP:
+ case CTDB_EVENT_RELEASE_IP:
+ case CTDB_EVENT_STOPPED:
+ case CTDB_EVENT_MONITOR:
+ case CTDB_EVENT_STATUS:
+ state->scripts->scripts[state->current].status = 0;
+ DEBUG(DEBUG_ERR,("Ignoring hung script for %s call %d\n", state->options, state->call));
+ break;
+ default:
+ state->scripts->scripts[state->current].status = -ETIME;
+ debug_timeout(state);
}
- state->scripts->scripts[state->current].status = state->cb_status;
-
talloc_free(state);
}
*/
static int event_script_destructor(struct ctdb_event_script_state *state)
{
+ int status;
+
if (state->child) {
DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));
state->ctdb->current_monitor = NULL;
}
- /* Save our status as the last executed status. */
- talloc_free(state->ctdb->last_status[state->call]);
- state->ctdb->last_status[state->call] = state->scripts;
- if (state->current < state->ctdb->last_status[state->call]->num_scripts) {
- state->ctdb->last_status[state->call]->num_scripts = state->current+1;
+ /* Save our scripts as the last executed status, if we have them.
+ * See ctdb_event_script_callback_v where we abort monitor event. */
+ if (state->scripts) {
+ talloc_free(state->ctdb->last_status[state->call]);
+ state->ctdb->last_status[state->call] = state->scripts;
+ if (state->current < state->ctdb->last_status[state->call]->num_scripts) {
+ state->ctdb->last_status[state->call]->num_scripts = state->current+1;
+ }
+ }
+
+ /* Use last status as result, or "OK" if none. */
+ if (state->ctdb->last_status[state->call]) {
+ status = script_status(state->ctdb->last_status[state->call]);
+ } else {
+ status = 0;
}
/* This is allowed to free us; talloc will prevent double free anyway,
* but beware if you call this outside the destructor! */
if (state->callback) {
- state->callback(state->ctdb, state->cb_status, state->private_data);
+ state->callback(state->ctdb, status, state->private_data);
}
return 0;
{
switch (call) {
/* These all take no arguments. */
+ case CTDB_EVENT_INIT:
+ case CTDB_EVENT_SETUP:
case CTDB_EVENT_STARTUP:
case CTDB_EVENT_START_RECOVERY:
case CTDB_EVENT_RECOVERED:
case CTDB_EVENT_STATUS:
case CTDB_EVENT_SHUTDOWN:
case CTDB_EVENT_RELOAD:
+ case CTDB_EVENT_IPREALLOCATED:
return count_words(options) == 0;
case CTDB_EVENT_TAKE_IP: /* interface, IP address, netmask bits. */
case CTDB_EVENT_RELEASE_IP:
return count_words(options) == 3;
+ case CTDB_EVENT_UPDATE_IP: /* old interface, new interface, IP address, netmask bits. */
+ return count_words(options) == 4;
+
default:
DEBUG(DEBUG_ERR,(__location__ "Unknown ctdb_eventscript_call %u\n", call));
return false;
const char *fmt, va_list ap)
{
struct ctdb_event_script_state *state;
- int ret;
state = talloc(ctdb->event_script_ctx, struct ctdb_event_script_state);
CTDB_NO_MEMORY(ctdb, state);
}
if (!check_options(state->call, state->options)) {
DEBUG(DEBUG_ERR, ("Bad eventscript options '%s' for %s\n",
- call_names[state->call], state->options));
+ ctdb_eventscript_call_names[state->call], state->options));
talloc_free(state);
return -1;
}
/* we guarantee that only some specifically allowed event scripts are run
while in recovery */
const enum ctdb_eventscript_call allowed_calls[] = {
- CTDB_EVENT_START_RECOVERY, CTDB_EVENT_SHUTDOWN, CTDB_EVENT_RELEASE_IP, CTDB_EVENT_STOPPED };
+ CTDB_EVENT_INIT,
+ CTDB_EVENT_SETUP,
+ CTDB_EVENT_START_RECOVERY,
+ CTDB_EVENT_SHUTDOWN,
+ CTDB_EVENT_RELEASE_IP,
+ CTDB_EVENT_STOPPED
+ };
int i;
for (i=0;i<ARRAY_SIZE(allowed_calls);i++) {
if (call == allowed_calls[i]) break;
}
if (i == ARRAY_SIZE(allowed_calls)) {
DEBUG(DEBUG_ERR,("Refusing to run event scripts call '%s' while in recovery\n",
- call_names[call]));
+ ctdb_eventscript_call_names[call]));
talloc_free(state);
return -1;
}
}
/* Kill off any running monitor events to run this event. */
- talloc_free(ctdb->current_monitor);
- ctdb->current_monitor = NULL;
-
- if (!from_user && (call == CTDB_EVENT_MONITOR || call == CTDB_EVENT_STATUS)) {
- ctdb->current_monitor = state;
+ if (ctdb->current_monitor) {
+ /* Discard script status so we don't save to last_status */
+ talloc_free(ctdb->current_monitor->scripts);
+ ctdb->current_monitor->scripts = NULL;
+ talloc_free(ctdb->current_monitor);
+ ctdb->current_monitor = NULL;
}
DEBUG(DEBUG_INFO,(__location__ " Starting eventscript %s %s\n",
- call_names[state->call], state->options));
+ ctdb_eventscript_call_names[state->call],
+ state->options));
/* This is not a child of state, since we save it in destructor. */
state->scripts = ctdb_get_script_list(ctdb, ctdb);
return -1;
}
state->current = 0;
+ state->child = 0;
+
+ if (!from_user && (call == CTDB_EVENT_MONITOR || call == CTDB_EVENT_STATUS)) {
+ ctdb->current_monitor = state;
+ }
+
+ talloc_set_destructor(state, event_script_destructor);
/* Nothing to do? */
if (state->scripts->num_scripts == 0) {
- ctdb->event_script_timeouts = 0;
talloc_free(state);
return 0;
}
- ret = fork_child_for_script(ctdb, state);
- if (ret != 0) {
- talloc_free(state->scripts);
- talloc_free(state);
- return -1;
- }
+ state->scripts->scripts[0].status = fork_child_for_script(ctdb, state);
+ if (state->scripts->scripts[0].status != 0) {
+ /* Callback is called from destructor, with fail result. */
+ talloc_free(state);
+ return 0;
+ }
- talloc_set_destructor(state, event_script_destructor);
if (!timeval_is_zero(&state->timeout)) {
event_add_timed(ctdb->ev, state, timeval_current_ofs(state->timeout.tv_sec, state->timeout.tv_usec), ctdb_event_script_timeout, state);
} else {
DEBUG(DEBUG_ERR, (__location__ " eventscript %s %s called with no timeout\n",
- call_names[state->call], state->options));
+ ctdb_eventscript_call_names[state->call],
+ state->options));
}
return 0;
if (status.status == -ETIME) {
DEBUG(DEBUG_ERR, (__location__ " eventscript for '%s' timedout."
" Immediately banning ourself for %d seconds\n",
- call_names[call],
+ ctdb_eventscript_call_names[call],
ctdb->tunable.recovery_ban_period));
ctdb_ban_self(ctdb);
}
p += strspn(p, " \t");
/* See if we match any. */
- for (*call = 0; *call < ARRAY_SIZE(call_names); (*call)++) {
- len = strlen(call_names[*call]);
- if (strncmp(p, call_names[*call], len) == 0) {
+ for (*call = 0; *call < CTDB_EVENT_MAX; (*call)++) {
+ len = strlen(ctdb_eventscript_call_names[*call]);
+ if (strncmp(p, ctdb_eventscript_call_names[*call], len) == 0) {
/* If end of string or whitespace, we're done. */
if (strcspn(p + len, " \t") == 0) {
return p + len;