#include "system/dir.h"
#include "system/locale.h"
#include "../include/ctdb_private.h"
-#include "lib/tevent/tevent.h"
#include "../common/rb_tree.h"
+#include "lib/util/dlinklist.h"
static void ctdb_event_script_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p);
*/
static void sigterm(int sig)
{
+ pid_t pid;
+
/* all the child processes will be running in the same process group */
- kill(-getpgrp(), SIGKILL);
+ pid = getpgrp();
+ if (pid == -1) {
+ kill(-getpid(), SIGKILL);
+ } else {
+ kill(-pid, SIGKILL);
+ }
_exit(1);
}
/* This is attached to the event script state. */
struct event_script_callback {
- struct ctdb_event_script_state *state;
+ struct event_script_callback *next, *prev;
+ struct ctdb_context *ctdb;
/* Warning: this can free us! */
void (*fn)(struct ctdb_context *, int, void *);
tree_item = talloc(tree, struct ctdb_script_tree_item);
if (tree_item == NULL) {
DEBUG(DEBUG_ERR, (__location__ " Failed to allocate new tree item\n"));
+ closedir(dir);
talloc_free(tmp_ctx);
return NULL;
}
tree_item->name = talloc_strdup(tree_item, de->d_name);
if (tree_item->name == NULL) {
DEBUG(DEBUG_ERR,(__location__ " Failed to allocate script name.\n"));
+ closedir(dir);
talloc_free(tmp_ctx);
return NULL;
}
ctdb_eventscript_call_names[state->call]);
close(state->fd[0]);
set_close_on_exec(state->fd[1]);
+ ctdb_set_process_name("ctdb_eventscript");
rt = child_run_script(ctdb, state->from_user, state->call, state->options, current);
/* We must be able to write PIPEBUF bytes at least; if this
/* valgrind gets overloaded if we run next script as it's still doing
* post-execution analysis, so kill finished child here. */
if (ctdb->valgrinding) {
- kill(state->child, SIGKILL);
+ ctdb_kill(ctdb, state->child, SIGKILL);
}
state->child = 0;
}
}
-static void debug_timeout(struct ctdb_event_script_state *state)
+static void ctdb_run_debug_hung_script(struct ctdb_context *ctdb, struct ctdb_event_script_state *state)
{
struct ctdb_script_wire *current = get_current_script(state);
char *cmd;
pid_t pid;
- time_t t;
- char tbuf[100], buf[200];
+ const char * debug_hung_script = ETCDIR "/ctdb/debug-hung-script.sh";
- cmd = child_command_string(state->ctdb, state,
+ cmd = child_command_string(ctdb, state,
state->from_user, current->name,
state->call, state->options);
CTDB_NO_MEMORY_VOID(state->ctdb, cmd);
cmd, timeval_elapsed(¤t->start), state->child));
talloc_free(cmd);
- t = time(NULL);
- strftime(tbuf, sizeof(tbuf)-1, "%Y%m%d%H%M%S", localtime(&t));
- sprintf(buf, "{ pstree -p; cat /proc/locks; ls -li /var/ctdb/ /var/ctdb/persistent; }"
- " >/tmp/ctdb.event.%s.%d", tbuf, getpid());
-
- pid = ctdb_fork(state->ctdb);
- if (pid == 0) {
- system(buf);
- /* Now we can kill the child */
- kill(state->child, SIGTERM);
- exit(0);
+ if (!ctdb_fork_with_logging(ctdb, ctdb, "Hung script", NULL, NULL, &pid)) {
+ DEBUG(DEBUG_ERR,("Failed to fork a child process with logging to track hung event script\n"));
+ ctdb_kill(state->ctdb, state->child, SIGTERM);
+ return;
}
if (pid == -1) {
DEBUG(DEBUG_ERR,("Fork for debug script failed : %s\n",
strerror(errno)));
- } else {
- DEBUG(DEBUG_ERR,("Logged timedout eventscript : %s\n", buf));
- /* Don't kill child until timeout done. */
- state->child = 0;
+ ctdb_kill(state->ctdb, state->child, SIGTERM);
+ return;
+ }
+ if (pid == 0) {
+ char *buf;
+
+ ctdb_set_process_name("ctdb_debug_hung_script");
+ if (getenv("CTDB_DEBUG_HUNG_SCRIPT") != NULL) {
+ debug_hung_script = getenv("CTDB_DEBUG_HUNG_SCRIPT");
+ }
+
+ buf = talloc_asprintf(NULL, "%s %d",
+ debug_hung_script, state->child);
+ system(buf);
+ talloc_free(buf);
+
+ /* Now we can kill the child */
+ ctdb_kill(state->ctdb, state->child, SIGTERM);
+ _exit(0);
}
+
+ /* Don't kill child until timeout done. */
+ state->child = 0;
}
/* called when child times out */
case CTDB_EVENT_RECOVERED:
case CTDB_EVENT_TAKE_IP:
case CTDB_EVENT_RELEASE_IP:
- case CTDB_EVENT_STOPPED:
- case CTDB_EVENT_MONITOR:
case CTDB_EVENT_STATUS:
state->scripts->scripts[state->current].status = 0;
DEBUG(DEBUG_ERR,("Ignoring hung script for %s call %d\n", state->options, state->call));
+ ctdb_run_debug_hung_script(ctdb, state);
break;
default:
state->scripts->scripts[state->current].status = -ETIME;
- debug_timeout(state);
+ ctdb_run_debug_hung_script(ctdb, state);
}
talloc_free(state);
if (state->child) {
DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));
- if (kill(state->child, SIGTERM) != 0) {
+ if (ctdb_kill(state->ctdb, state->child, SIGTERM) != 0) {
DEBUG(DEBUG_ERR,("Failed to kill child process for eventscript, errno %s(%d)\n", strerror(errno), errno));
}
}
}
/* This is allowed to free us; talloc will prevent double free anyway,
- * but beware if you call this outside the destructor! */
- callback = state->callback;
+ * but beware if you call this outside the destructor!
+ * The callback hangs off a different context, so we walk the list
+ * of "active" callbacks until we find the one state points to.
+ * If we can't find it, it means the callback has been removed.
+ */
+ for (callback = state->ctdb->script_callbacks; callback != NULL; callback = callback->next) {
+ if (callback == state->callback) {
+ break;
+ }
+ }
+
+ state->callback = NULL;
if (callback) {
/* Make sure destructor doesn't free itself! */
case CTDB_EVENT_STARTUP:
case CTDB_EVENT_START_RECOVERY:
case CTDB_EVENT_RECOVERED:
- case CTDB_EVENT_STOPPED:
case CTDB_EVENT_MONITOR:
case CTDB_EVENT_STATUS:
case CTDB_EVENT_SHUTDOWN:
static int remove_callback(struct event_script_callback *callback)
{
- /* Detach ourselves from the running script state */
- callback->state->callback = NULL;
+ DLIST_REMOVE(callback->ctdb->script_callbacks, callback);
return 0;
}
{
struct ctdb_event_script_state *state;
- state = talloc(ctdb->event_script_ctx, struct ctdb_event_script_state);
- CTDB_NO_MEMORY(ctdb, state);
-
- /* The callback isn't done if the context is freed. */
- state->callback = talloc(mem_ctx, struct event_script_callback);
- CTDB_NO_MEMORY(ctdb, state->callback);
- talloc_set_destructor(state->callback, remove_callback);
- state->callback->state = state;
- state->callback->fn = callback;
- state->callback->private_data = private_data;
-
- state->ctdb = ctdb;
- state->from_user = from_user;
- state->call = call;
- state->options = talloc_vasprintf(state, fmt, ap);
- state->timeout = timeval_set(ctdb->tunable.script_timeout, 0);
- state->scripts = NULL;
- if (state->options == NULL) {
- DEBUG(DEBUG_ERR, (__location__ " could not allocate state->options\n"));
- talloc_free(state);
- return -1;
- }
- if (!check_options(state->call, state->options)) {
- DEBUG(DEBUG_ERR, ("Bad eventscript options '%s' for %s\n",
- ctdb_eventscript_call_names[state->call], state->options));
- talloc_free(state);
- return -1;
- }
-
if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
/* we guarantee that only some specifically allowed event scripts are run
while in recovery */
CTDB_EVENT_START_RECOVERY,
CTDB_EVENT_SHUTDOWN,
CTDB_EVENT_RELEASE_IP,
- CTDB_EVENT_STOPPED
+ CTDB_EVENT_IPREALLOCATED,
};
int i;
for (i=0;i<ARRAY_SIZE(allowed_calls);i++) {
if (i == ARRAY_SIZE(allowed_calls)) {
DEBUG(DEBUG_ERR,("Refusing to run event scripts call '%s' while in recovery\n",
ctdb_eventscript_call_names[call]));
- talloc_free(state);
return -1;
}
}
if (ctdb->current_monitor) {
struct ctdb_event_script_state *ms = talloc_get_type(ctdb->current_monitor, struct ctdb_event_script_state);
- /* cancel it */
- if (ms->callback != NULL) {
+ /* Cancel current monitor callback state only if monitoring
+ * context ctdb->monitor->monitor_context has not been freed */
+ if (ms->callback != NULL && !ctdb_stopped_monitoring(ctdb)) {
ms->callback->fn(ctdb, -ECANCELED, ms->callback->private_data);
talloc_free(ms->callback);
}
ctdb->current_monitor = NULL;
}
+ state = talloc(ctdb->event_script_ctx, struct ctdb_event_script_state);
+ CTDB_NO_MEMORY(ctdb, state);
+
+ /* The callback isn't done if the context is freed. */
+ state->callback = talloc(mem_ctx, struct event_script_callback);
+ CTDB_NO_MEMORY(ctdb, state->callback);
+ DLIST_ADD(ctdb->script_callbacks, state->callback);
+ talloc_set_destructor(state->callback, remove_callback);
+ state->callback->ctdb = ctdb;
+ state->callback->fn = callback;
+ state->callback->private_data = private_data;
+
+ state->ctdb = ctdb;
+ state->from_user = from_user;
+ state->call = call;
+ state->options = talloc_vasprintf(state, fmt, ap);
+ state->timeout = timeval_set(ctdb->tunable.script_timeout, 0);
+ state->scripts = NULL;
+ if (state->options == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " could not allocate state->options\n"));
+ talloc_free(state);
+ return -1;
+ }
+ if (!check_options(state->call, state->options)) {
+ DEBUG(DEBUG_ERR, ("Bad eventscript options '%s' for %s\n",
+ ctdb_eventscript_call_names[state->call], state->options));
+ talloc_free(state);
+ return -1;
+ }
+
DEBUG(DEBUG_INFO,(__location__ " Starting eventscript %s %s\n",
ctdb_eventscript_call_names[state->call],
state->options));
va_start(ap, fmt);
ret = ctdb_event_script_callback_v(ctdb, ctdb,
event_script_callback, &status, false, call, fmt, ap);
+ va_end(ap);
if (ret != 0) {
return ret;
}
- va_end(ap);
status.status = -1;
status.done = false;
" Immediately banning ourself for %d seconds\n",
ctdb_eventscript_call_names[call],
ctdb->tunable.recovery_ban_period));
- ctdb_ban_self(ctdb);
+
+ /* Don't ban self if CTDB is starting up or shutting down */
+ if (call != CTDB_EVENT_INIT && call != CTDB_EVENT_SHUTDOWN) {
+ ctdb_ban_self(ctdb);
+ }
}
return status.status;
ctdb_enable_monitoring(ctdb);
if (status != 0) {
- DEBUG(DEBUG_ERR,(__location__ " Failed to forcibly run eventscripts\n"));
+ DEBUG(DEBUG_ERR,(__location__ " Failed to run eventscripts\n"));
}
ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
/* Figure out what call they want. */
options = get_call((const char *)indata.dptr, &call);
if (!options) {
- DEBUG(DEBUG_ERR, (__location__ " Invalid forced \"%s\"\n", (const char *)indata.dptr));
+ DEBUG(DEBUG_ERR, (__location__ " Invalid event name \"%s\"\n", (const char *)indata.dptr));
return -1;
}
state->c = talloc_steal(state, c);
- DEBUG(DEBUG_NOTICE,("Forced running of eventscripts with arguments %s\n", indata.dptr));
+ DEBUG(DEBUG_NOTICE,("Running eventscripts with arguments %s\n", indata.dptr));
ctdb_disable_monitoring(ctdb);