4 Copyright (C) Andrew Tridgell 2007
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "system/filesys.h"
22 #include "system/wait.h"
23 #include "system/dir.h"
24 #include "system/locale.h"
25 #include "../include/ctdb_private.h"
26 #include "lib/events/events.h"
27 #include "../common/rb_tree.h"
/* Member of the child_state bookkeeping used below: what is currently being
   run. It is set to startup, to the command line of each script and finally
   to finished by ctdb_event_script_v, and printed by sigterm() on timeout. */
31 const char *script_running;
35 ctdbd sends us a SIGTERM when we should time out the current script
/*
  SIGTERM handler, installed with signal() in ctdb_event_script_v.

  Logs the script recorded in child_state.script_running together with the
  time elapsed since child_state.start, then sends SIGKILL to the process
  group of the calling process.

  NOTE(review): DEBUG() is invoked from signal handler context; whether it
  is async-signal-safe cannot be told from this file - confirm.
*/
37 static void sigterm(int sig)
39 DEBUG(DEBUG_ERR,("Timed out running script '%s' after %.1f seconds\n",
40 child_state.script_running, timeval_elapsed(&child_state.start)));
41 /* all the child processes will be running in the same process group,
   so signalling the negated group id reaches every one of them */
42 kill(-getpgrp(), SIGKILL);
/*
  State of one event script run started by ctdb_event_script_callback_v.

  Only two members are visible in this listing. The functions below also
  use the members child (pid from fork), fd (the pipe pair), private_data
  and options.
*/
46 struct ctdb_event_script_state {
/* daemon context the run belongs to */
47 struct ctdb_context *ctdb;
/* completion callback, invoked with the context, a status and private_data */
49 void (*callback)(struct ctdb_context *, int, void *);
56 run the event scripts for the given options string
57 this function is called and run in the context of a forked child
58 which allows it to do blocking calls such as system()
/*
  Run the event scripts found in ctdb->event_script_dir.

  Called from the forked child in ctdb_event_script_callback_v with the
  formatted options string.

  Visible steps:
   - while ctdb->recovery_mode is not CTDB_RECOVERY_NORMAL only the
     startrecovery and shutdown options are accepted; anything else is
     logged as refused
   - setpgid(0,0) puts this process into its own process group and
     sigterm() is installed as the SIGTERM handler
   - a directory entry is kept only if it does not end in a tilde, has a
     dot as its third character, starts with a number sscanf can parse,
     can be stat()ed and has S_IXUSR set
   - the surviving names are stored in a tree keyed by that number and are
     then fetched one at a time and run through system()
   - child_state is updated before every run so that sigterm() can report
     the script and when it was started

  NOTE(review): the command line is built with the format
  directory/script options and handed to system(). Only the directory
  argument is visible in this listing; presumably the other two are the
  script name and the options string. If so, options reaches a shell
  unquoted - confirm no caller can pass untrusted text.
  NOTE(review): the return statements are not visible in this listing, so
  return values are not documented here.
*/
60 static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
65 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
72 if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
73 /* we guarantee that only some specifically allowed event scripts are run
75 const char *allowed_scripts[] = {"startrecovery", "shutdown" };
77 for (i=0;i<ARRAY_SIZE(allowed_scripts);i++) {
78 if (strcmp(options, allowed_scripts[i]) == 0) break;
80 if (i == ARRAY_SIZE(allowed_scripts)) {
81 DEBUG(0,("Refusing to run event scripts with option '%s' while in recovery\n",
87 if (setpgid(0,0) != 0) {
88 DEBUG(DEBUG_ERR,("Failed to create process group for event scripts - %s\n",
94 signal(SIGTERM, sigterm);
96 child_state.start = timeval_current();
97 child_state.script_running = "startup";
100 the service specific event scripts
102 if (stat(ctdb->event_script_dir, &st) != 0 &&
104 DEBUG(DEBUG_CRIT,("No event script directory found at '%s'\n", ctdb->event_script_dir));
105 talloc_free(tmp_ctx);
109 /* create a tree to store all the script names in */
110 tree = trbt_create(tmp_ctx, 0);
112 /* scan all directory entries and insert all valid scripts into the
115 dir = opendir(ctdb->event_script_dir);
117 DEBUG(DEBUG_CRIT,("Failed to open event script directory '%s'\n", ctdb->event_script_dir));
118 talloc_free(tmp_ctx);
123 while ((de=readdir(dir)) != NULL) {
128 namlen = strlen(de->d_name);
134 if (de->d_name[namlen-1] == '~') {
135 /* skip files emacs left behind */
/* NOTE(review): the d_name[namlen-1] access above and the d_name[2]
   access below assume the name has at least three characters. No length
   check is visible in this listing - confirm one precedes them. */
139 if (de->d_name[2] != '.') {
143 if (sscanf(de->d_name, "%02u.", &num) != 1) {
147 /* Make sure the event script is executable */
/* NOTE(review): the talloc_asprintf result is used by stat() without a
   visible NULL check - confirm. */
148 str = talloc_asprintf(tree, "%s/%s", ctdb->event_script_dir, de->d_name);
149 if (stat(str, &st) != 0) {
150 DEBUG(DEBUG_ERR,("Could not stat event script %s. Ignoring this event script\n", str));
153 if (!(st.st_mode & S_IXUSR)) {
154 DEBUG(DEBUG_ERR,("Event script %s is not executable. Ignoring this event script\n", str));
159 /* store the event script in the tree */
/* key layout: numeric prefix shifted into the upper bits, running counter
   in the low 16 bits, so entries sharing a prefix still get distinct keys */
160 trbt_insert32(tree, (num<<16)|count++, talloc_strdup(tree, de->d_name));
164 /* fetch the scripts from the tree one by one and execute
167 while ((script=trbt_findfirstarray32(tree, 1)) != NULL) {
168 cmdstr = talloc_asprintf(tmp_ctx, "%s/%s %s",
169 ctdb->event_script_dir,
171 CTDB_NO_MEMORY(ctdb, cmdstr);
173 DEBUG(DEBUG_INFO,("Executing event script %s\n",cmdstr));
175 child_state.start = timeval_current();
176 child_state.script_running = cmdstr;
178 ret = system(cmdstr);
179 /* if the system() call was successful, translate ret into the
180 return code from the command
183 ret = WEXITSTATUS(ret);
185 /* return an error if the script failed */
187 DEBUG(DEBUG_ERR,("Event script %s failed with error %d\n", cmdstr, ret));
188 talloc_free(tmp_ctx);
192 /* remove this script from the tree */
/* every script has been run: record that for sigterm() */
196 child_state.start = timeval_current();
197 child_state.script_running = "finished";
199 talloc_free(tmp_ctx);
203 /* called when child is finished */
/*
  fd event handler registered by ctdb_event_script_callback_v on the read
  end of the result pipe.

  Reads the int result written by the child, removes the destructor from
  the state, passes the result to the completion callback and resets
  ctdb->event_script_timeouts to zero.

  callback, private_data and ctdb are copied into locals up front,
  presumably because the state is released before the callback runs (the
  release itself is not visible in this listing).
*/
204 static void ctdb_event_script_handler(struct event_context *ev, struct fd_event *fde,
205 uint16_t flags, void *p)
207 struct ctdb_event_script_state *state =
208 talloc_get_type(p, struct ctdb_event_script_state);
209 void (*callback)(struct ctdb_context *, int, void *) = state->callback;
210 void *private_data = state->private_data;
211 struct ctdb_context *ctdb = state->ctdb;
/* NOTE(review): the result of read() is ignored, so on a short or failed
   read rt keeps its previous value. The declaration of rt is not visible
   here - confirm it is initialised. */
214 read(state->fd[0], &rt, sizeof(rt));
/* event_script_destructor would send SIGTERM to the child; the child has
   already reported its result, so the destructor is removed */
216 talloc_set_destructor(state, NULL);
218 callback(ctdb, rt, private_data);
/* a script run completed, so the timeout counter starts over */
220 ctdb->event_script_timeouts = 0;
/*
  Send a CTDB_SRVID_BAN_NODE message to CTDB_BROADCAST_CONNECTED.

  The payload is a struct ctdb_ban_info whose ban_time is set to
  ban_period. Going by the function name the ban targets the local node;
  the assignment of the node number is not visible in this listing.

  A send failure is only logged; the function returns nothing.
*/
223 static void ctdb_ban_self(struct ctdb_context *ctdb, uint32_t ban_period)
226 struct ctdb_ban_info b;
230 b.ban_time = ban_period;
/* the message body is the raw bytes of b */
232 data.dptr = (uint8_t *)&b;
233 data.dsize = sizeof(b);
235 ret = ctdb_daemon_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
236 CTDB_SRVID_BAN_NODE, data);
238 DEBUG(DEBUG_ERR,(__location__ " Failed to send ban message\n"));
243 /* called when child times out */
/*
  Timed event handler registered by ctdb_event_script_callback_v.

  What happens depends on the options string of the run:
   - monitor: ctdb->event_script_timeouts is incremented. Once it exceeds
     tunable.script_ban_count the counter is reset, the node bans itself
     for tunable.recovery_ban_period and the callback gets status -1.
     Below that limit the callback gets status 0 (the else between the two
     callback calls is not visible in this listing).
   - startup: the callback gets status -1, no ban is requested.
   - anything else: the node bans itself at once and the callback gets
     status -1.

  The counter is also reset by ctdb_event_script_handler whenever a run
  completes.
*/
244 static void ctdb_event_script_timeout(struct event_context *ev, struct timed_event *te,
245 struct timeval t, void *p)
247 struct ctdb_event_script_state *state = talloc_get_type(p, struct ctdb_event_script_state);
248 void (*callback)(struct ctdb_context *, int, void *) = state->callback;
249 void *private_data = state->private_data;
250 struct ctdb_context *ctdb = state->ctdb;
253 DEBUG(DEBUG_ERR,("Event script timed out : %s count : %u\n", state->options, ctdb->event_script_timeouts));
/* private copy of the options string, hung off ctdb rather than state and
   released at the end of this function */
255 options = talloc_strdup(ctdb, state->options);
256 CTDB_NO_MEMORY_VOID(ctdb, options);
259 if (!strcmp(options, "monitor")) {
260 /* if it is a monitor event, we allow it to "hang" a few times
261 before we declare it a failure and ban ourself (and make
264 DEBUG(DEBUG_ERR, (__location__ " eventscript for monitor event timedout.\n"));
266 ctdb->event_script_timeouts++;
267 if (ctdb->event_script_timeouts > ctdb->tunable.script_ban_count) {
268 ctdb->event_script_timeouts = 0;
269 DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Banning self for %d seconds\n", ctdb->tunable.script_ban_count, ctdb->tunable.recovery_ban_period));
270 ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
271 callback(ctdb, -1, private_data);
273 callback(ctdb, 0, private_data);
275 } else if (!strcmp(options, "startup")) {
276 DEBUG(DEBUG_ERR, (__location__ " eventscript for startup event timedout.\n"));
277 callback(ctdb, -1, private_data);
279 /* if it is not a monitor event we ban ourself immediately */
280 DEBUG(DEBUG_ERR, (__location__ " eventscript for NON-monitor/NON-startup event timedout. Immediately banning ourself for %d seconds\n", ctdb->tunable.recovery_ban_period));
281 ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
282 callback(ctdb, -1, private_data);
285 talloc_free(options);
289 destroy a running event script
/*
  talloc destructor set on the state by ctdb_event_script_callback_v.

  Logs the pid stored in state->child and sends it SIGTERM. In the child,
  ctdb_event_script_v installs sigterm() for that signal, which kills the
  whole process group of the scripts.
*/
291 static int event_script_destructor(struct ctdb_event_script_state *state)
293 DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));
294 kill(state->child, SIGTERM);
299 run the event script in the background, calling the callback when it finishes
/*
  Start an event script run in a forked child and arrange for callback to
  be invoked with the result.

  ctdb         - daemon context
  timeout      - passed to event_add_timed unless it is zero
  mem_ctx      - talloc parent of the state allocated here
  callback     - completion callback stored in the state
  private_data - opaque pointer handed back to callback
  fmt, ap      - printf style description of the options string

  The child runs ctdb_event_script_v and writes its int result to the pipe.
  The parent watches the read end with ctdb_event_script_handler and, when
  timeout is non-zero, arms ctdb_event_script_timeout.

  NOTE(review): the handling of pipe() and fork() failures and the return
  values are not visible in this listing.
*/
302 static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
303 struct timeval timeout,
305 void (*callback)(struct ctdb_context *, int, void *),
307 const char *fmt, va_list ap)
309 struct ctdb_event_script_state *state;
312 state = talloc(mem_ctx, struct ctdb_event_script_state);
313 CTDB_NO_MEMORY(ctdb, state);
316 state->callback = callback;
317 state->private_data = private_data;
/* the formatted options string is owned by the state */
318 state->options = talloc_vasprintf(state, fmt, ap);
319 CTDB_NO_MEMORY(ctdb, state->options);
/* pipe used by the child to report its result to the parent */
321 ret = pipe(state->fd);
327 state->child = fork();
329 if (state->child == (pid_t)-1) {
/* child side */
336 if (state->child == 0) {
340 if (ctdb->do_setsched) {
341 ctdb_restore_scheduler(ctdb);
/* presumably so that the scripts started through system() do not inherit
   the write end of the pipe */
343 set_close_on_exec(state->fd[1]);
344 rt = ctdb_event_script_v(ctdb, state->options);
/* keep trying until the complete int has been written */
345 while ((ret = write(state->fd[1], &rt, sizeof(rt))) != sizeof(rt)) {
/* parent side: from here on destroying the state sends SIGTERM to the
   child through event_script_destructor */
351 talloc_set_destructor(state, event_script_destructor);
/* both events are allocated on the state */
355 event_add_fd(ctdb->ev, state, state->fd[0], EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
356 ctdb_event_script_handler, state);
358 if (!timeval_is_zero(&timeout)) {
359 event_add_timed(ctdb->ev, state, timeout, ctdb_event_script_timeout, state);
361 DEBUG(DEBUG_ERR, (__location__ " eventscript %s called with no timeout\n", state->options));
369 run the event script in the background, calling the callback when it finishes
/*
  Variadic front end of ctdb_event_script_callback_v.

  Takes a printf style fmt plus arguments and forwards them, together with
  ctdb, timeout, mem_ctx, callback and private_data, to
  ctdb_event_script_callback_v, whose result is stored in ret.

  Because fmt is interpreted as a format string, callers must not pass
  externally supplied text in that position.
*/
372 int ctdb_event_script_callback(struct ctdb_context *ctdb,
373 struct timeval timeout,
375 void (*callback)(struct ctdb_context *, int, void *),
377 const char *fmt, ...)
383 ret = ctdb_event_script_callback_v(ctdb, timeout, mem_ctx, callback, private_data, fmt, ap);
/* Completion record shared between ctdb_event_script and its callback.
   The members are not visible in this listing; ctdb_event_script reads
   done and status from it. */
390 struct callback_status {
396 called when ctdb_event_script() finishes
/*
  Completion callback handed to ctdb_event_script_callback_v by
  ctdb_event_script; private_data is the struct callback_status of the
  waiting caller.

  Presumably it stores status and marks the record as done; those
  assignments are not visible in this listing.
*/
398 static void event_script_callback(struct ctdb_context *ctdb, int status, void *private_data)
400 struct callback_status *s = (struct callback_status *)private_data;
406 run the event script, waiting for it to complete. Used when the caller doesn't want to
407 continue till the event script has finished.
/*
  Synchronous variant: start the run through ctdb_event_script_callback_v
  and pump the event loop until the callback has marked the status as done.

  The run is started with timeval_zero() as timeout, so no timed event is
  armed and ctdb_event_script_callback_v logs that the script is called
  with no timeout.

  The state is allocated on tmp_ctx. Freeing tmp_ctx while the state still
  has its destructor therefore sends SIGTERM to the child.

  Returns status.status.

  NOTE(review): the loop also ends when event_loop_once returns non-zero.
  In that case status.status is returned without the callback having run;
  the initialisation of status is not visible here - confirm it is set.
*/
409 int ctdb_event_script(struct ctdb_context *ctdb, const char *fmt, ...)
413 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
414 struct callback_status status;
417 ret = ctdb_event_script_callback_v(ctdb, timeval_zero(), tmp_ctx, event_script_callback, &status, fmt, ap);
421 talloc_free(tmp_ctx);
/* block here, dispatching events, until the callback reports completion */
428 while (status.done == false && event_loop_once(ctdb->ev) == 0) /* noop */;
430 talloc_free(tmp_ctx);
432 return status.status;
/* State kept by ctdb_run_eventscripts until the forced run has finished. */
436 struct eventscript_callback_state {
/* the control request that run_eventscripts_callback replies to */
437 struct ctdb_req_control *c;
441 called when a forced eventscript run finishes
/*
  Completion callback passed to ctdb_event_script_callback by
  ctdb_run_eventscripts.

  Turns monitoring back on (ctdb_run_eventscripts disables it before the
  scripts are started) and answers the pending control request: with status
  after logging a failure, or with 0 on success. The condition selecting
  between the two replies is not visible in this listing.
*/
443 static void run_eventscripts_callback(struct ctdb_context *ctdb, int status,
446 struct eventscript_callback_state *state =
447 talloc_get_type(private_data, struct eventscript_callback_state);
449 ctdb_enable_monitoring(ctdb);
452 DEBUG(DEBUG_ERR,(__location__ " Failed to forcibly run eventscripts\n"));
453 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
458 /* the control succeeded */
459 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
465 A control to force running of the eventscripts from the ctdb client tool
/*
  Control handler that runs the event scripts with the arguments carried
  in indata.

  Visible steps:
   - any previous ctdb->eventscripts_ctx is freed and a new one is created
   - a struct eventscript_callback_state is allocated on it and takes
     ownership of the request c through talloc_steal
   - the request is logged as aborted while ctdb->recovery_mode is not
     CTDB_RECOVERY_NORMAL
   - monitoring is disabled and the scripts are started through
     ctdb_event_script_callback with tunable.script_timeout seconds as
     timeout and run_eventscripts_callback as completion callback
   - if starting fails, monitoring is enabled again and the failure logged

  The reply is sent asynchronously from run_eventscripts_callback. The
  return statements and the handling of async_reply are not visible in
  this listing.

  NOTE(review): indata.dptr is printed with a string conversion and used as
  a C string; confirm the control data is NUL terminated.
*/
467 int32_t ctdb_run_eventscripts(struct ctdb_context *ctdb,
468 struct ctdb_req_control *c,
469 TDB_DATA indata, bool *async_reply)
472 struct eventscript_callback_state *state;
474 /* kill off any previous invocations of forced eventscripts */
475 if (ctdb->eventscripts_ctx) {
476 talloc_free(ctdb->eventscripts_ctx);
478 ctdb->eventscripts_ctx = talloc_new(ctdb);
479 CTDB_NO_MEMORY(ctdb, ctdb->eventscripts_ctx);
481 state = talloc(ctdb->eventscripts_ctx, struct eventscript_callback_state);
482 CTDB_NO_MEMORY(ctdb, state);
/* the request now belongs to state and is released together with it */
484 state->c = talloc_steal(state, c);
486 DEBUG(DEBUG_NOTICE,("Forced running of eventscripts with arguments %s\n", indata.dptr));
488 if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
489 DEBUG(DEBUG_ERR, (__location__ " Aborted running eventscript \"%s\" while in RECOVERY mode\n", indata.dptr));
493 ctdb_disable_monitoring(ctdb);
/* NOTE(review): indata.dptr is passed in the fmt position of
   ctdb_event_script_callback, so any percent sign in the client supplied
   text is treated as a conversion specification. It should be passed as
   the argument of a constant string format instead. */
495 ret = ctdb_event_script_callback(ctdb,
496 timeval_current_ofs(ctdb->tunable.script_timeout, 0),
497 state, run_eventscripts_callback, state,
498 (const char *)indata.dptr);
501 ctdb_enable_monitoring(ctdb);
502 DEBUG(DEBUG_ERR,(__location__ " Failed to run eventscripts with arguments %s\n", indata.dptr));
507 /* tell ctdb_control.c that we will be replying asynchronously */