4 Copyright (C) Andrew Tridgell 2007
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "system/filesys.h"
22 #include "system/wait.h"
23 #include "system/dir.h"
24 #include "system/locale.h"
25 #include "../include/ctdb_private.h"
26 #include "lib/events/events.h"
27 #include "../common/rb_tree.h"
/* Script (or phase label such as "startup"/"finished") this child is currently
 * working on; sigterm() prints it in its timeout message. */
31 const char *script_running;
35 ctdbd sends us a SIGTERM when we should time out the current script
/* SIGTERM handler installed with signal() in ctdb_event_script_v().
 * Logs which script was running and for how long (taken from child_state),
 * then sends SIGKILL to the caller's own process group. */
37 static void sigterm(int sig)
39 DEBUG(DEBUG_ERR,("Timed out running script '%s' after %.1f seconds\n",
40 child_state.script_running, timeval_elapsed(&child_state.start)));
41 /* all the child processes will be running in the same process group */
/* a negative pid argument makes kill() signal every member of that group */
42 kill(-getpgrp(), SIGKILL);
/* State of one background event-script run; allocated in
 * ctdb_event_script_callback_v() and passed to the fd and timeout handlers. */
46 struct ctdb_event_script_state {
/* daemon context the run belongs to */
47 struct ctdb_context *ctdb;
/* completion callback, invoked as callback(ctdb, result, private_data) */
49 void (*callback)(struct ctdb_context *, int, void *);
/* Record for one script within a monitoring run. Records are chained through
 * 'next'; ctdb_control_event_script_start() pushes new records at the head. */
56 struct ctdb_monitor_script_status {
57 struct ctdb_monitor_script_status *next;
/* set to the current time by ctdb_control_event_script_stop() */
60 struct timeval finished;
/* State of one complete monitoring run; hung off ctdb->script_monitoring_ctx
 * while in progress and ctdb->last_monitoring_ctx once finished. */
66 struct ctdb_monitoring_status {
/* set to the current time by ctdb_control_event_script_finished() */
68 struct timeval finished;
/* head of the per-script list, most recently started script first */
70 struct ctdb_monitor_script_status *scripts;
74 /* called from ctdb_logging when we have received output on STDERR from
75 * one of the eventscripts
77 int ctdb_log_event_script_output(struct ctdb_context *ctdb, char *str, uint16_t len)
79 struct ctdb_monitoring_status *monitoring_status =
80 talloc_get_type(ctdb->script_monitoring_ctx,
81 struct ctdb_monitoring_status);
82 struct ctdb_monitor_script_status *script;
84 if (monitoring_status == NULL) {
88 script = monitoring_status->scripts;
93 if (script->output == NULL) {
94 script->output = talloc_asprintf(script, "%*.*s", len, len, str);
96 script->output = talloc_asprintf_append(script->output, "%*.*s", len, len, str);
102 /* called from the event script child process when we are starting a new
105 int32_t ctdb_control_event_script_init(struct ctdb_context *ctdb)
107 struct ctdb_monitoring_status *monitoring_status;
109 DEBUG(DEBUG_INFO, ("event script init called\n"));
110 if (ctdb->script_monitoring_ctx != NULL) {
111 talloc_free(ctdb->script_monitoring_ctx);
112 ctdb->script_monitoring_ctx = NULL;
115 monitoring_status = talloc_zero(ctdb, struct ctdb_monitoring_status);
116 if (monitoring_status == NULL) {
117 DEBUG(DEBUG_ERR, (__location__ " ERROR: Failed to talloc script_monitoring context\n"));
121 ctdb->script_monitoring_ctx = monitoring_status;
122 monitoring_status->start = timeval_current();
128 /* called from the event script child process when we are star running
131 int32_t ctdb_control_event_script_start(struct ctdb_context *ctdb, TDB_DATA indata)
133 const char *name = (const char *)indata.dptr;
134 struct ctdb_monitoring_status *monitoring_status =
135 talloc_get_type(ctdb->script_monitoring_ctx,
136 struct ctdb_monitoring_status);
137 struct ctdb_monitor_script_status *script;
139 DEBUG(DEBUG_INFO, ("event script start called : %s\n", name));
141 if (monitoring_status == NULL) {
142 DEBUG(DEBUG_ERR,(__location__ " script_status is NULL when starting to run script %s\n", name));
146 script = talloc_zero(monitoring_status, struct ctdb_monitor_script_status);
147 if (script == NULL) {
148 DEBUG(DEBUG_ERR,(__location__ " Failed to talloc ctdb_monitor_script_status for script %s\n", name));
152 script->next = monitoring_status->scripts;
153 script->name = talloc_strdup(script, name);
154 script->start = timeval_current();
155 monitoring_status->scripts = script;
160 /* called from the event script child process when we have finished running
163 int32_t ctdb_control_event_script_stop(struct ctdb_context *ctdb, TDB_DATA indata)
165 int32_t res = *((int32_t *)indata.dptr);
166 struct ctdb_monitoring_status *monitoring_status =
167 talloc_get_type(ctdb->script_monitoring_ctx,
168 struct ctdb_monitoring_status);
169 struct ctdb_monitor_script_status *script;
171 DEBUG(DEBUG_INFO, ("event script stop called : %d\n", (int)res));
173 if (monitoring_status == NULL) {
174 DEBUG(DEBUG_ERR,(__location__ " script_status is NULL when script finished.\n"));
178 script = monitoring_status->scripts;
179 if (script == NULL) {
180 DEBUG(DEBUG_ERR,(__location__ " script is NULL when the script had finished\n"));
184 script->finished = timeval_current();
185 script->status = res;
190 /* called from the event script child process when we have completed a
193 int32_t ctdb_control_event_script_finished(struct ctdb_context *ctdb)
195 struct ctdb_monitoring_status *monitoring_status =
196 talloc_get_type(ctdb->script_monitoring_ctx,
197 struct ctdb_monitoring_status);
199 DEBUG(DEBUG_INFO, ("event script finished called\n"));
201 if (monitoring_status == NULL) {
202 DEBUG(DEBUG_ERR,(__location__ " script_status is NULL when monitoring event finished\n"));
206 monitoring_status->finished = timeval_current();
207 monitoring_status->status = MONITOR_SCRIPT_OK;
208 if (ctdb->last_monitoring_ctx) {
209 talloc_free(ctdb->last_monitoring_ctx);
211 ctdb->last_monitoring_ctx = ctdb->script_monitoring_ctx;
212 ctdb->script_monitoring_ctx = NULL;
217 static struct ctdb_monitoring_wire *marshall_monitoring_scripts(TALLOC_CTX *mem_ctx, struct ctdb_monitoring_wire *monitoring_scripts, struct ctdb_monitor_script_status *script)
219 struct ctdb_monitoring_script_wire script_wire;
222 if (script == NULL) {
223 return monitoring_scripts;
225 monitoring_scripts = marshall_monitoring_scripts(mem_ctx, monitoring_scripts, script->next);
226 if (monitoring_scripts == NULL) {
230 bzero(&script_wire, sizeof(struct ctdb_monitoring_script_wire));
231 strncpy(script_wire.name, script->name, MAX_SCRIPT_NAME);
232 script_wire.start = script->start;
233 script_wire.finished = script->finished;
234 script_wire.status = script->status;
235 script_wire.timedout = script->timedout;
236 if (script->output != NULL) {
237 strncpy(script_wire.output, script->output, MAX_SCRIPT_OUTPUT);
240 size = talloc_get_size(monitoring_scripts);
241 monitoring_scripts = talloc_realloc_size(mem_ctx, monitoring_scripts, size + sizeof(struct ctdb_monitoring_script_wire));
242 if (monitoring_scripts == NULL) {
243 DEBUG(DEBUG_ERR,(__location__ " Failed to talloc_resize monitoring_scripts blob\n"));
247 memcpy(&monitoring_scripts->scripts[monitoring_scripts->num_scripts], &script_wire, sizeof(script_wire));
248 monitoring_scripts->num_scripts++;
250 return monitoring_scripts;
253 int32_t ctdb_control_get_event_script_status(struct ctdb_context *ctdb, TDB_DATA *outdata)
255 struct ctdb_monitoring_status *monitoring_status =
256 talloc_get_type(ctdb->last_monitoring_ctx,
257 struct ctdb_monitoring_status);
258 struct ctdb_monitoring_wire *monitoring_scripts;
260 if (monitoring_status == NULL) {
261 DEBUG(DEBUG_ERR,(__location__ " last_monitor_ctx is NULL when reading status\n"));
265 monitoring_scripts = talloc_size(outdata, offsetof(struct ctdb_monitoring_wire, scripts));
266 if (monitoring_scripts == NULL) {
267 DEBUG(DEBUG_ERR,(__location__ " failed to talloc monitoring_scripts structure\n"));
271 monitoring_scripts->num_scripts = 0;
272 monitoring_scripts = marshall_monitoring_scripts(outdata, monitoring_scripts, monitoring_status->scripts);
273 if (monitoring_scripts == NULL) {
274 DEBUG(DEBUG_ERR,(__location__ " Monitoring scritps is NULL. can not return data to client\n"));
278 outdata->dsize = talloc_get_size(monitoring_scripts);
279 outdata->dptr = (uint8_t *)monitoring_scripts;
285 run the event script - varargs version
286 this function is called and run in the context of a forked child
287 which allows it to do blocking calls such as system()
/*
 * Run every event script in ctdb->event_script_dir with the argument
 * string 'options'. Runs in the forked child, so blocking calls are fine.
 *
 * Steps visible in this function:
 *  - switch the process from daemon to client mode;
 *  - when options is exactly "monitor", report the start of the run, each
 *    script's start and result, and the end of the run through the
 *    ctdb_ctrl_event_script_* calls;
 *  - while recovery_mode is not CTDB_RECOVERY_NORMAL, refuse any option
 *    string that does not begin with "startrecovery", "shutdown" or
 *    "releaseip";
 *  - put the child into its own process group and install sigterm() so a
 *    timeout can kill the script and everything it spawned;
 *  - collect the directory entries that pass the name and executable
 *    checks into a tree, then run them one at a time with system() as
 *    "<event_script_dir>/<script> <options>";
 *  - a script with a failing result is logged and ends the run.
 *
 * All temporary allocations hang off tmp_ctx, which is freed on every
 * exit path shown.
 */
289 static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
294 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
302 /* This is running in the forked child process. At this stage
303 * we want to switch from being a ctdb daemon into being a client
304 * and connect to the local daemon.
306 if (switch_from_server_to_client(ctdb) != 0) {
307 DEBUG(DEBUG_CRIT, (__location__ "ERROR: failed to switch eventscript child into client mode. shutting down.\n"));
311 if (!strcmp(options, "monitor")) {
314 if (is_monitor == 1) {
315 if (ctdb_ctrl_event_script_init(ctdb) != 0) {
316 DEBUG(DEBUG_ERR,(__location__ " Failed to init event script monitoring\n"));
317 talloc_free(tmp_ctx);
322 if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
323 /* we guarantee that only some specifically allowed event scripts are run
325 const char *allowed_scripts[] = {"startrecovery", "shutdown", "releaseip" };
327 for (i=0;i<ARRAY_SIZE(allowed_scripts);i++) {
328 if (strncmp(options, allowed_scripts[i], strlen(allowed_scripts[i])) == 0) break;
330 if (i == ARRAY_SIZE(allowed_scripts)) {
331 DEBUG(DEBUG_ERR,("Refusing to run event scripts with option '%s' while in recovery\n",
333 talloc_free(tmp_ctx);
338 if (setpgid(0,0) != 0) {
339 DEBUG(DEBUG_ERR,("Failed to create process group for event scripts - %s\n",
341 talloc_free(tmp_ctx);
345 signal(SIGTERM, sigterm);
347 child_state.start = timeval_current();
348 child_state.script_running = "startup";
351 the service specific event scripts
353 if (stat(ctdb->event_script_dir, &st) != 0 &&
355 DEBUG(DEBUG_CRIT,("No event script directory found at '%s'\n", ctdb->event_script_dir));
356 talloc_free(tmp_ctx);
360 /* create a tree to store all the script names in */
361 tree = trbt_create(tmp_ctx, 0);
363 /* scan all directory entries and insert all valid scripts into the
366 dir = opendir(ctdb->event_script_dir);
368 DEBUG(DEBUG_CRIT,("Failed to open event script directory '%s'\n", ctdb->event_script_dir));
369 talloc_free(tmp_ctx);
374 while ((de=readdir(dir)) != NULL) {
379 namlen = strlen(de->d_name);
385 if (de->d_name[namlen-1] == '~') {
386 /* skip files emacs left behind */
/* script names must have a '.' as their third character ... */
390 if (de->d_name[2] != '.') {
/* ... preceded by a number, which is parsed into num */
394 if (sscanf(de->d_name, "%02u.", &num) != 1) {
398 /* Make sure the event script is executable */
/* NOTE(review): str is passed to stat() without a NULL check */
399 str = talloc_asprintf(tree, "%s/%s", ctdb->event_script_dir, de->d_name);
400 if (stat(str, &st) != 0) {
401 DEBUG(DEBUG_ERR,("Could not stat event script %s. Ignoring this event script\n", str));
404 if (!(st.st_mode & S_IXUSR)) {
405 DEBUG(DEBUG_ERR,("Event script %s is not executable. Ignoring this event script\n", str));
410 /* store the event script in the tree */
/* key: numeric prefix above bit 16, running counter below it; presumably
 * this makes the tree return scripts in prefix order - TODO confirm
 * against the rb_tree implementation */
411 trbt_insert32(tree, (num<<16)|count++, talloc_strdup(tree, de->d_name));
415 /* fetch the scripts from the tree one by one and execute
418 while ((script=trbt_findfirstarray32(tree, 1)) != NULL) {
419 cmdstr = talloc_asprintf(tmp_ctx, "%s/%s %s",
420 ctdb->event_script_dir,
422 CTDB_NO_MEMORY(ctdb, cmdstr);
424 DEBUG(DEBUG_INFO,("Executing event script %s\n",cmdstr));
426 child_state.start = timeval_current();
427 child_state.script_running = cmdstr;
429 if (is_monitor == 1) {
430 if (ctdb_ctrl_event_script_start(ctdb, script) != 0) {
431 DEBUG(DEBUG_ERR,(__location__ " Failed to start event script monitoring\n"));
432 talloc_free(tmp_ctx);
437 ret = system(cmdstr);
438 /* if the system() call was successful, translate ret into the
439 return code from the command
442 ret = WEXITSTATUS(ret);
444 if (is_monitor == 1) {
445 if (ctdb_ctrl_event_script_stop(ctdb, ret) != 0) {
446 DEBUG(DEBUG_ERR,(__location__ " Failed to stop event script monitoring\n"));
447 talloc_free(tmp_ctx);
452 /* return an error if the script failed */
454 DEBUG(DEBUG_ERR,("Event script %s failed with error %d\n", cmdstr, ret));
/* a failed script still closes the monitoring run before bailing out */
455 if (is_monitor == 1) {
456 if (ctdb_ctrl_event_script_finished(ctdb) != 0) {
457 DEBUG(DEBUG_ERR,(__location__ " Failed to finish event script monitoring\n"));
458 talloc_free(tmp_ctx);
463 talloc_free(tmp_ctx);
467 /* remove this script from the tree */
/* all scripts have been run: update what sigterm() would report */
471 child_state.start = timeval_current();
472 child_state.script_running = "finished";
474 if (is_monitor == 1) {
475 if (ctdb_ctrl_event_script_finished(ctdb) != 0) {
476 DEBUG(DEBUG_ERR,(__location__ " Failed to finish event script monitoring\n"));
477 talloc_free(tmp_ctx);
482 talloc_free(tmp_ctx);
486 /* called when child is finished */
487 static void ctdb_event_script_handler(struct event_context *ev, struct fd_event *fde,
488 uint16_t flags, void *p)
490 struct ctdb_event_script_state *state =
491 talloc_get_type(p, struct ctdb_event_script_state);
492 void (*callback)(struct ctdb_context *, int, void *) = state->callback;
493 void *private_data = state->private_data;
494 struct ctdb_context *ctdb = state->ctdb;
497 read(state->fd[0], &rt, sizeof(rt));
499 talloc_set_destructor(state, NULL);
501 callback(ctdb, rt, private_data);
503 ctdb->event_script_timeouts = 0;
/*
 * Request a ban of this node by sending a CTDB_SRVID_BAN_NODE message,
 * addressed to CTDB_BROADCAST_CONNECTED, with a ctdb_ban_info payload.
 * ban_period is stored as the ban time; callers pass
 * tunable.recovery_ban_period and log it in seconds.
 * A failure to send is only logged.
 */
506 static void ctdb_ban_self(struct ctdb_context *ctdb, uint32_t ban_period)
509 struct ctdb_ban_info b;
/* NOTE(review): any other members of b (e.g. which node to ban) must be
 * set as well before it is sent - confirm */
513 b.ban_time = ban_period;
/* the message body is the raw structure */
515 data.dptr = (uint8_t *)&b;
516 data.dsize = sizeof(b);
518 ret = ctdb_daemon_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
519 CTDB_SRVID_BAN_NODE, data);
521 DEBUG(DEBUG_ERR,(__location__ " Failed to send ban message\n"));
526 /* called when child times out */
527 static void ctdb_event_script_timeout(struct event_context *ev, struct timed_event *te,
528 struct timeval t, void *p)
530 struct ctdb_event_script_state *state = talloc_get_type(p, struct ctdb_event_script_state);
531 void (*callback)(struct ctdb_context *, int, void *) = state->callback;
532 void *private_data = state->private_data;
533 struct ctdb_context *ctdb = state->ctdb;
535 struct ctdb_monitoring_status *monitoring_status =
536 talloc_get_type(ctdb->script_monitoring_ctx,
537 struct ctdb_monitoring_status);
539 DEBUG(DEBUG_ERR,("Event script timed out : %s count : %u\n", state->options, ctdb->event_script_timeouts));
541 options = talloc_strdup(ctdb, state->options);
542 CTDB_NO_MEMORY_VOID(ctdb, options);
545 if (!strcmp(options, "monitor")) {
546 /* if it is a monitor event, we allow it to "hang" a few times
547 before we declare it a failure and ban ourself (and make
550 DEBUG(DEBUG_ERR, (__location__ " eventscript for monitor event timedout.\n"));
552 ctdb->event_script_timeouts++;
553 if (ctdb->event_script_timeouts > ctdb->tunable.script_ban_count) {
554 ctdb->event_script_timeouts = 0;
555 DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Banning self for %d seconds\n", ctdb->tunable.script_ban_count, ctdb->tunable.recovery_ban_period));
556 ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
557 callback(ctdb, -1, private_data);
559 callback(ctdb, 0, private_data);
561 } else if (!strcmp(options, "startup")) {
562 DEBUG(DEBUG_ERR, (__location__ " eventscript for startup event timedout.\n"));
563 callback(ctdb, -1, private_data);
565 /* if it is not a monitor event we ban ourself immediately */
566 DEBUG(DEBUG_ERR, (__location__ " eventscript for NON-monitor/NON-startup event timedout. Immediately banning ourself for %d seconds\n", ctdb->tunable.recovery_ban_period));
567 ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
568 callback(ctdb, -1, private_data);
571 if (monitoring_status != NULL) {
572 struct ctdb_monitor_script_status *script;
574 script = monitoring_status->scripts;
575 if (script != NULL) {
576 script->timedout = 1;
578 monitoring_status->status = MONITOR_SCRIPT_TIMEOUT;
579 if (ctdb->last_monitoring_ctx) {
580 talloc_free(ctdb->last_monitoring_ctx);
581 ctdb->last_monitoring_ctx = ctdb->script_monitoring_ctx;
582 ctdb->script_monitoring_ctx = NULL;
586 talloc_free(options);
590 destroy a running event script
592 static int event_script_destructor(struct ctdb_event_script_state *state)
594 DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));
595 kill(state->child, SIGTERM);
600 /* run the event script in the background, calling the callback when finished */
/*
 * va_list worker that starts an event script run in the background.
 *
 * Allocates a ctdb_event_script_state under mem_ctx, formats the option
 * string from fmt/ap, creates a pipe and forks.
 *  - Child: runs ctdb_event_script_v() with the option string and writes
 *    its int result to the pipe.
 *  - Parent: installs event_script_destructor() on the state, watches the
 *    read end of the pipe with ctdb_event_script_handler() and, unless
 *    'timeout' is zero, arms ctdb_event_script_timeout().
 * Both events are parented to the state, so freeing it cancels them.
 */
603 static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
604 struct timeval timeout,
606 void (*callback)(struct ctdb_context *, int, void *),
608 const char *fmt, va_list ap)
610 struct ctdb_event_script_state *state;
613 state = talloc(mem_ctx, struct ctdb_event_script_state);
614 CTDB_NO_MEMORY(ctdb, state);
617 state->callback = callback;
618 state->private_data = private_data;
/* the option string is allocated under state and released together with it */
619 state->options = talloc_vasprintf(state, fmt, ap);
620 CTDB_NO_MEMORY(ctdb, state->options);
/* pipe over which the child reports the script result */
622 ret = pipe(state->fd);
628 state->child = fork();
630 if (state->child == (pid_t)-1) {
/* child side */
637 if (state->child == 0) {
/* NOTE(review): presumably stops the scripts from inheriting the write
 * end of the pipe - confirm */
641 set_close_on_exec(state->fd[1]);
643 rt = ctdb_event_script_v(ctdb, state->options);
/* repeat until write() reports that the whole int was written */
644 while ((ret = write(state->fd[1], &rt, sizeof(rt))) != sizeof(rt)) {
/* parent side: from here on freeing state sends SIGTERM to the child */
650 talloc_set_destructor(state, event_script_destructor);
654 event_add_fd(ctdb->ev, state, state->fd[0], EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
655 ctdb_event_script_handler, state);
/* a zero timeout means "wait forever"; that case is only logged */
657 if (!timeval_is_zero(&timeout)) {
658 event_add_timed(ctdb->ev, state, timeout, ctdb_event_script_timeout, state);
660 DEBUG(DEBUG_ERR, (__location__ " eventscript %s called with no timeout\n", state->options));
668 /* run the event script in the background, calling the callback when finished */
/*
 * printf-style front end for ctdb_event_script_callback_v(): the variable
 * arguments after fmt are handed over as a va_list, every other argument
 * is passed through unchanged.
 * fmt is a format string, so callers must not pass untrusted text as fmt.
 */
671 int ctdb_event_script_callback(struct ctdb_context *ctdb,
672 struct timeval timeout,
674 void (*callback)(struct ctdb_context *, int, void *),
676 const char *fmt, ...)
682 ret = ctdb_event_script_callback_v(ctdb, timeout, mem_ctx, callback, private_data, fmt, ap);
/* Result slot handed by ctdb_event_script() to event_script_callback();
 * ctdb_event_script() polls its 'done' member and returns its 'status'
 * member. */
689 struct callback_status {
695 called when ctdb_event_script() finishes
/* Completion callback used by ctdb_event_script(); private_data is the
 * caller's struct callback_status.
 * NOTE(review): presumably records 'status' and sets the done flag that
 * ctdb_event_script() waits on - confirm */
697 static void event_script_callback(struct ctdb_context *ctdb, int status, void *private_data)
699 struct callback_status *s = (struct callback_status *)private_data;
705 run the event script, waiting for it to complete. Used when the caller doesn't want to
706 continue till the event script has finished.
/*
 * Synchronous, printf-style variant: starts the run through
 * ctdb_event_script_callback_v() with a zero timeout (so no timeout event
 * is armed) and then drives the event loop until event_script_callback()
 * marks the run as done or event_loop_once() returns non-zero.
 * The run state is allocated under a temporary context that is freed on
 * both exit paths shown. On the normal path the value returned is
 * status.status.
 */
708 int ctdb_event_script(struct ctdb_context *ctdb, const char *fmt, ...)
712 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
713 struct callback_status status;
716 ret = ctdb_event_script_callback_v(ctdb, timeval_zero(), tmp_ctx, event_script_callback, &status, fmt, ap);
720 talloc_free(tmp_ctx);
/* block here, processing events, until the callback has fired */
727 while (status.done == false && event_loop_once(ctdb->ev) == 0) /* noop */;
729 talloc_free(tmp_ctx);
731 return status.status;
/* State kept while a forced eventscript run requested through a control is
 * in progress. */
735 struct eventscript_callback_state {
/* the control request to reply to once the run has finished; it is
 * talloc_steal()ed onto this state by ctdb_run_eventscripts() */
736 struct ctdb_req_control *c;
740 /* called when a forced run of the eventscripts finishes */
/*
 * Completion callback for a forced eventscript run started by
 * ctdb_run_eventscripts(). private_data is the
 * struct eventscript_callback_state of that run.
 * Re-enables monitoring and sends the reply to the stored control request:
 * the run's status on failure, 0 on success.
 */
742 static void run_eventscripts_callback(struct ctdb_context *ctdb, int status,
745 struct eventscript_callback_state *state =
746 talloc_get_type(private_data, struct eventscript_callback_state);
/* monitoring was disabled by ctdb_run_eventscripts() for the duration of the run */
748 ctdb_enable_monitoring(ctdb);
751 DEBUG(DEBUG_ERR,(__location__ " Failed to forcibly run eventscripts\n"));
752 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
757 /* the control succeeded */
758 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
764 A control to force running of the eventscripts from the ctdb client tool
766 int32_t ctdb_run_eventscripts(struct ctdb_context *ctdb,
767 struct ctdb_req_control *c,
768 TDB_DATA indata, bool *async_reply)
771 struct eventscript_callback_state *state;
773 /* kill off any previous invocations of forced eventscripts */
774 if (ctdb->eventscripts_ctx) {
775 talloc_free(ctdb->eventscripts_ctx);
777 ctdb->eventscripts_ctx = talloc_new(ctdb);
778 CTDB_NO_MEMORY(ctdb, ctdb->eventscripts_ctx);
780 state = talloc(ctdb->eventscripts_ctx, struct eventscript_callback_state);
781 CTDB_NO_MEMORY(ctdb, state);
783 state->c = talloc_steal(state, c);
785 DEBUG(DEBUG_NOTICE,("Forced running of eventscripts with arguments %s\n", indata.dptr));
787 if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
788 DEBUG(DEBUG_ERR, (__location__ " Aborted running eventscript \"%s\" while in RECOVERY mode\n", indata.dptr));
792 ctdb_disable_monitoring(ctdb);
794 ret = ctdb_event_script_callback(ctdb,
795 timeval_current_ofs(ctdb->tunable.script_timeout, 0),
796 state, run_eventscripts_callback, state,
797 (const char *)indata.dptr);
800 ctdb_enable_monitoring(ctdb);
801 DEBUG(DEBUG_ERR,(__location__ " Failed to run eventscripts with arguments %s\n", indata.dptr));
806 /* tell ctdb_control.c that we will be replying asynchronously */