/*
   monitoring links to all other nodes to detect dead nodes


   Copyright (C) Ronnie Sahlberg 2007

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.
*/

#include "includes.h"
#include "system/filesys.h"
#include "system/wait.h"
#include "../include/ctdb_private.h"
#include "common/system.h"

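/*
 * Monitoring state kept per daemon: monitoring_mode switches the
 * "monitor" event scripts on or off, monitor_context parents all
 * monitoring timers and event-script callbacks so they can be torn
 * down with a single talloc_free(), and next_interval is the delay
 * in seconds before the next health check.
 */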
struct ctdb_monitor_state {
        uint32_t monitoring_mode;
        TALLOC_CTX *monitor_context;
        uint32_t next_interval;
};

static void ctdb_check_health(struct tevent_context *ev,
                              struct tevent_timer *te,
                              struct timeval t, void *private_data);

/*
  set up the notification script
*/
int ctdb_set_notification_script(struct ctdb_context *ctdb, const char *script)
{
        ctdb->notification_script = talloc_strdup(ctdb, script);
        CTDB_NO_MEMORY(ctdb, ctdb->notification_script);
        return 0;
}

static int ctdb_run_notification_script_child(struct ctdb_context *ctdb, const char *event)
{
        struct stat st;
        int ret;
        char *cmd;

        if (stat(ctdb->notification_script, &st) != 0) {
                DEBUG(DEBUG_ERR,("Could not stat notification script %s. Cannot send notifications.\n", ctdb->notification_script));
                return -1;
        }
        if (!(st.st_mode & S_IXUSR)) {
                DEBUG(DEBUG_ERR,("Notification script %s is not executable.\n", ctdb->notification_script));
                return -1;
        }

        cmd = talloc_asprintf(ctdb, "%s %s\n", ctdb->notification_script, event);
        CTDB_NO_MEMORY(ctdb, cmd);

        ret = system(cmd);
        /* if the system() call was successful, translate ret into the
           return code from the command
        */
        if (ret != -1) {
                ret = WEXITSTATUS(ret);
        }
        if (ret != 0) {
                DEBUG(DEBUG_ERR,("Notification script \"%s\" failed with error %d\n", cmd, ret));
        }

        return ret;
}

void ctdb_run_notification_script(struct ctdb_context *ctdb, const char *event)
{
        pid_t child;

        if (ctdb->notification_script == NULL) {
                return;
        }

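        /* Run the notification script in a forked child so a slow or
         * hung script cannot block the main daemon; the parent returns
         * immediately without waiting for the child. */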
        child = ctdb_fork(ctdb);
        if (child == (pid_t)-1) {
                DEBUG(DEBUG_ERR,("Failed to fork() a notification child process\n"));
                return;
        }
        if (child == 0) {
                int ret;

                ctdb_set_process_name("ctdb_notification");
                debug_extra = talloc_asprintf(NULL, "notification-%s:", event);
                ret = ctdb_run_notification_script_child(ctdb, event);
                if (ret != 0) {
                        DEBUG(DEBUG_ERR,(__location__ " Notification script failed\n"));
                }
                _exit(0);
        }

        return;
}

/*
  called when a health monitoring event script finishes
 */
static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
{
        struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
        TDB_DATA data;
        struct ctdb_node_flag_change c;
        uint32_t next_interval;
        int ret;
        TDB_DATA rddata;
        struct srvid_request rd;
        const char *state_str = NULL;

        c.pnn = ctdb->pnn;
        c.old_flags = node->flags;

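        /* Pre-build the request used further down to ask the recovery
         * master for a takeover run if this node's health flags end up
         * changing. */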
        rd.pnn   = ctdb->pnn;
        rd.srvid = CTDB_SRVID_TAKEOVER_RUN_RESPONSE;

        rddata.dptr = (uint8_t *)&rd;
        rddata.dsize = sizeof(rd);

        if (status == -ECANCELED) {
                DEBUG(DEBUG_ERR,("Monitoring event was cancelled\n"));
                goto after_change_status;
        }

        if (status == -ETIME) {
                ctdb->event_script_timeouts++;

                if (ctdb->event_script_timeouts >= ctdb->tunable.script_timeout_count) {
                        DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Making node unhealthy\n", ctdb->tunable.script_timeout_count));
                } else {
                        /* We pretend this is OK. */
                        goto after_change_status;
                }
        }

        if (status != 0 && !(node->flags & NODE_FLAGS_UNHEALTHY)) {
                DEBUG(DEBUG_NOTICE,("monitor event failed - disabling node\n"));
                node->flags |= NODE_FLAGS_UNHEALTHY;
                ctdb->monitor->next_interval = 5;

                ctdb_run_notification_script(ctdb, "unhealthy");
        } else if (status == 0 && (node->flags & NODE_FLAGS_UNHEALTHY)) {
                DEBUG(DEBUG_NOTICE,("monitor event OK - node re-enabled\n"));
                node->flags &= ~NODE_FLAGS_UNHEALTHY;
                ctdb->monitor->next_interval = 5;

                ctdb_run_notification_script(ctdb, "healthy");
        }

after_change_status:
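        /* The check interval backs off exponentially: it doubles after
         * every monitor event, capped at ctdb->tunable.monitor_interval,
         * and is reset to a short interval above whenever the node's
         * health state changes. */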
        next_interval = ctdb->monitor->next_interval;

        ctdb->monitor->next_interval *= 2;
        if (ctdb->monitor->next_interval > ctdb->tunable.monitor_interval) {
                ctdb->monitor->next_interval = ctdb->tunable.monitor_interval;
        }

        tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
                         timeval_current_ofs(next_interval, 0),
                         ctdb_check_health, ctdb);

        if (c.old_flags == node->flags) {
                return;
        }

        c.new_flags = node->flags;

        data.dptr = (uint8_t *)&c;
        data.dsize = sizeof(c);

        /* ask the recovery daemon to push these changes out to all nodes */
        ctdb_daemon_send_message(ctdb, ctdb->pnn,
                                 CTDB_SRVID_PUSH_NODE_FLAGS, data);

        if (c.new_flags & NODE_FLAGS_UNHEALTHY) {
                state_str = "UNHEALTHY";
        } else {
                state_str = "HEALTHY";
        }

        /* ask the recmaster to reallocate all addresses */
        DEBUG(DEBUG_ERR,
              ("Node became %s. Ask recovery master to reallocate IPs\n",
               state_str));
        ret = ctdb_daemon_send_message(ctdb, CTDB_BROADCAST_CONNECTED, CTDB_SRVID_TAKEOVER_RUN, rddata);
        if (ret != 0) {
                DEBUG(DEBUG_ERR,
                      (__location__
                       " Failed to send IP takeover run request\n"));
        }
}


static void ctdb_run_startup(struct tevent_context *ev,
                             struct tevent_timer *te,
                             struct timeval t, void *private_data);
/*
  called when the startup event script finishes
 */
static void ctdb_startup_callback(struct ctdb_context *ctdb, int status, void *p)
{
        if (status != 0) {
                DEBUG(DEBUG_ERR,("startup event failed\n"));
                tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
                                 timeval_current_ofs(5, 0),
                                 ctdb_run_startup, ctdb);
                return;
        }

        DEBUG(DEBUG_NOTICE,("startup event OK - enabling monitoring\n"));
        ctdb_set_runstate(ctdb, CTDB_RUNSTATE_RUNNING);
        ctdb->monitor->next_interval = 2;
        ctdb_run_notification_script(ctdb, "startup");

        ctdb->monitor->monitoring_mode = CTDB_MONITORING_ACTIVE;

        tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
                         timeval_current_ofs(ctdb->monitor->next_interval, 0),
                         ctdb_check_health, ctdb);
}

static void ctdb_run_startup(struct tevent_context *ev,
                             struct tevent_timer *te,
                             struct timeval t, void *private_data)
{
        struct ctdb_context *ctdb = talloc_get_type(private_data,
                                                    struct ctdb_context);
        int ret;

        /* This is necessary to avoid the "startup" event colliding
         * with the "ipreallocated" event from the takeover run
         * following the first recovery.  We might as well serialise
         * these things if we can.
         */
        if (ctdb->runstate < CTDB_RUNSTATE_STARTUP) {
                DEBUG(DEBUG_NOTICE,
                      ("Not yet in startup runstate. Wait one more second\n"));
                tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
                                 timeval_current_ofs(1, 0),
                                 ctdb_run_startup, ctdb);
                return;
        }

        /* release any IPs we hold from previous runs of the daemon */
        ctdb_release_all_ips(ctdb);

        DEBUG(DEBUG_NOTICE,("Running the \"startup\" event.\n"));
        ret = ctdb_event_script_callback(ctdb,
                                         ctdb->monitor->monitor_context,
                                         ctdb_startup_callback,
                                         ctdb, CTDB_EVENT_STARTUP, "%s", "");

        if (ret != 0) {
                DEBUG(DEBUG_ERR,("Unable to launch startup event script\n"));
                tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
                                 timeval_current_ofs(5, 0),
                                 ctdb_run_startup, ctdb);
        }
}

/*
  wait until we have finished initial recoveries before we start the
  monitoring events
 */
static void ctdb_wait_until_recovered(struct tevent_context *ev,
                                      struct tevent_timer *te,
                                      struct timeval t, void *private_data)
{
        struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
        int ret;
        static int count = 0;

        count++;

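        /* This timer re-arms itself every second, so log for roughly
         * the first minute and then only about every ten minutes to
         * avoid flooding the log while waiting. */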
        if (count < 60 || count % 600 == 0) {
                DEBUG(DEBUG_NOTICE,("CTDB_WAIT_UNTIL_RECOVERED\n"));
                if (ctdb->nodes[ctdb->pnn]->flags & NODE_FLAGS_STOPPED) {
                        DEBUG(DEBUG_NOTICE,("Node is STOPPED. Node will NOT recover.\n"));
                }
        }

        if (ctdb->vnn_map->generation == INVALID_GENERATION) {
                ctdb->db_persistent_startup_generation = INVALID_GENERATION;

                tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
                                 timeval_current_ofs(1, 0),
                                 ctdb_wait_until_recovered, ctdb);
                return;
        }

        if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
                ctdb->db_persistent_startup_generation = INVALID_GENERATION;

                DEBUG(DEBUG_NOTICE,(__location__ " in recovery. Wait one more second\n"));
                tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
                                 timeval_current_ofs(1, 0),
                                 ctdb_wait_until_recovered, ctdb);
                return;
        }


        if (!fast_start && timeval_elapsed(&ctdb->last_recovery_finished) < (ctdb->tunable.rerecovery_timeout + 3)) {
                ctdb->db_persistent_startup_generation = INVALID_GENERATION;

                DEBUG(DEBUG_NOTICE,(__location__ " wait for pending recoveries to end. Wait one more second.\n"));

                tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
                                 timeval_current_ofs(1, 0),
                                 ctdb_wait_until_recovered, ctdb);
                return;
        }

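        /* Run the persistent database health check at most once per
         * recovery generation; if this generation was already checked,
         * just keep waiting for the next recovery. */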
        if (ctdb->vnn_map->generation == ctdb->db_persistent_startup_generation) {
                DEBUG(DEBUG_INFO,(__location__ " skip ctdb_recheck_persistent_health() "
                                  "until the next recovery\n"));
                tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
                                 timeval_current_ofs(1, 0),
                                 ctdb_wait_until_recovered, ctdb);
                return;
        }

        ctdb->db_persistent_startup_generation = ctdb->vnn_map->generation;
        ret = ctdb_recheck_persistent_health(ctdb);
        if (ret != 0) {
                ctdb->db_persistent_check_errors++;
                if (ctdb->db_persistent_check_errors < ctdb->max_persistent_check_errors) {
                        DEBUG(ctdb->db_persistent_check_errors == 1 ? DEBUG_ERR : DEBUG_WARNING,
                              (__location__ " ctdb_recheck_persistent_health() "
                              "failed (%llu of %llu times) - retry later\n",
                              (unsigned long long)ctdb->db_persistent_check_errors,
                              (unsigned long long)ctdb->max_persistent_check_errors));
                        tevent_add_timer(ctdb->ev,
                                         ctdb->monitor->monitor_context,
                                         timeval_current_ofs(1, 0),
                                         ctdb_wait_until_recovered, ctdb);
                        return;
                }
                DEBUG(DEBUG_ALERT,(__location__
                                  " ctdb_recheck_persistent_health() failed (%llu times) - prepare shutdown\n",
                                  (unsigned long long)ctdb->db_persistent_check_errors));
                ctdb_shutdown_sequence(ctdb, 11);
                /* In case above returns due to duplicate shutdown */
                return;
        }
        ctdb->db_persistent_check_errors = 0;

        tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
                         timeval_current(), ctdb_run_startup, ctdb);
}


/*
  see if the event scripts think we are healthy
 */
static void ctdb_check_health(struct tevent_context *ev,
                              struct tevent_timer *te,
                              struct timeval t, void *private_data)
{
        struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
        bool skip_monitoring = false;
        int ret = 0;

        if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL ||
            ctdb->monitor->monitoring_mode == CTDB_MONITORING_DISABLED) {
                skip_monitoring = true;
        } else {
                if (ctdb_db_all_frozen(ctdb)) {
                        DEBUG(DEBUG_ERR,
                              ("Skip monitoring since databases are frozen\n"));
                        skip_monitoring = true;
                }
        }

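        /* When monitoring is skipped, just re-arm the timer so health
         * checks resume automatically once recovery completes, the
         * databases are thawed or monitoring is re-enabled. */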
        if (skip_monitoring) {
                tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
                                 timeval_current_ofs(ctdb->monitor->next_interval, 0),
                                 ctdb_check_health, ctdb);
                return;
        }

        ret = ctdb_event_script_callback(ctdb,
                                         ctdb->monitor->monitor_context,
                                         ctdb_health_callback,
                                         ctdb, CTDB_EVENT_MONITOR, "%s", "");
        if (ret != 0) {
                DEBUG(DEBUG_ERR,("Unable to launch monitor event script\n"));
                ctdb->monitor->next_interval = 5;
                tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
                                 timeval_current_ofs(5, 0),
                                 ctdb_check_health, ctdb);
        }
}

/*
  (Temporarily) disabling monitoring will stop the monitor event scripts
  from running, but node health checks will still occur
*/
void ctdb_disable_monitoring(struct ctdb_context *ctdb)
{
        ctdb->monitor->monitoring_mode = CTDB_MONITORING_DISABLED;
        DEBUG(DEBUG_INFO,("Monitoring has been disabled\n"));
}

/*
   Re-enable running monitor events after they have been disabled
 */
void ctdb_enable_monitoring(struct ctdb_context *ctdb)
{
        ctdb->monitor->monitoring_mode = CTDB_MONITORING_ACTIVE;
        ctdb->monitor->next_interval = 5;
        DEBUG(DEBUG_INFO,("Monitoring has been enabled\n"));
}

/* stop any monitoring
   this should only be done when shutting down the daemon
*/
void ctdb_stop_monitoring(struct ctdb_context *ctdb)
{
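        /* Freeing the monitor context tears down all pending
         * monitoring timers parented to it. */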
        talloc_free(ctdb->monitor->monitor_context);
        ctdb->monitor->monitor_context = NULL;

        ctdb->monitor->monitoring_mode = CTDB_MONITORING_DISABLED;
        ctdb->monitor->next_interval = 5;
        DEBUG(DEBUG_NOTICE,("Monitoring has been stopped\n"));
}

/*
  start watching for nodes that might be dead
 */
void ctdb_wait_for_first_recovery(struct ctdb_context *ctdb)
{
        ctdb_set_runstate(ctdb, CTDB_RUNSTATE_FIRST_RECOVERY);

        ctdb->monitor = talloc(ctdb, struct ctdb_monitor_state);
        CTDB_NO_MEMORY_FATAL(ctdb, ctdb->monitor);

        ctdb->monitor->monitor_context = talloc_new(ctdb->monitor);
        CTDB_NO_MEMORY_FATAL(ctdb, ctdb->monitor->monitor_context);

        tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
                         timeval_current_ofs(1, 0),
                         ctdb_wait_until_recovered, ctdb);
}


/*
  modify flags on a node
 */
int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)
{
        struct ctdb_node_flag_change *c = (struct ctdb_node_flag_change *)indata.dptr;
        struct ctdb_node *node;
        uint32_t old_flags;

        if (c->pnn >= ctdb->num_nodes) {
                DEBUG(DEBUG_ERR,(__location__ " Node %d is invalid, num_nodes: %d\n", c->pnn, ctdb->num_nodes));
                return -1;
        }

        node         = ctdb->nodes[c->pnn];
        old_flags    = node->flags;
        if (c->pnn != ctdb->pnn) {
                c->old_flags  = node->flags;
        }
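        /* Apply the new flags, but never let a remote update change
         * the local view of whether this node is connected: the
         * DISCONNECTED bit is always taken from the old flags. */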
        node->flags   = c->new_flags & ~NODE_FLAGS_DISCONNECTED;
        node->flags  |= (c->old_flags & NODE_FLAGS_DISCONNECTED);

        /* we don't let other nodes modify our STOPPED status */
        if (c->pnn == ctdb->pnn) {
                node->flags &= ~NODE_FLAGS_STOPPED;
                if (old_flags & NODE_FLAGS_STOPPED) {
                        node->flags |= NODE_FLAGS_STOPPED;
                }
        }

        /* we don't let other nodes modify our BANNED status */
        if (c->pnn == ctdb->pnn) {
                node->flags &= ~NODE_FLAGS_BANNED;
                if (old_flags & NODE_FLAGS_BANNED) {
                        node->flags |= NODE_FLAGS_BANNED;
                }
        }

        if (node->flags == c->old_flags) {
                DEBUG(DEBUG_INFO, ("Control modflags on node %u - Unchanged - flags 0x%x\n", c->pnn, node->flags));
                return 0;
        }

        DEBUG(DEBUG_INFO, ("Control modflags on node %u - flags now 0x%x\n", c->pnn, node->flags));

        if (node->flags == 0 && ctdb->runstate <= CTDB_RUNSTATE_STARTUP) {
                DEBUG(DEBUG_ERR, (__location__ " Node %u became healthy - force recovery for startup\n",
                                  c->pnn));
                ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
        }

        /* tell the recovery daemon something has changed */
        c->new_flags = node->flags;
        ctdb_daemon_send_message(ctdb, ctdb->pnn,
                                 CTDB_SRVID_SET_NODE_FLAGS, indata);

        /* if we have become banned, we should go into recovery mode */
        if ((node->flags & NODE_FLAGS_BANNED) && !(c->old_flags & NODE_FLAGS_BANNED) && (node->pnn == ctdb->pnn)) {
                ctdb_local_node_got_banned(ctdb);
        }

        return 0;
}

/*
  return the monitoring mode
 */
int32_t ctdb_monitoring_mode(struct ctdb_context *ctdb)
{
        if (ctdb->monitor == NULL) {
                return CTDB_MONITORING_DISABLED;
        }
        return ctdb->monitor->monitoring_mode;
}

/*
 * Check if monitoring has been stopped
 */
bool ctdb_stopped_monitoring(struct ctdb_context *ctdb)
{
        return ctdb->monitor->monitor_context == NULL;
}