ctdb/server/ctdb_monitor.c

   1 /*
   2    monitoring links to all other nodes to detect dead nodes
   3
   4
   5    Copyright (C) Ronnie Sahlberg 2007
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program; if not, see <http://www.gnu.org/licenses/>.
  19 */
  20
  21 #include "includes.h"
  22 #include "system/filesys.h"
  23 #include "system/wait.h"
  24 #include "../include/ctdb_private.h"
  25
  26 struct ctdb_monitor_state {
  27         uint32_t monitoring_mode;
  28         TALLOC_CTX *monitor_context;
  29         uint32_t next_interval;
  30 };
  31
  32 static void ctdb_check_health(struct event_context *ev, struct timed_event *te,
  33                               struct timeval t, void *private_data);
  34
  35 /*
  36   setup the notification script
  37 */
  38 int ctdb_set_notification_script(struct ctdb_context *ctdb, const char *script)
  39 {
  40         ctdb->notification_script = talloc_strdup(ctdb, script);
  41         CTDB_NO_MEMORY(ctdb, ctdb->notification_script);
  42         return 0;
  43 }
  44
  45 static int ctdb_run_notification_script_child(struct ctdb_context *ctdb, const char *event)
  46 {
  47         struct stat st;
  48         int ret;
  49         char *cmd;
  50
  51         if (stat(ctdb->notification_script, &st) != 0) {
  52                 DEBUG(DEBUG_ERR,("Could not stat notification script %s. Can not send notifications.\n", ctdb->notification_script));
  53                 return -1;
  54         }
  55         if (!(st.st_mode & S_IXUSR)) {
  56                 DEBUG(DEBUG_ERR,("Notification script %s is not executable.\n", ctdb->notification_script));
  57                 return -1;
  58         }
  59
  60         cmd = talloc_asprintf(ctdb, "%s %s\n", ctdb->notification_script, event);
  61         CTDB_NO_MEMORY(ctdb, cmd);
  62
  63         ret = system(cmd);
  64         /* if the system() call was successful, translate ret into the
  65            return code from the command
  66         */
  67         if (ret != -1) {
  68                 ret = WEXITSTATUS(ret);
  69         }
  70         if (ret != 0) {
  71                 DEBUG(DEBUG_ERR,("Notification script \"%s\" failed with error %d\n", cmd, ret));
  72         }
  73
  74         return ret;
  75 }
  76
  77 void ctdb_run_notification_script(struct ctdb_context *ctdb, const char *event)
  78 {
  79         pid_t child;
  80
  81         if (ctdb->notification_script == NULL) {
  82                 return;
  83         }
  84
  85         child = ctdb_fork(ctdb);
  86         if (child == (pid_t)-1) {
  87                 DEBUG(DEBUG_ERR,("Failed to fork() a notification child process\n"));
  88                 return;
  89         }
  90         if (child == 0) {
  91                 int ret;
  92
  93                 ctdb_set_process_name("ctdb_notification");
  94                 debug_extra = talloc_asprintf(NULL, "notification-%s:", event);
  95                 ret = ctdb_run_notification_script_child(ctdb, event);
  96                 if (ret != 0) {
  97                         DEBUG(DEBUG_ERR,(__location__ " Notification script failed\n"));
  98                 }
  99                 _exit(0);
 100         }
 101
 102         return;
 103 }
 104
 105 /*
 106   called when a health monitoring event script finishes
 107  */
 108 static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
 109 {
 110         struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
 111         TDB_DATA data;
 112         struct ctdb_node_flag_change c;
 113         uint32_t next_interval;
 114         int ret;
 115         TDB_DATA rddata;
 116         struct srvid_request rd;
 117         const char *state_str = NULL;
 118
 119         c.pnn = ctdb->pnn;
 120         c.old_flags = node->flags;
 121
 122         rd.pnn   = ctdb->pnn;
 123         rd.srvid = CTDB_SRVID_TAKEOVER_RUN_RESPONSE;
 124
 125         rddata.dptr = (uint8_t *)&rd;
 126         rddata.dsize = sizeof(rd);
 127
 128         if (status == -ECANCELED) {
 129                 DEBUG(DEBUG_ERR,("Monitoring event was cancelled\n"));
 130                 goto after_change_status;
 131         }
 132
 133         if (status == -ETIME) {
 134                 ctdb->event_script_timeouts++;
 135
 136                 if (ctdb->event_script_timeouts >= ctdb->tunable.script_timeout_count) {
 137                         DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Making node unhealthy\n", ctdb->tunable.script_timeout_count));
 138                 } else {
 139                         /* We pretend this is OK. */
 140                         goto after_change_status;
 141                 }
 142         }
 143
 144         if (status != 0 && !(node->flags & NODE_FLAGS_UNHEALTHY)) {
 145                 DEBUG(DEBUG_NOTICE,("monitor event failed - disabling node\n"));
 146                 node->flags |= NODE_FLAGS_UNHEALTHY;
 147                 ctdb->monitor->next_interval = 5;
 148
 149                 ctdb_run_notification_script(ctdb, "unhealthy");
 150         } else if (status == 0 && (node->flags & NODE_FLAGS_UNHEALTHY)) {
 151                 DEBUG(DEBUG_NOTICE,("monitor event OK - node re-enabled\n"));
 152                 node->flags &= ~NODE_FLAGS_UNHEALTHY;
 153                 ctdb->monitor->next_interval = 5;
 154
 155                 ctdb_run_notification_script(ctdb, "healthy");
 156         }
 157
 158 after_change_status:
 159         next_interval = ctdb->monitor->next_interval;
 160
 161         ctdb->monitor->next_interval *= 2;
 162         if (ctdb->monitor->next_interval > ctdb->tunable.monitor_interval) {
 163                 ctdb->monitor->next_interval = ctdb->tunable.monitor_interval;
 164         }
 165
 166         event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 167                                 timeval_current_ofs(next_interval, 0),
 168                                 ctdb_check_health, ctdb);
 169
 170         if (c.old_flags == node->flags) {
 171                 return;
 172         }
 173
 174         c.new_flags = node->flags;
 175
 176         data.dptr = (uint8_t *)&c;
 177         data.dsize = sizeof(c);
 178
 179         /* ask the recovery daemon to push these changes out to all nodes */
 180         ctdb_daemon_send_message(ctdb, ctdb->pnn,
 181                                  CTDB_SRVID_PUSH_NODE_FLAGS, data);
 182
 183         if (c.new_flags & NODE_FLAGS_UNHEALTHY) {
 184                 state_str = "UNHEALTHY";
 185         } else {
 186                 state_str = "HEALTHY";
 187         }
 188
 189         /* ask the recmaster to reallocate all addresses */
 190         DEBUG(DEBUG_ERR,
 191               ("Node became %s. Ask recovery master to reallocate IPs\n",
 192                state_str));
 193         ret = ctdb_daemon_send_message(ctdb, CTDB_BROADCAST_CONNECTED, CTDB_SRVID_TAKEOVER_RUN, rddata);
 194         if (ret != 0) {
 195                 DEBUG(DEBUG_ERR,
 196                       (__location__
 197                        " Failed to send IP takeover run request\n"));
 198         }
 199 }
 200
 201
 202 static void ctdb_run_startup(struct event_context *ev, struct timed_event *te,
 203                              struct timeval t, void *private_data);
 204 /*
 205   called when the startup event script finishes
 206  */
 207 static void ctdb_startup_callback(struct ctdb_context *ctdb, int status, void *p)
 208 {
 209         if (status != 0) {
 210                 DEBUG(DEBUG_ERR,("startup event failed\n"));
 211                 event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 212                                 timeval_current_ofs(5, 0),
 213                                 ctdb_run_startup, ctdb);
 214                 return;
 215         }
 216
 217         DEBUG(DEBUG_NOTICE,("startup event OK - enabling monitoring\n"));
 218         ctdb_set_runstate(ctdb, CTDB_RUNSTATE_RUNNING);
 219         ctdb->monitor->next_interval = 2;
 220         ctdb_run_notification_script(ctdb, "startup");
 221
 222         ctdb->monitor->monitoring_mode = CTDB_MONITORING_ACTIVE;
 223
 224         event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 225                         timeval_current_ofs(ctdb->monitor->next_interval, 0),
 226                         ctdb_check_health, ctdb);
 227 }
 228
 229 static void ctdb_run_startup(struct event_context *ev, struct timed_event *te,
 230                              struct timeval t, void *private_data)
 231 {
 232         struct ctdb_context *ctdb = talloc_get_type(private_data,
 233                                                     struct ctdb_context);
 234         int ret;
 235
 236         /* This is necessary to avoid the "startup" event colliding
 237          * with the "ipreallocated" event from the takeover run
 238          * following the first recovery.  We might as well serialise
 239          * these things if we can.
 240          */
 241         if (ctdb->runstate < CTDB_RUNSTATE_STARTUP) {
 242                 DEBUG(DEBUG_NOTICE,
 243                       ("Not yet in startup runstate. Wait one more second\n"));
 244                 event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 245                                 timeval_current_ofs(1, 0),
 246                                 ctdb_run_startup, ctdb);
 247                 return;
 248         }
 249
 250         /* release any IPs we hold from previous runs of the daemon */
 251         ctdb_release_all_ips(ctdb);
 252
 253         DEBUG(DEBUG_NOTICE,("Running the \"startup\" event.\n"));
 254         ret = ctdb_event_script_callback(ctdb,
 255                                          ctdb->monitor->monitor_context,
 256                                          ctdb_startup_callback,
 257                                          ctdb, CTDB_EVENT_STARTUP, "%s", "");
 258
 259         if (ret != 0) {
 260                 DEBUG(DEBUG_ERR,("Unable to launch startup event script\n"));
 261                 event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 262                                 timeval_current_ofs(5, 0),
 263                                 ctdb_run_startup, ctdb);
 264         }
 265 }
 266
 267 /*
 268   wait until we have finished initial recoveries before we start the
 269   monitoring events
 270  */
 271 static void ctdb_wait_until_recovered(struct event_context *ev, struct timed_event *te,
 272                               struct timeval t, void *private_data)
 273 {
 274         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
 275         int ret;
 276         static int count = 0;
 277
 278         count++;
 279
 280         if (count < 60 || count%600 == 0) {
 281                 DEBUG(DEBUG_NOTICE,("CTDB_WAIT_UNTIL_RECOVERED\n"));
 282                 if (ctdb->nodes[ctdb->pnn]->flags & NODE_FLAGS_STOPPED) {
 283                         DEBUG(DEBUG_NOTICE,("Node is STOPPED. Node will NOT recover.\n"));
 284                 }
 285         }
 286
 287         if (ctdb->vnn_map->generation == INVALID_GENERATION) {
 288                 ctdb->db_persistent_startup_generation = INVALID_GENERATION;
 289
 290                 event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 291                                      timeval_current_ofs(1, 0),
 292                                      ctdb_wait_until_recovered, ctdb);
 293                 return;
 294         }
 295
 296         if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
 297                 ctdb->db_persistent_startup_generation = INVALID_GENERATION;
 298
 299                 DEBUG(DEBUG_NOTICE,(__location__ " in recovery. Wait one more second\n"));
 300                 event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 301                                      timeval_current_ofs(1, 0),
 302                                      ctdb_wait_until_recovered, ctdb);
 303                 return;
 304         }
 305
 306
 307         if (!fast_start && timeval_elapsed(&ctdb->last_recovery_finished) < (ctdb->tunable.rerecovery_timeout + 3)) {
 308                 ctdb->db_persistent_startup_generation = INVALID_GENERATION;
 309
 310                 DEBUG(DEBUG_NOTICE,(__location__ " wait for pending recoveries to end. Wait one more second.\n"));
 311
 312                 event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 313                                      timeval_current_ofs(1, 0),
 314                                      ctdb_wait_until_recovered, ctdb);
 315                 return;
 316         }
 317
 318         if (ctdb->vnn_map->generation == ctdb->db_persistent_startup_generation) {
 319                 DEBUG(DEBUG_INFO,(__location__ " skip ctdb_recheck_persistent_health() "
 320                                   "until the next recovery\n"));
 321                 event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 322                                      timeval_current_ofs(1, 0),
 323                                      ctdb_wait_until_recovered, ctdb);
 324                 return;
 325         }
 326
 327         ctdb->db_persistent_startup_generation = ctdb->vnn_map->generation;
 328         ret = ctdb_recheck_persistent_health(ctdb);
 329         if (ret != 0) {
 330                 ctdb->db_persistent_check_errors++;
 331                 if (ctdb->db_persistent_check_errors < ctdb->max_persistent_check_errors) {
 332                         DEBUG(ctdb->db_persistent_check_errors==1?DEBUG_ERR:DEBUG_WARNING,
 333                               (__location__ "ctdb_recheck_persistent_health() "
 334                               "failed (%llu of %llu times) - retry later\n",
 335                               (unsigned long long)ctdb->db_persistent_check_errors,
 336                               (unsigned long long)ctdb->max_persistent_check_errors));
 337                         event_add_timed(ctdb->ev,
 338                                         ctdb->monitor->monitor_context,
 339                                         timeval_current_ofs(1, 0),
 340                                         ctdb_wait_until_recovered, ctdb);
 341                         return;
 342                 }
 343                 DEBUG(DEBUG_ALERT,(__location__
 344                                   "ctdb_recheck_persistent_health() failed (%llu times) - prepare shutdown\n",
 345                                   (unsigned long long)ctdb->db_persistent_check_errors));
 346                 ctdb_shutdown_sequence(ctdb, 11);
 347                 /* In case above returns due to duplicate shutdown */
 348                 return;
 349         }
 350         ctdb->db_persistent_check_errors = 0;
 351
 352         event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 353                         timeval_current(), ctdb_run_startup, ctdb);
 354 }
 355
 356
 357 /*
 358   see if the event scripts think we are healthy
 359  */
 360 static void ctdb_check_health(struct event_context *ev, struct timed_event *te,
 361                               struct timeval t, void *private_data)
 362 {
 363         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
 364         bool skip_monitoring = false;
 365         int ret = 0;
 366
 367         if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL ||
 368             ctdb->monitor->monitoring_mode == CTDB_MONITORING_DISABLED) {
 369                 skip_monitoring = true;
 370         } else {
 371                 int i;
 372                 for (i=1; i<=NUM_DB_PRIORITIES; i++) {
 373                         if (ctdb->freeze_handles[i] != NULL) {
 374                                 DEBUG(DEBUG_ERR,
 375                                       ("Skip monitoring since databases are frozen\n"));
 376                                 skip_monitoring = true;
 377                                 break;
 378                         }
 379                 }
 380         }
 381
 382         if (skip_monitoring) {
 383                 event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 384                                 timeval_current_ofs(ctdb->monitor->next_interval, 0),
 385                                 ctdb_check_health, ctdb);
 386                 return;
 387         }
 388
 389         ret = ctdb_event_script_callback(ctdb,
 390                                          ctdb->monitor->monitor_context,
 391                                          ctdb_health_callback,
 392                                          ctdb, CTDB_EVENT_MONITOR, "%s", "");
 393         if (ret != 0) {
 394                 DEBUG(DEBUG_ERR,("Unable to launch monitor event script\n"));
 395                 ctdb->monitor->next_interval = 5;
 396                 event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 397                                 timeval_current_ofs(5, 0),
 398                                 ctdb_check_health, ctdb);
 399         }
 400 }
 401
 402 /*
 403   (Temporaily) Disabling monitoring will stop the monitor event scripts
 404   from running   but node health checks will still occur
 405 */
 406 void ctdb_disable_monitoring(struct ctdb_context *ctdb)
 407 {
 408         ctdb->monitor->monitoring_mode = CTDB_MONITORING_DISABLED;
 409         DEBUG(DEBUG_INFO,("Monitoring has been disabled\n"));
 410 }
 411
 412 /*
 413    Re-enable running monitor events after they have been disabled
 414  */
 415 void ctdb_enable_monitoring(struct ctdb_context *ctdb)
 416 {
 417         ctdb->monitor->monitoring_mode  = CTDB_MONITORING_ACTIVE;
 418         ctdb->monitor->next_interval = 5;
 419         DEBUG(DEBUG_INFO,("Monitoring has been enabled\n"));
 420 }
 421
 422 /* stop any monitoring
 423    this should only be done when shutting down the daemon
 424 */
 425 void ctdb_stop_monitoring(struct ctdb_context *ctdb)
 426 {
 427         talloc_free(ctdb->monitor->monitor_context);
 428         ctdb->monitor->monitor_context = NULL;
 429
 430         ctdb->monitor->monitoring_mode  = CTDB_MONITORING_DISABLED;
 431         ctdb->monitor->next_interval = 5;
 432         DEBUG(DEBUG_NOTICE,("Monitoring has been stopped\n"));
 433 }
 434
 435 /*
 436   start watching for nodes that might be dead
 437  */
 438 void ctdb_wait_for_first_recovery(struct ctdb_context *ctdb)
 439 {
 440         ctdb_set_runstate(ctdb, CTDB_RUNSTATE_FIRST_RECOVERY);
 441
 442         ctdb->monitor = talloc(ctdb, struct ctdb_monitor_state);
 443         CTDB_NO_MEMORY_FATAL(ctdb, ctdb->monitor);
 444
 445         ctdb->monitor->monitor_context = talloc_new(ctdb->monitor);
 446         CTDB_NO_MEMORY_FATAL(ctdb, ctdb->monitor->monitor_context);
 447
 448         event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 449                         timeval_current_ofs(1, 0),
 450                         ctdb_wait_until_recovered, ctdb);
 451 }
 452
 453
 454 /*
 455   modify flags on a node
 456  */
 457 int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)
 458 {
 459         struct ctdb_node_flag_change *c = (struct ctdb_node_flag_change *)indata.dptr;
 460         struct ctdb_node *node;
 461         uint32_t old_flags;
 462
 463         if (c->pnn >= ctdb->num_nodes) {
 464                 DEBUG(DEBUG_ERR,(__location__ " Node %d is invalid, num_nodes :%d\n", c->pnn, ctdb->num_nodes));
 465                 return -1;
 466         }
 467
 468         node         = ctdb->nodes[c->pnn];
 469         old_flags    = node->flags;
 470         if (c->pnn != ctdb->pnn) {
 471                 c->old_flags  = node->flags;
 472         }
 473         node->flags   = c->new_flags & ~NODE_FLAGS_DISCONNECTED;
 474         node->flags  |= (c->old_flags & NODE_FLAGS_DISCONNECTED);
 475
 476         /* we dont let other nodes modify our STOPPED status */
 477         if (c->pnn == ctdb->pnn) {
 478                 node->flags &= ~NODE_FLAGS_STOPPED;
 479                 if (old_flags & NODE_FLAGS_STOPPED) {
 480                         node->flags |= NODE_FLAGS_STOPPED;
 481                 }
 482         }
 483
 484         /* we dont let other nodes modify our BANNED status */
 485         if (c->pnn == ctdb->pnn) {
 486                 node->flags &= ~NODE_FLAGS_BANNED;
 487                 if (old_flags & NODE_FLAGS_BANNED) {
 488                         node->flags |= NODE_FLAGS_BANNED;
 489                 }
 490         }
 491
 492         if (node->flags == c->old_flags) {
 493                 DEBUG(DEBUG_INFO, ("Control modflags on node %u - Unchanged - flags 0x%x\n", c->pnn, node->flags));
 494                 return 0;
 495         }
 496
 497         DEBUG(DEBUG_INFO, ("Control modflags on node %u - flags now 0x%x\n", c->pnn, node->flags));
 498
 499         if (node->flags == 0 && ctdb->runstate <= CTDB_RUNSTATE_STARTUP) {
 500                 DEBUG(DEBUG_ERR, (__location__ " Node %u became healthy - force recovery for startup\n",
 501                                   c->pnn));
 502                 ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
 503         }
 504
 505         /* tell the recovery daemon something has changed */
 506         c->new_flags = node->flags;
 507         ctdb_daemon_send_message(ctdb, ctdb->pnn,
 508                                  CTDB_SRVID_SET_NODE_FLAGS, indata);
 509
 510         /* if we have become banned, we should go into recovery mode */
 511         if ((node->flags & NODE_FLAGS_BANNED) && !(c->old_flags & NODE_FLAGS_BANNED) && (node->pnn == ctdb->pnn)) {
 512                 ctdb_local_node_got_banned(ctdb);
 513         }
 514
 515         return 0;
 516 }
 517
 518 /*
 519   return the monitoring mode
 520  */
 521 int32_t ctdb_monitoring_mode(struct ctdb_context *ctdb)
 522 {
 523         if (ctdb->monitor == NULL) {
 524                 return CTDB_MONITORING_DISABLED;
 525         }
 526         return ctdb->monitor->monitoring_mode;
 527 }
 528
 529 /*
 530  * Check if monitoring has been stopped
 531  */
 532 bool ctdb_stopped_monitoring(struct ctdb_context *ctdb)
 533 {
 534         return (ctdb->monitor->monitor_context == NULL ? true : false);
 535 }