server: Use tdb_check to verify persistent tdbs on startup
author Stefan Metzmacher <metze@samba.org>
Mon, 7 Dec 2009 12:28:11 +0000 (13:28 +0100)
committer Stefan Metzmacher <metze@samba.org>
Wed, 16 Dec 2009 07:06:10 +0000 (08:06 +0100)
Depending on --max-persistent-check-errors we allow ctdb
to start with unhealthy persistent databases.

The default is 0 which means to reject a startup with
unhealthy dbs.

The health of the persistent databases is checked after each
recovery. Node monitoring and the "startup" is deferred
until all persistent databases are healthy.

Databases can become healthy automatically when a completely
HEALTHY node joins the cluster, or through an administrator
using "ctdb backupdb/restoredb" or "ctdb wipedb".

metze

include/ctdb_private.h
server/ctdb_daemon.c
server/ctdb_freeze.c
server/ctdb_ltdb_server.c
server/ctdb_monitor.c
server/ctdb_persistent.c
server/ctdb_recover.c
server/ctdb_traverse.c
server/ctdb_tunables.c
server/ctdbd.c

index 5f63c8f97bfffb53978863fdf1e935664eb95877..f29e5dec41f29c458a58bdd73dfa63d81c3f5f80 100644 (file)
@@ -129,6 +129,7 @@ struct ctdb_tunable {
        uint32_t vacuum_max_interval;
        uint32_t max_queue_depth_drop_msg;
        uint32_t use_status_events_for_monitoring;
+       uint32_t allow_unhealthy_db_read;
 };
 
 /*
@@ -407,6 +408,9 @@ struct ctdb_context {
        const char *db_directory_persistent;
        const char *db_directory_state;
        struct tdb_wrap *db_persistent_health;
+       uint32_t db_persistent_startup_generation;
+       uint64_t db_persistent_check_errors;
+       uint64_t max_persistent_check_errors;
        const char *transport;
        char *recovery_lock_file;
        int recovery_lock_fd;
@@ -479,6 +483,7 @@ struct ctdb_db_context {
        struct ctdb_traverse_local_handle *traverse;
        bool transaction_active;
        struct ctdb_vacuum_handle *vacuum_handle;
+       char *unhealthy_reason;
 };
 
 
@@ -1543,4 +1548,12 @@ int32_t ctdb_control_get_db_seqnum(struct ctdb_context *ctdb,
                                   TDB_DATA indata,
                                   TDB_DATA *outdata);
 
+int ctdb_load_persistent_health(struct ctdb_context *ctdb,
+                               struct ctdb_db_context *ctdb_db);
+int ctdb_update_persistent_health(struct ctdb_context *ctdb,
+                                 struct ctdb_db_context *ctdb_db,
+                                 const char *reason,/* NULL means healthy */
+                                 int num_healthy_nodes);
+int ctdb_recheck_persistent_health(struct ctdb_context *ctdb);
+
 #endif
index 42b2c124b15671ab311598363710b19804c6ca98..7f3128b4698465e782e5e92c38386cd29f08c48e 100644 (file)
@@ -354,6 +354,16 @@ static void daemon_request_call_from_client(struct ctdb_client *client,
                return;
        }
 
+       if (ctdb_db->unhealthy_reason) {
+               /*
+                * this is just a warning, as the tdb should be empty anyway,
+                * and only persistent databases can be unhealthy, which don't
+                * use this code path
+                */
+               DEBUG(DEBUG_WARNING,("warn: db(%s) unhealty in daemon_request_call_from_client(): %s\n",
+                                    ctdb_db->db_name, ctdb_db->unhealthy_reason));
+       }
+
        key.dptr = c->data;
        key.dsize = c->keylen;
 
index 37c90a275baff0bbe00c658b1a40ecf4bdc09f7f..38520087ce39f2e24b1340677121aa88b4be36a0 100644 (file)
@@ -489,7 +489,8 @@ int32_t ctdb_control_transaction_commit(struct ctdb_context *ctdb, uint32_t id)
 {
        struct ctdb_db_context *ctdb_db;
        int i;
-       
+       int healthy_nodes = 0;
+
        for (i=1;i<=NUM_DB_PRIORITIES; i++) {
                if (ctdb->freeze_mode[i] != CTDB_FREEZE_FROZEN) {
                        DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
@@ -507,6 +508,16 @@ int32_t ctdb_control_transaction_commit(struct ctdb_context *ctdb, uint32_t id)
                return -1;
        }
 
+       DEBUG(DEBUG_DEBUG,(__location__ " num_nodes[%d]\n", ctdb->num_nodes));
+       for (i=0; i < ctdb->num_nodes; i++) {
+               DEBUG(DEBUG_DEBUG,(__location__ " node[%d].flags[0x%X]\n",
+                                  i, ctdb->nodes[i]->flags));
+               if (ctdb->nodes[i]->flags == 0) {
+                       healthy_nodes++;
+               }
+       }
+       DEBUG(DEBUG_INFO,(__location__ " healthy_nodes[%d]\n", healthy_nodes));
+
        for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
                int ret;
 
@@ -518,6 +529,14 @@ int32_t ctdb_control_transaction_commit(struct ctdb_context *ctdb, uint32_t id)
                        goto fail;
                }
                tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
+
+               ret = ctdb_update_persistent_health(ctdb, ctdb_db, NULL, healthy_nodes);
+               if (ret != 0) {
+                       DEBUG(DEBUG_CRIT,(__location__ " Failed to update persistent health for db '%s'. "
+                                        "Cancel all remaining transactions and resetting transaction_started to false.\n",
+                                        ctdb_db->db_name));
+                       goto fail;
+               }
        }
 
        ctdb->freeze_transaction_started = false;
index 9a4044e0caff75f901e558eabfdde852b1bb1c75..b966386b1391ac8974db3e416f51b7248f72ce3e 100644 (file)
@@ -23,6 +23,7 @@
 #include "system/network.h"
 #include "system/filesys.h"
 #include "system/dir.h"
+#include "system/time.h"
 #include "../include/ctdb_private.h"
 #include "db_wrap.h"
 #include "lib/util/dlinklist.h"
@@ -190,6 +191,250 @@ static void ctdb_check_db_empty(struct ctdb_db_context *ctdb_db)
        }
 }
 
+/*
+  Refresh ctdb_db->unhealthy_reason from the persistent health tdb.
+
+  The record is looked up under the database name (without the trailing
+  NUL). A non-empty record becomes the new unhealthy reason; an empty or
+  missing record leaves the reason NULL. The previous reason string is
+  released only once the new one has been set up, so on allocation
+  failure the old value is still in place.
+
+  Returns 0 on success, -1 if the reason string could not be allocated.
+ */
+int ctdb_load_persistent_health(struct ctdb_context *ctdb,
+                               struct ctdb_db_context *ctdb_db)
+{
+       struct tdb_context *health_tdb = ctdb->db_persistent_health->tdb;
+       char *previous = ctdb_db->unhealthy_reason;
+       char *loaded = NULL;
+       TDB_DATA name_key;
+       TDB_DATA record;
+
+       name_key.dptr = discard_const_p(uint8_t, ctdb_db->db_name);
+       name_key.dsize = strlen(ctdb_db->db_name);
+
+       record = tdb_fetch(health_tdb, name_key);
+       if (record.dsize > 0) {
+               /* the stored value is not NUL terminated, copy by length */
+               loaded = talloc_strndup(ctdb_db,
+                                       (const char *)record.dptr,
+                                       record.dsize);
+               if (loaded == NULL) {
+                       DEBUG(DEBUG_ALERT,(__location__ " talloc_strndup(%d) failed\n",
+                                          (int)record.dsize));
+                       free(record.dptr);
+                       return -1;
+               }
+       }
+
+       /* tdb_fetch() hands back malloc()ed memory; free(NULL) is a no-op */
+       free(record.dptr);
+
+       talloc_free(previous);
+       ctdb_db->unhealthy_reason = loaded;
+       return 0;
+}
+
+/*
+  Record the health state of a persistent database in the persistent
+  health tdb and mirror it in ctdb_db->unhealthy_reason.
+
+  given_reason:      text describing why the db is unhealthy; NULL means
+                     the caller considers the db healthy.
+  num_healthy_nodes: number of healthy nodes counted by the caller. When
+                     given_reason is NULL, a reason is already stored and
+                     this is 0, the stored reason is kept and prefixed
+                     (once) with "NO-HEALTHY-NODES - ".
+
+  The store/delete runs inside a transaction on the health tdb. Every
+  failure after tdb_transaction_start() cancels that transaction before
+  returning, so no transaction is left open on the health tdb.
+
+  Returns 0 on success, -1 on failure.
+ */
+int ctdb_update_persistent_health(struct ctdb_context *ctdb,
+                                 struct ctdb_db_context *ctdb_db,
+                                 const char *given_reason,/* NULL means healthy */
+                                 int num_healthy_nodes)
+{
+       static const char no_healthy_prefix[] = "NO-HEALTHY-NODES - ";
+       struct tdb_context *tdb = ctdb->db_persistent_health->tdb;
+       int ret;
+       TDB_DATA key;
+       TDB_DATA val;
+       char *new_reason = NULL;
+       char *old_reason = NULL;
+
+       ret = tdb_transaction_start(tdb);
+       if (ret != 0) {
+               DEBUG(DEBUG_ALERT,(__location__ " tdb_transaction_start('%s') failed: %d - %s\n",
+                                  tdb_name(tdb), ret, tdb_errorstr(tdb)));
+               return -1;
+       }
+
+       /* re-read the stored state inside the transaction */
+       ret = ctdb_load_persistent_health(ctdb, ctdb_db);
+       if (ret != 0) {
+               DEBUG(DEBUG_ALERT,(__location__ " ctdb_load_persistent_health('%s') failed: %d\n",
+                                  ctdb_db->db_name, ret));
+               goto cancel;
+       }
+       old_reason = ctdb_db->unhealthy_reason;
+
+       key.dptr = discard_const_p(uint8_t, ctdb_db->db_name);
+       key.dsize = strlen(ctdb_db->db_name);
+
+       if (given_reason) {
+               new_reason = talloc_strdup(ctdb_db, given_reason);
+               if (new_reason == NULL) {
+                       DEBUG(DEBUG_ALERT,(__location__ " talloc_strdup(%s) failed\n",
+                                         given_reason));
+                       goto cancel;
+               }
+       } else if (old_reason && num_healthy_nodes == 0) {
+               /*
+                * The caller reports the db as ok, but there were no
+                * healthy nodes available, which means we have not
+                * recovered valid content of the db. So if there's an
+                * old reason, keep it and prefix it with
+                * "NO-HEALTHY-NODES - " (unless it already carries
+                * that prefix).
+                */
+               const char *prefix = no_healthy_prefix;
+
+               if (strncmp(no_healthy_prefix, old_reason,
+                           sizeof(no_healthy_prefix) - 1) == 0) {
+                       prefix = "";
+               }
+               new_reason = talloc_asprintf(ctdb_db, "%s%s",
+                                        prefix, old_reason);
+               if (new_reason == NULL) {
+                       DEBUG(DEBUG_ALERT,(__location__ " talloc_asprintf(%s%s) failed\n",
+                                         prefix, old_reason));
+                       goto cancel;
+               }
+       }
+
+       if (new_reason) {
+               val.dptr = discard_const_p(uint8_t, new_reason);
+               val.dsize = strlen(new_reason);
+
+               ret = tdb_store(tdb, key, val, TDB_REPLACE);
+               if (ret != 0) {
+                       /* log before cancelling so tdb_errorstr() still
+                        * describes the store failure */
+                       DEBUG(DEBUG_ALERT,(__location__ " tdb_store('%s', %s, %s) failed: %d - %s\n",
+                                          tdb_name(tdb), ctdb_db->db_name, new_reason,
+                                          ret, tdb_errorstr(tdb)));
+                       goto cancel;
+               }
+               DEBUG(DEBUG_ALERT,("Updated db health for db(%s) to: %s\n",
+                                  ctdb_db->db_name, new_reason));
+       } else if (old_reason) {
+               /* healthy now: drop the stored reason */
+               ret = tdb_delete(tdb, key);
+               if (ret != 0) {
+                       DEBUG(DEBUG_ALERT,(__location__ " tdb_delete('%s', %s) failed: %d - %s\n",
+                                          tdb_name(tdb), ctdb_db->db_name,
+                                          ret, tdb_errorstr(tdb)));
+                       goto cancel;
+               }
+               DEBUG(DEBUG_NOTICE,("Updated db health for db(%s): OK\n",
+                                  ctdb_db->db_name));
+       }
+
+       ret = tdb_transaction_commit(tdb);
+       if (ret != TDB_SUCCESS) {
+               /* NOTE(review): no explicit cancel here, as in the
+                * original code - presumably a failed commit already
+                * ends the transaction; confirm against the tdb API */
+               DEBUG(DEBUG_ALERT,(__location__ " tdb_transaction_commit('%s') failed: %d - %s\n",
+                                  tdb_name(tdb), ret, tdb_errorstr(tdb)));
+               talloc_free(new_reason);
+               return -1;
+       }
+
+       /* only now replace the in-memory state with what was committed */
+       talloc_free(old_reason);
+       ctdb_db->unhealthy_reason = new_reason;
+
+       return 0;
+
+cancel:
+       tdb_transaction_cancel(tdb);
+       talloc_free(new_reason);
+       return -1;
+}
+
+/*
+  Move a corrupted persistent tdb out of the way.
+
+  The database is first marked unhealthy in the persistent health tdb
+  (with a reason naming the backup path), then ctdb_db->db_path is
+  renamed to "<db_path>.corrupted.<UTC timestamp>.0Z".
+
+  Returns 0 when the file was renamed, -1 on any failure. All temporary
+  strings are released on every path.
+ */
+static int ctdb_backup_corrupted_tdb(struct ctdb_context *ctdb,
+                                    struct ctdb_db_context *ctdb_db)
+{
+       time_t now = time(NULL);
+       char *new_path = NULL;
+       char *new_reason = NULL;
+       int ret;
+       struct tm *tm;
+
+       tm = gmtime(&now);
+       if (tm == NULL) {
+               DEBUG(DEBUG_CRIT,(__location__ " gmtime() failed\n"));
+               return -1;
+       }
+
+       /* formatted like: foo.tdb.0.corrupted.20091204160825.0Z */
+       new_path = talloc_asprintf(ctdb_db, "%s.corrupted."
+                                  "%04u%02u%02u%02u%02u%02u.0Z",
+                                  ctdb_db->db_path,
+                                  (unsigned)(tm->tm_year+1900),
+                                  (unsigned)(tm->tm_mon+1),
+                                  (unsigned)tm->tm_mday,
+                                  (unsigned)tm->tm_hour,
+                                  (unsigned)tm->tm_min,
+                                  (unsigned)tm->tm_sec);
+       if (new_path == NULL) {
+               DEBUG(DEBUG_CRIT,(__location__ " talloc_asprintf() failed\n"));
+               return -1;
+       }
+
+       new_reason = talloc_asprintf(ctdb_db,
+                                    "ERROR - Backup of corrupted TDB in '%s'",
+                                    new_path);
+       if (new_reason == NULL) {
+               DEBUG(DEBUG_CRIT,(__location__ " talloc_asprintf() failed\n"));
+               talloc_free(new_path);
+               return -1;
+       }
+
+       /* mark the db unhealthy before touching the file */
+       ret = ctdb_update_persistent_health(ctdb, ctdb_db, new_reason, 0);
+       talloc_free(new_reason);
+       if (ret != 0) {
+               DEBUG(DEBUG_CRIT,(__location__
+                                ": ctdb_backup_corrupted_tdb(%s) failed to update persistent health\n",
+                                ctdb_db->db_path));
+               talloc_free(new_path);
+               return -1;
+       }
+
+       ret = rename(ctdb_db->db_path, new_path);
+       if (ret != 0) {
+               DEBUG(DEBUG_CRIT,(__location__
+                                 ": ctdb_backup_corrupted_tdb(%s) rename to %s failed: %d - %s\n",
+                                 ctdb_db->db_path, new_path,
+                                 errno, strerror(errno)));
+               talloc_free(new_path);
+               return -1;
+       }
+
+       DEBUG(DEBUG_CRIT,(__location__
+                        ": ctdb_backup_corrupted_tdb(%s) renamed to %s\n",
+                        ctdb_db->db_path, new_path));
+       talloc_free(new_path);
+       return 0;
+}
+
+/*
+  Reload the stored health state of every persistent database on
+  ctdb->db_list and log a summary.
+
+  Non-persistent databases are ignored. A database counts as unhealthy
+  when an unhealthy reason is set after the reload.
+
+  Returns 0 if all persistent databases are healthy, -1 if at least one
+  is unhealthy or if reloading the state of a database failed (in which
+  case the remaining databases are not examined).
+ */
+int ctdb_recheck_persistent_health(struct ctdb_context *ctdb)
+{
+       struct ctdb_db_context *db;
+       int healthy = 0;
+       int unhealthy = 0;
+       int summary_level;
+
+       for (db = ctdb->db_list; db != NULL; db = db->next) {
+               if (db->persistent) {
+                       if (ctdb_load_persistent_health(ctdb, db) != 0) {
+                               DEBUG(DEBUG_ALERT,(__location__
+                                                  " load persistent health for '%s' failed\n",
+                                                  db->db_path));
+                               return -1;
+                       }
+
+                       if (db->unhealthy_reason != NULL) {
+                               unhealthy++;
+                               DEBUG(DEBUG_ALERT,(__location__
+                                                  " persistent db '%s' unhealthy: %s\n",
+                                                  db->db_path,
+                                                  db->unhealthy_reason));
+                       } else {
+                               healthy++;
+                               DEBUG(DEBUG_INFO,(__location__
+                                                 " persistent db '%s' healthy\n",
+                                                 db->db_path));
+                       }
+               }
+       }
+
+       /* the summary is raised to alert level as soon as one db failed */
+       summary_level = (unhealthy != 0) ? DEBUG_ALERT : DEBUG_NOTICE;
+       DEBUG(summary_level,
+             ("ctdb_recheck_presistent_health: OK[%d] FAIL[%d]\n",
+              healthy, unhealthy));
+
+       return (unhealthy != 0) ? -1 : 0;
+}
 
 /*
   attach to a database, handling both persistent and non-persistent databases
@@ -202,6 +447,8 @@ static int ctdb_local_attach(struct ctdb_context *ctdb, const char *db_name,
        int ret;
        struct TDB_DATA key;
        unsigned tdb_flags;
+       int mode = 0600;
+       int remaining_tries = 0;
 
        ctdb_db = talloc_zero(ctdb, struct ctdb_db_context);
        CTDB_NO_MEMORY(ctdb, ctdb_db);
@@ -226,6 +473,47 @@ static int ctdb_local_attach(struct ctdb_context *ctdb, const char *db_name,
                }
        }
 
+       if (persistent) {
+               if (unhealthy_reason) {
+                       ret = ctdb_update_persistent_health(ctdb, ctdb_db,
+                                                           unhealthy_reason, 0);
+                       if (ret != 0) {
+                               DEBUG(DEBUG_ALERT,(__location__ " ctdb_update_persistent_health('%s','%s') failed: %d\n",
+                                                  ctdb_db->db_name, unhealthy_reason, ret));
+                               talloc_free(ctdb_db);
+                               return -1;
+                       }
+               }
+
+               if (ctdb->max_persistent_check_errors > 0) {
+                       remaining_tries = 1;
+               }
+               if (ctdb->done_startup) {
+                       remaining_tries = 0;
+               }
+
+               ret = ctdb_load_persistent_health(ctdb, ctdb_db);
+               if (ret != 0) {
+                       DEBUG(DEBUG_ALERT,(__location__ " ctdb_load_persistent_health('%s') failed: %d\n",
+                                  ctdb_db->db_name, ret));
+                       talloc_free(ctdb_db);
+                       return -1;
+               }
+       }
+
+       if (ctdb_db->unhealthy_reason && remaining_tries == 0) {
+               DEBUG(DEBUG_ALERT,(__location__ "ERROR: tdb %s is marked as unhealthy: %s\n",
+                                  ctdb_db->db_name, ctdb_db->unhealthy_reason));
+               talloc_free(ctdb_db);
+               return -1;
+       }
+
+       if (ctdb_db->unhealthy_reason) {
+               /* this is just a warning, but we want that in the log file! */
+               DEBUG(DEBUG_ALERT,(__location__ "Warning: tdb %s is marked as unhealthy: %s\n",
+                                  ctdb_db->db_name, ctdb_db->unhealthy_reason));
+       }
+
        /* open the database */
        ctdb_db->db_path = talloc_asprintf(ctdb_db, "%s/%s.%u", 
                                           persistent?ctdb->db_directory_persistent:ctdb->db_directory, 
@@ -237,18 +525,105 @@ static int ctdb_local_attach(struct ctdb_context *ctdb, const char *db_name,
        }
        tdb_flags |= TDB_DISALLOW_NESTING;
 
+again:
        ctdb_db->ltdb = tdb_wrap_open(ctdb, ctdb_db->db_path, 
                                      ctdb->tunable.database_hash_size, 
                                      tdb_flags, 
-                                     O_CREAT|O_RDWR, 0600);
+                                     O_CREAT|O_RDWR, mode);
        if (ctdb_db->ltdb == NULL) {
-               DEBUG(DEBUG_CRIT,("Failed to open tdb '%s'\n", ctdb_db->db_path));
-               talloc_free(ctdb_db);
-               return -1;
+               struct stat st;
+               int saved_errno = errno;
+
+               if (!persistent) {
+                       DEBUG(DEBUG_CRIT,("Failed to open tdb '%s': %d - %s\n",
+                                         ctdb_db->db_path,
+                                         saved_errno,
+                                         strerror(saved_errno)));
+                       talloc_free(ctdb_db);
+                       return -1;
+               }
+
+               if (remaining_tries == 0) {
+                       DEBUG(DEBUG_CRIT,(__location__
+                                         "Failed to open persistent tdb '%s': %d - %s\n",
+                                         ctdb_db->db_path,
+                                         saved_errno,
+                                         strerror(saved_errno)));
+                       talloc_free(ctdb_db);
+                       return -1;
+               }
+
+               ret = stat(ctdb_db->db_path, &st);
+               if (ret != 0) {
+                       DEBUG(DEBUG_CRIT,(__location__
+                                         "Failed to open persistent tdb '%s': %d - %s\n",
+                                         ctdb_db->db_path,
+                                         saved_errno,
+                                         strerror(saved_errno)));
+                       talloc_free(ctdb_db);
+                       return -1;
+               }
+
+               ret = ctdb_backup_corrupted_tdb(ctdb, ctdb_db);
+               if (ret != 0) {
+                       DEBUG(DEBUG_CRIT,(__location__
+                                         "Failed to open persistent tdb '%s': %d - %s\n",
+                                         ctdb_db->db_path,
+                                         saved_errno,
+                                         strerror(saved_errno)));
+                       talloc_free(ctdb_db);
+                       return -1;
+               }
+
+               remaining_tries--;
+               mode = st.st_mode;
+               goto again;
        }
 
        if (!persistent) {
                ctdb_check_db_empty(ctdb_db);
+       } else {
+               ret = tdb_check(ctdb_db->ltdb->tdb, NULL, NULL);
+               if (ret != 0) {
+                       int fd;
+                       struct stat st;
+
+                       DEBUG(DEBUG_CRIT,("tdb_check(%s) failed: %d - %s\n",
+                                         ctdb_db->db_path, ret,
+                                         tdb_errorstr(ctdb_db->ltdb->tdb)));
+                       if (remaining_tries == 0) {
+                               talloc_free(ctdb_db);
+                               return -1;
+                       }
+
+                       fd = tdb_fd(ctdb_db->ltdb->tdb);
+                       ret = fstat(fd, &st);
+                       if (ret != 0) {
+                               DEBUG(DEBUG_CRIT,(__location__
+                                                 "Failed to fstat() persistent tdb '%s': %d - %s\n",
+                                                 ctdb_db->db_path,
+                                                 errno,
+                                                 strerror(errno)));
+                               talloc_free(ctdb_db);
+                               return -1;
+                       }
+
+                       /* close the TDB */
+                       talloc_free(ctdb_db->ltdb);
+                       ctdb_db->ltdb = NULL;
+
+                       ret = ctdb_backup_corrupted_tdb(ctdb, ctdb_db);
+                       if (ret != 0) {
+                               DEBUG(DEBUG_CRIT,("Failed to backup corrupted tdb '%s'\n",
+                                                 ctdb_db->db_path));
+                               talloc_free(ctdb_db);
+                               return -1;
+                       }
+
+                       remaining_tries--;
+                       mode = st.st_mode;
+                       goto again;
+               }
        }
 
        DLIST_ADD(ctdb->db_list, ctdb_db);
@@ -587,6 +962,12 @@ int32_t ctdb_ltdb_update_seqnum(struct ctdb_context *ctdb, uint32_t db_id, uint3
                return -1;
        }
 
+       if (ctdb_db->unhealthy_reason) {
+               DEBUG(DEBUG_ERR,("db(%s) unhealty in ctdb_ltdb_update_seqnum: %s\n",
+                                ctdb_db->db_name, ctdb_db->unhealthy_reason));
+               return -1;
+       }
+
        tdb_increment_seqnum_nonblock(ctdb_db->ltdb->tdb);
        ctdb_db->seqnum = tdb_get_seqnum(ctdb_db->ltdb->tdb);
        return 0;
index 2bf5dcb99f5430c3c137af34dc53f863082301b2..729895c68a612117b0a19b8f69b32b3847b97fdf 100644 (file)
@@ -220,10 +220,13 @@ static void ctdb_wait_until_recovered(struct event_context *ev, struct timed_eve
                              struct timeval t, void *private_data)
 {
        struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
+       int ret;
 
        DEBUG(DEBUG_NOTICE,("CTDB_WAIT_UNTIL_RECOVERED\n"));
 
        if (ctdb->vnn_map->generation == INVALID_GENERATION) {
+               ctdb->db_persistent_startup_generation = INVALID_GENERATION;
+
                DEBUG(DEBUG_NOTICE,(__location__ " generation is INVALID. Wait one more second\n"));
                event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
                                     timeval_current_ofs(1, 0), 
@@ -232,6 +235,8 @@ static void ctdb_wait_until_recovered(struct event_context *ev, struct timed_eve
        }
 
        if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
+               ctdb->db_persistent_startup_generation = INVALID_GENERATION;
+
                DEBUG(DEBUG_NOTICE,(__location__ " in recovery. Wait one more second\n"));
                event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
                                     timeval_current_ofs(1, 0), 
@@ -241,6 +246,8 @@ static void ctdb_wait_until_recovered(struct event_context *ev, struct timed_eve
 
 
        if (timeval_elapsed(&ctdb->last_recovery_finished) < (ctdb->tunable.rerecovery_timeout + 3)) {
+               ctdb->db_persistent_startup_generation = INVALID_GENERATION;
+
                DEBUG(DEBUG_NOTICE,(__location__ " wait for pending recoveries to end. Wait one more second.\n"));
 
                event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
@@ -249,6 +256,48 @@ static void ctdb_wait_until_recovered(struct event_context *ev, struct timed_eve
                return;
        }
 
+       if (ctdb->vnn_map->generation == ctdb->db_persistent_startup_generation) {
+               DEBUG(DEBUG_INFO,(__location__ " skip ctdb_recheck_persistent_health() "
+                                 "until the next recovery\n"));
+               event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
+                                    timeval_current_ofs(1, 0),
+                                    ctdb_wait_until_recovered, ctdb);
+               return;
+       }
+
+       ctdb->db_persistent_startup_generation = ctdb->vnn_map->generation;
+       ret = ctdb_recheck_persistent_health(ctdb);
+       if (ret != 0) {
+               ctdb->db_persistent_check_errors++;
+               if (ctdb->db_persistent_check_errors < ctdb->max_persistent_check_errors) {
+                       DEBUG(ctdb->db_persistent_check_errors==1?DEBUG_ERR:DEBUG_WARNING,
+                             (__location__ "ctdb_recheck_persistent_health() "
+                             "failed (%llu of %llu times) - retry later\n",
+                             (unsigned long long)ctdb->db_persistent_check_errors,
+                             (unsigned long long)ctdb->max_persistent_check_errors));
+                       event_add_timed(ctdb->ev,
+                                       ctdb->monitor->monitor_context,
+                                       timeval_current_ofs(1, 0),
+                                       ctdb_wait_until_recovered, ctdb);
+                       return;
+               }
+               DEBUG(DEBUG_ALERT,(__location__
+                                 "ctdb_recheck_persistent_health() failed (%llu times) - prepare shutdown\n",
+                                 (unsigned long long)ctdb->db_persistent_check_errors));
+               ctdb_stop_recoverd(ctdb);
+               ctdb_stop_keepalive(ctdb);
+               ctdb_stop_monitoring(ctdb);
+               ctdb_release_all_ips(ctdb);
+               if (ctdb->methods != NULL) {
+                       ctdb->methods->shutdown(ctdb);
+               }
+               ctdb_event_script(ctdb, CTDB_EVENT_SHUTDOWN);
+               DEBUG(DEBUG_ALERT,("ctdb_recheck_persistent_health() failed - Stopping CTDB daemon\n"));
+               exit(11);
+       }
+       ctdb->db_persistent_check_errors = 0;
+       DEBUG(DEBUG_NOTICE,(__location__
+                          "ctdb_start_monitoring: ctdb_recheck_persistent_health() OK\n"));
 
        DEBUG(DEBUG_NOTICE,(__location__ " Recoveries finished. Running the \"startup\" event.\n"));
        event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
@@ -421,6 +470,11 @@ int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)
 
        DEBUG(DEBUG_INFO, ("Control modflags on node %u - flags now 0x%x\n", c->pnn, node->flags));
 
+       if (node->flags == 0 && !ctdb->done_startup) {
+               DEBUG(DEBUG_ERR, (__location__ " Node %u became healthy - force recovery for startup\n",
+                                 c->pnn));
+               ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
+       }
 
        /* tell the recovery daemon something has changed */
        ctdb_daemon_send_message(ctdb, ctdb->pnn,
index 59ddadb042fe7eed63c01593139db4b3bf60ad4e..b686cbdee86ed2ba3af60d710849c0ec293154a4 100644 (file)
@@ -117,6 +117,12 @@ int32_t ctdb_control_trans2_commit(struct ctdb_context *ctdb,
                return -1;
        }
 
+       if (ctdb_db->unhealthy_reason) {
+               DEBUG(DEBUG_ERR,("db(%s) unhealty in ctdb_control_trans2_commit: %s\n",
+                                ctdb_db->db_name, ctdb_db->unhealthy_reason));
+               return -1;
+       }
+
        /* handling num_persistent_updates is a bit strange - 
           there are 3 cases
             1) very old clients, which never called CTDB_CONTROL_START_PERSISTENT_UPDATE
@@ -597,6 +603,12 @@ int32_t ctdb_control_update_record(struct ctdb_context *ctdb,
                return -1;
        }
 
+       if (ctdb_db->unhealthy_reason) {
+               DEBUG(DEBUG_ERR,("db(%s) unhealty in ctdb_control_update_record: %s\n",
+                                ctdb_db->db_name, ctdb_db->unhealthy_reason));
+               return -1;
+       }
+
        state = talloc(ctdb, struct ctdb_persistent_write_state);
        CTDB_NO_MEMORY(ctdb, state);
 
index 8568e8bbe07e59984f8255ab0dd9b31f6b7aa4c6..ecc01e6206ca267ec982a07806489c651c44138d 100644 (file)
@@ -386,6 +386,12 @@ int32_t ctdb_control_pull_db(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DAT
        params.len = offsetof(struct ctdb_marshall_buffer, data);
        params.failed = false;
 
+       if (ctdb_db->unhealthy_reason) {
+               /* this is just a warning, as the tdb should be empty anyway */
+               DEBUG(DEBUG_WARNING,("db(%s) unhealty in ctdb_control_pull_db: %s\n",
+                                    ctdb_db->db_name, ctdb_db->unhealthy_reason));
+       }
+
        if (ctdb_lock_all_databases_mark(ctdb, ctdb_db->priority) != 0) {
                DEBUG(DEBUG_ERR,(__location__ " Failed to get lock on entired db - failing\n"));
                return -1;
index d66036f9a1c4de81a36b8c932cde89af0af82961..26d43280629121e5accd2efe5ba32f71f51b833f 100644 (file)
@@ -388,6 +388,16 @@ int32_t ctdb_control_traverse_all(struct ctdb_context *ctdb, TDB_DATA data, TDB_
                return -1;
        }
 
+       if (ctdb_db->unhealthy_reason) {
+               if (ctdb->tunable.allow_unhealthy_db_read == 0) {
+                       DEBUG(DEBUG_ERR,("db(%s) unhealty in ctdb_control_traverse_all: %s\n",
+                                       ctdb_db->db_name, ctdb_db->unhealthy_reason));
+                       return -1;
+               }
+               DEBUG(DEBUG_WARNING,("warn: db(%s) unhealty in ctdb_control_traverse_all: %s\n",
+                                    ctdb_db->db_name, ctdb_db->unhealthy_reason));
+       }
+
        state = talloc(ctdb_db, struct traverse_all_state);
        if (state == NULL) {
                return -1;
@@ -561,6 +571,16 @@ int32_t ctdb_control_traverse_start(struct ctdb_context *ctdb, TDB_DATA data,
                return -1;
        }
 
+       if (ctdb_db->unhealthy_reason) {
+               if (ctdb->tunable.allow_unhealthy_db_read == 0) {
+                       DEBUG(DEBUG_ERR,("db(%s) unhealty in ctdb_control_traverse_start: %s\n",
+                                       ctdb_db->db_name, ctdb_db->unhealthy_reason));
+                       return -1;
+               }
+               DEBUG(DEBUG_WARNING,("warn: db(%s) unhealty in ctdb_control_traverse_start: %s\n",
+                                    ctdb_db->db_name, ctdb_db->unhealthy_reason));
+       }
+
        state = talloc(client, struct traverse_start_state);
        if (state == NULL) {
                return -1;
index 17949d10aea05d55f74124b9ae354c45fda6e25e..e75dcbd74f0067a055f112660bf8cb75c3ee04d7 100644 (file)
@@ -63,7 +63,8 @@ static const struct {
        { "VacuumMinInterval",   60,  offsetof(struct ctdb_tunable, vacuum_min_interval) },
        { "VacuumMaxInterval",  600,  offsetof(struct ctdb_tunable, vacuum_max_interval) },
        { "MaxQueueDropMsg",  1000,  offsetof(struct ctdb_tunable, max_queue_depth_drop_msg) },
-       { "UseStatusEvents",     0,  offsetof(struct ctdb_tunable, use_status_events_for_monitoring) }
+       { "UseStatusEvents",     0,  offsetof(struct ctdb_tunable, use_status_events_for_monitoring) },
+       { "AllowUnhealthyDBRead", 0,  offsetof(struct ctdb_tunable, allow_unhealthy_db_read) }
 };
 
 /*
index 7cffde09eec29a743d9f11d8f38150054c350080..e32aa6580fab8a8625de5ac5ff00c4a86d49c9ce 100644 (file)
@@ -51,6 +51,7 @@ static struct {
        int         lvs;
        int         script_log_level;
        int         no_publicipcheck;
+       int         max_persistent_check_errors;
 } options = {
        .nlist = ETCDIR "/ctdb/nodes",
        .transport = "tcp",
@@ -139,6 +140,9 @@ int main(int argc, const char *argv[])
                { "lvs", 0, POPT_ARG_NONE, &options.lvs, 0, "lvs is enabled on this node", NULL },
                { "script-log-level", 0, POPT_ARG_INT, &options.script_log_level, DEBUG_ERR, "log level of event script output", NULL },
                { "nopublicipcheck", 0, POPT_ARG_NONE, &options.no_publicipcheck, 0, "dont check we have/dont have the correct public ip addresses", NULL },
+               { "max-persistent-check-errors", 0, POPT_ARG_INT,
+                 &options.max_persistent_check_errors, 0,
+                 "max allowed persistent check errors (default 0)", NULL },
                POPT_TABLEEND
        };
        int opt, ret;
@@ -325,6 +329,12 @@ int main(int argc, const char *argv[])
 
        ctdb->do_checkpublicip = !options.no_publicipcheck;
 
+       if (options.max_persistent_check_errors < 0) {
+               ctdb->max_persistent_check_errors = 0xFFFFFFFFFFFFFFFFLL;
+       } else {
+               ctdb->max_persistent_check_errors = (uint64_t)options.max_persistent_check_errors;
+       }
+
        if (getenv("CTDB_BASE") == NULL) {
                /* setup a environment variable for the event scripts to use
                   to find the installation directory */