#include "system/network.h"
#include "system/filesys.h"
#include "system/dir.h"
+#include "system/time.h"
#include "../include/ctdb_private.h"
#include "db_wrap.h"
#include "lib/util/dlinklist.h"
#include <ctype.h>
+#define PERSISTENT_HEALTH_TDB "persistent_health.tdb"
+
/*
this is the dummy null procedure that all databases support
*/
}
}
+int ctdb_load_persistent_health(struct ctdb_context *ctdb,
+ struct ctdb_db_context *ctdb_db)
+{
+ struct tdb_context *tdb = ctdb->db_persistent_health->tdb;
+ char *old;
+ char *reason = NULL;
+ TDB_DATA key;
+ TDB_DATA val;
+
+ key.dptr = discard_const_p(uint8_t, ctdb_db->db_name);
+ key.dsize = strlen(ctdb_db->db_name);
+
+ old = ctdb_db->unhealthy_reason;
+ ctdb_db->unhealthy_reason = NULL;
+
+ val = tdb_fetch(tdb, key);
+ if (val.dsize > 0) {
+ reason = talloc_strndup(ctdb_db,
+ (const char *)val.dptr,
+ val.dsize);
+ if (reason == NULL) {
+ DEBUG(DEBUG_ALERT,(__location__ " talloc_strndup(%d) failed\n",
+ (int)val.dsize));
+ ctdb_db->unhealthy_reason = old;
+ free(val.dptr);
+ return -1;
+ }
+ }
+
+ if (val.dptr) {
+ free(val.dptr);
+ }
+
+ talloc_free(old);
+ ctdb_db->unhealthy_reason = reason;
+ return 0;
+}
+
+int ctdb_update_persistent_health(struct ctdb_context *ctdb,
+ struct ctdb_db_context *ctdb_db,
+ const char *given_reason,/* NULL means healthy */
+ int num_healthy_nodes)
+{
+ struct tdb_context *tdb = ctdb->db_persistent_health->tdb;
+ int ret;
+ TDB_DATA key;
+ TDB_DATA val;
+ char *new_reason = NULL;
+ char *old_reason = NULL;
+
+ ret = tdb_transaction_start(tdb);
+ if (ret != 0) {
+ DEBUG(DEBUG_ALERT,(__location__ " tdb_transaction_start('%s') failed: %d - %s\n",
+ tdb_name(tdb), ret, tdb_errorstr(tdb)));
+ return -1;
+ }
+
+ ret = ctdb_load_persistent_health(ctdb, ctdb_db);
+ if (ret != 0) {
+ DEBUG(DEBUG_ALERT,(__location__ " ctdb_load_persistent_health('%s') failed: %d\n",
+ ctdb_db->db_name, ret));
+ return -1;
+ }
+ old_reason = ctdb_db->unhealthy_reason;
+
+ key.dptr = discard_const_p(uint8_t, ctdb_db->db_name);
+ key.dsize = strlen(ctdb_db->db_name);
+
+ if (given_reason) {
+ new_reason = talloc_strdup(ctdb_db, given_reason);
+ if (new_reason == NULL) {
+ DEBUG(DEBUG_ALERT,(__location__ " talloc_strdup(%s) failed\n",
+ given_reason));
+ return -1;
+ }
+ } else if (old_reason && num_healthy_nodes == 0) {
+ /*
+ * If the reason indicates ok, but there where no healthy nodes
+ * available, that it means, we have not recovered valid content
+ * of the db. So if there's an old reason, prefix it with
+ * "NO-HEALTHY-NODES - "
+ */
+ const char *prefix;
+
+#define _TMP_PREFIX "NO-HEALTHY-NODES - "
+ ret = strncmp(_TMP_PREFIX, old_reason, strlen(_TMP_PREFIX));
+ if (ret != 0) {
+ prefix = _TMP_PREFIX;
+ } else {
+ prefix = "";
+ }
+ new_reason = talloc_asprintf(ctdb_db, "%s%s",
+ prefix, old_reason);
+ if (new_reason == NULL) {
+ DEBUG(DEBUG_ALERT,(__location__ " talloc_asprintf(%s%s) failed\n",
+ prefix, old_reason));
+ return -1;
+ }
+#undef _TMP_PREFIX
+ }
+
+ if (new_reason) {
+ val.dptr = discard_const_p(uint8_t, new_reason);
+ val.dsize = strlen(new_reason);
+
+ ret = tdb_store(tdb, key, val, TDB_REPLACE);
+ if (ret != 0) {
+ tdb_transaction_cancel(tdb);
+ DEBUG(DEBUG_ALERT,(__location__ " tdb_store('%s', %s, %s) failed: %d - %s\n",
+ tdb_name(tdb), ctdb_db->db_name, new_reason,
+ ret, tdb_errorstr(tdb)));
+ talloc_free(new_reason);
+ return -1;
+ }
+ DEBUG(DEBUG_ALERT,("Updated db health for db(%s) to: %s\n",
+ ctdb_db->db_name, new_reason));
+ } else if (old_reason) {
+ ret = tdb_delete(tdb, key);
+ if (ret != 0) {
+ tdb_transaction_cancel(tdb);
+ DEBUG(DEBUG_ALERT,(__location__ " tdb_delete('%s', %s) failed: %d - %s\n",
+ tdb_name(tdb), ctdb_db->db_name,
+ ret, tdb_errorstr(tdb)));
+ talloc_free(new_reason);
+ return -1;
+ }
+ DEBUG(DEBUG_NOTICE,("Updated db health for db(%s): OK\n",
+ ctdb_db->db_name));
+ }
+
+ ret = tdb_transaction_commit(tdb);
+ if (ret != TDB_SUCCESS) {
+ DEBUG(DEBUG_ALERT,(__location__ " tdb_transaction_commit('%s') failed: %d - %s\n",
+ tdb_name(tdb), ret, tdb_errorstr(tdb)));
+ talloc_free(new_reason);
+ return -1;
+ }
+
+ talloc_free(old_reason);
+ ctdb_db->unhealthy_reason = new_reason;
+
+ return 0;
+}
+
+static int ctdb_backup_corrupted_tdb(struct ctdb_context *ctdb,
+ struct ctdb_db_context *ctdb_db)
+{
+ time_t now = time(NULL);
+ char *new_path;
+ char *new_reason;
+ int ret;
+ struct tm *tm;
+
+ tm = gmtime(&now);
+
+ /* formatted like: foo.tdb.0.corrupted.20091204160825.0Z */
+ new_path = talloc_asprintf(ctdb_db, "%s.corrupted."
+ "%04u%02u%02u%02u%02u%02u.0Z",
+ ctdb_db->db_path,
+ tm->tm_year+1900, tm->tm_mon+1,
+ tm->tm_mday, tm->tm_hour, tm->tm_min,
+ tm->tm_sec);
+ if (new_path == NULL) {
+ DEBUG(DEBUG_CRIT,(__location__ " talloc_asprintf() failed\n"));
+ return -1;
+ }
+
+ new_reason = talloc_asprintf(ctdb_db,
+ "ERROR - Backup of corrupted TDB in '%s'",
+ new_path);
+ if (new_reason == NULL) {
+ DEBUG(DEBUG_CRIT,(__location__ " talloc_asprintf() failed\n"));
+ return -1;
+ }
+ ret = ctdb_update_persistent_health(ctdb, ctdb_db, new_reason, 0);
+ talloc_free(new_reason);
+ if (ret != 0) {
+ DEBUG(DEBUG_CRIT,(__location__
+ ": ctdb_backup_corrupted_tdb(%s) not implemented yet\n",
+ ctdb_db->db_path));
+ return -1;
+ }
+
+ ret = rename(ctdb_db->db_path, new_path);
+ if (ret != 0) {
+ DEBUG(DEBUG_CRIT,(__location__
+ ": ctdb_backup_corrupted_tdb(%s) rename to %s failed: %d - %s\n",
+ ctdb_db->db_path, new_path,
+ errno, strerror(errno)));
+ talloc_free(new_path);
+ return -1;
+ }
+
+ DEBUG(DEBUG_CRIT,(__location__
+ ": ctdb_backup_corrupted_tdb(%s) renamed to %s\n",
+ ctdb_db->db_path, new_path));
+ talloc_free(new_path);
+ return 0;
+}
+
+int ctdb_recheck_persistent_health(struct ctdb_context *ctdb)
+{
+ struct ctdb_db_context *ctdb_db;
+ int ret;
+ int ok = 0;
+ int fail = 0;
+
+ for (ctdb_db = ctdb->db_list; ctdb_db; ctdb_db = ctdb_db->next) {
+ if (!ctdb_db->persistent) {
+ continue;
+ }
+
+ ret = ctdb_load_persistent_health(ctdb, ctdb_db);
+ if (ret != 0) {
+ DEBUG(DEBUG_ALERT,(__location__
+ " load persistent health for '%s' failed\n",
+ ctdb_db->db_path));
+ return -1;
+ }
+
+ if (ctdb_db->unhealthy_reason == NULL) {
+ ok++;
+ DEBUG(DEBUG_INFO,(__location__
+ " persistent db '%s' healthy\n",
+ ctdb_db->db_path));
+ continue;
+ }
+
+ fail++;
+ DEBUG(DEBUG_ALERT,(__location__
+ " persistent db '%s' unhealthy: %s\n",
+ ctdb_db->db_path,
+ ctdb_db->unhealthy_reason));
+ }
+ DEBUG((fail!=0)?DEBUG_ALERT:DEBUG_NOTICE,
+ ("ctdb_recheck_presistent_health: OK[%d] FAIL[%d]\n",
+ ok, fail));
+
+ if (fail != 0) {
+ return -1;
+ }
+
+ return 0;
+}
+
+
+/*
+ mark a database - as healthy
+ */
+int32_t ctdb_control_db_set_healthy(struct ctdb_context *ctdb, TDB_DATA indata)
+{
+ uint32_t db_id = *(uint32_t *)indata.dptr;
+ struct ctdb_db_context *ctdb_db;
+ int ret;
+ bool may_recover = false;
+
+ ctdb_db = find_ctdb_db(ctdb, db_id);
+ if (!ctdb_db) {
+ DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%x\n", db_id));
+ return -1;
+ }
+
+ if (ctdb_db->unhealthy_reason) {
+ may_recover = true;
+ }
+
+ ret = ctdb_update_persistent_health(ctdb, ctdb_db, NULL, 1);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,(__location__
+ " ctdb_update_persistent_health(%s) failed\n",
+ ctdb_db->db_name));
+ return -1;
+ }
+
+ if (may_recover && !ctdb->done_startup) {
+ DEBUG(DEBUG_ERR, (__location__ " db %s become healthy - force recovery for startup\n",
+ ctdb_db->db_name));
+ ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
+ }
+
+ return 0;
+}
+
+int32_t ctdb_control_db_get_health(struct ctdb_context *ctdb,
+ TDB_DATA indata,
+ TDB_DATA *outdata)
+{
+ uint32_t db_id = *(uint32_t *)indata.dptr;
+ struct ctdb_db_context *ctdb_db;
+ int ret;
+
+ ctdb_db = find_ctdb_db(ctdb, db_id);
+ if (!ctdb_db) {
+ DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%x\n", db_id));
+ return -1;
+ }
+
+ ret = ctdb_load_persistent_health(ctdb, ctdb_db);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,(__location__
+ " ctdb_load_persistent_health(%s) failed\n",
+ ctdb_db->db_name));
+ return -1;
+ }
+
+ *outdata = tdb_null;
+ if (ctdb_db->unhealthy_reason) {
+ outdata->dptr = (uint8_t *)ctdb_db->unhealthy_reason;
+ outdata->dsize = strlen(ctdb_db->unhealthy_reason)+1;
+ }
+
+ return 0;
+}
/*
attach to a database, handling both persistent and non-persistent databases
return 0 on success, -1 on failure
*/
-static int ctdb_local_attach(struct ctdb_context *ctdb, const char *db_name, bool persistent)
+static int ctdb_local_attach(struct ctdb_context *ctdb, const char *db_name,
+ bool persistent, const char *unhealthy_reason)
{
struct ctdb_db_context *ctdb_db, *tmp_db;
int ret;
struct TDB_DATA key;
unsigned tdb_flags;
+ int mode = 0600;
+ int remaining_tries = 0;
ctdb_db = talloc_zero(ctdb, struct ctdb_db_context);
CTDB_NO_MEMORY(ctdb, ctdb_db);
}
}
- if (ctdb->db_directory == NULL) {
- ctdb->db_directory = VARDIR "/ctdb";
+ if (persistent) {
+ if (unhealthy_reason) {
+ ret = ctdb_update_persistent_health(ctdb, ctdb_db,
+ unhealthy_reason, 0);
+ if (ret != 0) {
+ DEBUG(DEBUG_ALERT,(__location__ " ctdb_update_persistent_health('%s','%s') failed: %d\n",
+ ctdb_db->db_name, unhealthy_reason, ret));
+ talloc_free(ctdb_db);
+ return -1;
+ }
+ }
+
+ if (ctdb->max_persistent_check_errors > 0) {
+ remaining_tries = 1;
+ }
+ if (ctdb->done_startup) {
+ remaining_tries = 0;
+ }
+
+ ret = ctdb_load_persistent_health(ctdb, ctdb_db);
+ if (ret != 0) {
+ DEBUG(DEBUG_ALERT,(__location__ " ctdb_load_persistent_health('%s') failed: %d\n",
+ ctdb_db->db_name, ret));
+ talloc_free(ctdb_db);
+ return -1;
+ }
}
- /* make sure the db directory exists */
- if (mkdir(ctdb->db_directory, 0700) == -1 && errno != EEXIST) {
- DEBUG(DEBUG_CRIT,(__location__ " Unable to create ctdb directory '%s'\n",
- ctdb->db_directory));
+ if (ctdb_db->unhealthy_reason && remaining_tries == 0) {
+ DEBUG(DEBUG_ALERT,(__location__ "ERROR: tdb %s is marked as unhealthy: %s\n",
+ ctdb_db->db_name, ctdb_db->unhealthy_reason));
talloc_free(ctdb_db);
return -1;
}
- if (persistent && mkdir(ctdb->db_directory_persistent, 0700) == -1 && errno != EEXIST) {
- DEBUG(DEBUG_CRIT,(__location__ " Unable to create ctdb persistent directory '%s'\n",
- ctdb->db_directory_persistent));
- talloc_free(ctdb_db);
- return -1;
+ if (ctdb_db->unhealthy_reason) {
+ /* this is just a warning, but we want that in the log file! */
+ DEBUG(DEBUG_ALERT,(__location__ "Warning: tdb %s is marked as unhealthy: %s\n",
+ ctdb_db->db_name, ctdb_db->unhealthy_reason));
}
/* open the database */
db_name, ctdb->pnn);
tdb_flags = persistent? TDB_DEFAULT : TDB_CLEAR_IF_FIRST | TDB_NOSYNC;
- if (!ctdb->do_setsched) {
+ if (ctdb->valgrinding) {
tdb_flags |= TDB_NOMMAP;
}
+ tdb_flags |= TDB_DISALLOW_NESTING;
+again:
ctdb_db->ltdb = tdb_wrap_open(ctdb, ctdb_db->db_path,
ctdb->tunable.database_hash_size,
tdb_flags,
- O_CREAT|O_RDWR, 0666);
+ O_CREAT|O_RDWR, mode);
if (ctdb_db->ltdb == NULL) {
- DEBUG(DEBUG_CRIT,("Failed to open tdb '%s'\n", ctdb_db->db_path));
- talloc_free(ctdb_db);
- return -1;
+ struct stat st;
+ int saved_errno = errno;
+
+ if (!persistent) {
+ DEBUG(DEBUG_CRIT,("Failed to open tdb '%s': %d - %s\n",
+ ctdb_db->db_path,
+ saved_errno,
+ strerror(saved_errno)));
+ talloc_free(ctdb_db);
+ return -1;
+ }
+
+ if (remaining_tries == 0) {
+ DEBUG(DEBUG_CRIT,(__location__
+ "Failed to open persistent tdb '%s': %d - %s\n",
+ ctdb_db->db_path,
+ saved_errno,
+ strerror(saved_errno)));
+ talloc_free(ctdb_db);
+ return -1;
+ }
+
+ ret = stat(ctdb_db->db_path, &st);
+ if (ret != 0) {
+ DEBUG(DEBUG_CRIT,(__location__
+ "Failed to open persistent tdb '%s': %d - %s\n",
+ ctdb_db->db_path,
+ saved_errno,
+ strerror(saved_errno)));
+ talloc_free(ctdb_db);
+ return -1;
+ }
+
+ ret = ctdb_backup_corrupted_tdb(ctdb, ctdb_db);
+ if (ret != 0) {
+ DEBUG(DEBUG_CRIT,(__location__
+ "Failed to open persistent tdb '%s': %d - %s\n",
+ ctdb_db->db_path,
+ saved_errno,
+ strerror(saved_errno)));
+ talloc_free(ctdb_db);
+ return -1;
+ }
+
+ remaining_tries--;
+ mode = st.st_mode;
+ goto again;
}
if (!persistent) {
ctdb_check_db_empty(ctdb_db);
+ } else {
+ ret = tdb_check(ctdb_db->ltdb->tdb, NULL, NULL);
+ if (ret != 0) {
+ int fd;
+ struct stat st;
+
+ DEBUG(DEBUG_CRIT,("tdb_check(%s) failed: %d - %s\n",
+ ctdb_db->db_path, ret,
+ tdb_errorstr(ctdb_db->ltdb->tdb)));
+ if (remaining_tries == 0) {
+ talloc_free(ctdb_db);
+ return -1;
+ }
+
+ fd = tdb_fd(ctdb_db->ltdb->tdb);
+ ret = fstat(fd, &st);
+ if (ret != 0) {
+ DEBUG(DEBUG_CRIT,(__location__
+ "Failed to fstat() persistent tdb '%s': %d - %s\n",
+ ctdb_db->db_path,
+ errno,
+ strerror(errno)));
+ talloc_free(ctdb_db);
+ return -1;
+ }
+
+ /* close the TDB */
+ talloc_free(ctdb_db->ltdb);
+ ctdb_db->ltdb = NULL;
+
+ ret = ctdb_backup_corrupted_tdb(ctdb, ctdb_db);
+ if (ret != 0) {
+ DEBUG(DEBUG_CRIT,("Failed to backup corrupted tdb '%s'\n",
+ ctdb_db->db_path));
+ talloc_free(ctdb_db);
+ return -1;
+ }
+
+ remaining_tries--;
+ mode = st.st_mode;
+ goto again;
+ }
}
DLIST_ADD(ctdb->db_list, ctdb_db);
return 0;
}
- if (ctdb_local_attach(ctdb, db_name, persistent) != 0) {
+ if (ctdb_local_attach(ctdb, db_name, persistent, NULL) != 0) {
return -1;
}
outdata->dptr = (uint8_t *)&db->db_id;
outdata->dsize = sizeof(db->db_id);
+ /* Try to ensure it's locked in mem */
+ ctdb_lockdown_memory(ctdb);
+
/* tell all the other nodes about this database */
ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
persistent?CTDB_CONTROL_DB_ATTACH_PERSISTENT:
/*
attach to all existing persistent databases
*/
-int ctdb_attach_persistent(struct ctdb_context *ctdb)
+static int ctdb_attach_persistent(struct ctdb_context *ctdb,
+ const char *unhealthy_reason)
{
DIR *d;
struct dirent *de;
}
p[4] = 0;
- if (ctdb_local_attach(ctdb, s, true) != 0) {
+ if (ctdb_local_attach(ctdb, s, true, unhealthy_reason) != 0) {
DEBUG(DEBUG_ERR,("Failed to attach to persistent database '%s'\n", de->d_name));
closedir(d);
talloc_free(s);
return -1;
}
+
DEBUG(DEBUG_INFO,("Attached to persistent database %s\n", s));
talloc_free(s);
return 0;
}
+int ctdb_attach_databases(struct ctdb_context *ctdb)
+{
+ int ret;
+ char *persistent_health_path = NULL;
+ char *unhealthy_reason = NULL;
+ bool first_try = true;
+
+ if (ctdb->db_directory == NULL) {
+ ctdb->db_directory = VARDIR "/ctdb";
+ }
+ if (ctdb->db_directory_persistent == NULL) {
+ ctdb->db_directory_persistent = VARDIR "/ctdb/persistent";
+ }
+ if (ctdb->db_directory_state == NULL) {
+ ctdb->db_directory_state = VARDIR "/ctdb/state";
+ }
+
+ /* make sure the db directory exists */
+ ret = mkdir(ctdb->db_directory, 0700);
+ if (ret == -1 && errno != EEXIST) {
+ DEBUG(DEBUG_CRIT,(__location__ " Unable to create ctdb directory '%s'\n",
+ ctdb->db_directory));
+ return -1;
+ }
+
+ /* make sure the persistent db directory exists */
+ ret = mkdir(ctdb->db_directory_persistent, 0700);
+ if (ret == -1 && errno != EEXIST) {
+ DEBUG(DEBUG_CRIT,(__location__ " Unable to create ctdb persistent directory '%s'\n",
+ ctdb->db_directory_persistent));
+ return -1;
+ }
+
+ /* make sure the internal state db directory exists */
+ ret = mkdir(ctdb->db_directory_state, 0700);
+ if (ret == -1 && errno != EEXIST) {
+ DEBUG(DEBUG_CRIT,(__location__ " Unable to create ctdb state directory '%s'\n",
+ ctdb->db_directory_state));
+ return -1;
+ }
+
+ persistent_health_path = talloc_asprintf(ctdb, "%s/%s.%u",
+ ctdb->db_directory_state,
+ PERSISTENT_HEALTH_TDB,
+ ctdb->pnn);
+ if (persistent_health_path == NULL) {
+ DEBUG(DEBUG_CRIT,(__location__ " talloc_asprintf() failed\n"));
+ return -1;
+ }
+
+again:
+
+ ctdb->db_persistent_health = tdb_wrap_open(ctdb, persistent_health_path,
+ 0, TDB_DISALLOW_NESTING,
+ O_CREAT | O_RDWR, 0600);
+ if (ctdb->db_persistent_health == NULL) {
+ struct tdb_wrap *tdb;
+
+ if (!first_try) {
+ DEBUG(DEBUG_CRIT,("Failed to open tdb '%s': %d - %s\n",
+ persistent_health_path,
+ errno,
+ strerror(errno)));
+ talloc_free(persistent_health_path);
+ talloc_free(unhealthy_reason);
+ return -1;
+ }
+ first_try = false;
+
+ unhealthy_reason = talloc_asprintf(ctdb, "WARNING - '%s' %s - %s",
+ persistent_health_path,
+ "was cleared after a failure",
+ "manual verification needed");
+ if (unhealthy_reason == NULL) {
+ DEBUG(DEBUG_CRIT,(__location__ " talloc_asprintf() failed\n"));
+ talloc_free(persistent_health_path);
+ return -1;
+ }
+
+ DEBUG(DEBUG_CRIT,("Failed to open tdb '%s' - retrying after CLEAR_IF_FIRST\n",
+ persistent_health_path));
+ tdb = tdb_wrap_open(ctdb, persistent_health_path,
+ 0, TDB_CLEAR_IF_FIRST | TDB_DISALLOW_NESTING,
+ O_CREAT | O_RDWR, 0600);
+ if (tdb) {
+ DEBUG(DEBUG_CRIT,("Failed to open tdb '%s' - with CLEAR_IF_FIRST: %d - %s\n",
+ persistent_health_path,
+ errno,
+ strerror(errno)));
+ talloc_free(persistent_health_path);
+ talloc_free(unhealthy_reason);
+ return -1;
+ }
+
+ talloc_free(tdb);
+ goto again;
+ }
+ ret = tdb_check(ctdb->db_persistent_health->tdb, NULL, NULL);
+ if (ret != 0) {
+ struct tdb_wrap *tdb;
+
+ talloc_free(ctdb->db_persistent_health);
+ ctdb->db_persistent_health = NULL;
+
+ if (!first_try) {
+ DEBUG(DEBUG_CRIT,("tdb_check('%s') failed\n",
+ persistent_health_path));
+ talloc_free(persistent_health_path);
+ talloc_free(unhealthy_reason);
+ return -1;
+ }
+ first_try = false;
+
+ unhealthy_reason = talloc_asprintf(ctdb, "WARNING - '%s' %s - %s",
+ persistent_health_path,
+ "was cleared after a failure",
+ "manual verification needed");
+ if (unhealthy_reason == NULL) {
+ DEBUG(DEBUG_CRIT,(__location__ " talloc_asprintf() failed\n"));
+ talloc_free(persistent_health_path);
+ return -1;
+ }
+
+ DEBUG(DEBUG_CRIT,("tdb_check('%s') failed - retrying after CLEAR_IF_FIRST\n",
+ persistent_health_path));
+ tdb = tdb_wrap_open(ctdb, persistent_health_path,
+ 0, TDB_CLEAR_IF_FIRST | TDB_DISALLOW_NESTING,
+ O_CREAT | O_RDWR, 0600);
+ if (tdb) {
+ DEBUG(DEBUG_CRIT,("Failed to open tdb '%s' - with CLEAR_IF_FIRST: %d - %s\n",
+ persistent_health_path,
+ errno,
+ strerror(errno)));
+ talloc_free(persistent_health_path);
+ talloc_free(unhealthy_reason);
+ return -1;
+ }
+
+ talloc_free(tdb);
+ goto again;
+ }
+ talloc_free(persistent_health_path);
+
+ ret = ctdb_attach_persistent(ctdb, unhealthy_reason);
+ talloc_free(unhealthy_reason);
+ if (ret != 0) {
+ return ret;
+ }
+
+ return 0;
+}
+
/*
called when a broadcast seqnum update comes in
*/
return -1;
}
+ if (ctdb_db->unhealthy_reason) {
+ DEBUG(DEBUG_ERR,("db(%s) unhealty in ctdb_ltdb_update_seqnum: %s\n",
+ ctdb_db->db_name, ctdb_db->unhealthy_reason));
+ return -1;
+ }
+
tdb_increment_seqnum_nonblock(ctdb_db->ltdb->tdb);
ctdb_db->seqnum = tdb_get_seqnum(ctdb_db->ltdb->tdb);
return 0;
const char *recovery_lock_file;
const char *db_dir;
const char *db_dir_persistent;
+ const char *db_dir_state;
const char *public_interface;
const char *single_public_ip;
const char *node_ip;
- int no_setsched;
+ int valgrinding;
int use_syslog;
int start_as_disabled;
int start_as_stopped;
int lvs;
int script_log_level;
int no_publicipcheck;
+ int max_persistent_check_errors;
} options = {
.nlist = ETCDIR "/ctdb/nodes",
.transport = "tcp",
.logfile = LOGDIR "/log.ctdb",
.db_dir = VARDIR "/ctdb",
.db_dir_persistent = VARDIR "/ctdb/persistent",
+ .db_dir_state = VARDIR "/ctdb/state",
.script_log_level = DEBUG_ERR,
};
{ "transport", 0, POPT_ARG_STRING, &options.transport, 0, "protocol transport", NULL },
{ "dbdir", 0, POPT_ARG_STRING, &options.db_dir, 0, "directory for the tdb files", NULL },
{ "dbdir-persistent", 0, POPT_ARG_STRING, &options.db_dir_persistent, 0, "directory for persistent tdb files", NULL },
+ { "dbdir-state", 0, POPT_ARG_STRING, &options.db_dir_state, 0, "directory for internal state tdb files", NULL },
{ "reclock", 0, POPT_ARG_STRING, &options.recovery_lock_file, 0, "location of recovery lock file", "filename" },
- { "nosetsched", 0, POPT_ARG_NONE, &options.no_setsched, 0, "disable setscheduler SCHED_FIFO call", NULL },
+ { "valgrinding", 0, POPT_ARG_NONE, &options.valgrinding, 0, "make valgrind more effective", NULL },
{ "syslog", 0, POPT_ARG_NONE, &options.use_syslog, 0, "log messages to syslog", NULL },
{ "start-as-disabled", 0, POPT_ARG_NONE, &options.start_as_disabled, 0, "Node starts in disabled state", NULL },
{ "start-as-stopped", 0, POPT_ARG_NONE, &options.start_as_stopped, 0, "Node starts in stopped state", NULL },
{ "lvs", 0, POPT_ARG_NONE, &options.lvs, 0, "lvs is enabled on this node", NULL },
{ "script-log-level", 0, POPT_ARG_INT, &options.script_log_level, DEBUG_ERR, "log level of event script output", NULL },
{ "nopublicipcheck", 0, POPT_ARG_NONE, &options.no_publicipcheck, 0, "dont check we have/dont have the correct public ip addresses", NULL },
+ { "max-persistent-check-errors", 0, POPT_ARG_INT,
+ &options.max_persistent_check_errors, 0,
+ "max allowed persistent check errors (default 0)", NULL },
POPT_TABLEEND
};
int opt, ret;
exit(1);
}
}
+ if (options.db_dir_state) {
+ ret = ctdb_set_tdb_dir_state(ctdb, options.db_dir_state);
+ if (ret == -1) {
+ DEBUG(DEBUG_ALERT,("ctdb_set_tdb_dir_state failed - %s\n", ctdb_errstr(ctdb)));
+ exit(1);
+ }
+ }
if (options.public_interface) {
ctdb->default_public_interface = talloc_strdup(ctdb, options.public_interface);
}
}
- ctdb->do_setsched = !options.no_setsched;
+ ctdb->valgrinding = options.valgrinding;
ctdb->do_checkpublicip = !options.no_publicipcheck;
+ if (options.max_persistent_check_errors < 0) {
+ ctdb->max_persistent_check_errors = 0xFFFFFFFFFFFFFFFFLL;
+ } else {
+ ctdb->max_persistent_check_errors = (uint64_t)options.max_persistent_check_errors;
+ }
+
if (getenv("CTDB_BASE") == NULL) {
/* setup a environment variable for the event scripts to use
to find the installation directory */