fix a conflict in the merge from rusty
author Ronnie Sahlberg <ronniesahlberg@gmail.com>
Wed, 16 Dec 2009 21:18:04 +0000 (08:18 +1100)
committer Ronnie Sahlberg <ronniesahlberg@gmail.com>
Wed, 16 Dec 2009 21:18:04 +0000 (08:18 +1100)
Merge commit 'rusty/ctdb-no-setsched'

Conflicts:

server/ctdb_vacuum.c

1  2 
client/ctdb_client.c
config/ctdb.init
include/ctdb_private.h
server/ctdb_daemon.c
server/ctdb_ltdb_server.c
server/ctdb_recoverd.c
server/ctdb_vacuum.c
server/ctdbd.c

diff --combined client/ctdb_client.c
index 7a26160b1d06a44843e6df2c5a5f9c42086aaab3,236e21a5dc566c6f26da75832a67d31749be10f9..4aad40060a6fe91ea0237aad3d09803f4ff56ad2
@@@ -1497,44 -1497,6 +1497,44 @@@ int ctdb_ctrl_getdbname(struct ctdb_con
        return 0;
  }
  
 +/*
 +  get the health status of a db
 + */
 +int ctdb_ctrl_getdbhealth(struct ctdb_context *ctdb,
 +                        struct timeval timeout,
 +                        uint32_t destnode,
 +                        uint32_t dbid, TALLOC_CTX *mem_ctx,
 +                        const char **reason)
 +{
 +      int ret;
 +      int32_t res;
 +      TDB_DATA data;
 +
 +      data.dptr = (uint8_t *)&dbid;
 +      data.dsize = sizeof(dbid);
 +
 +      ret = ctdb_control(ctdb, destnode, 0,
 +                         CTDB_CONTROL_DB_GET_HEALTH, 0, data,
 +                         mem_ctx, &data, &res, &timeout, NULL);
 +      if (ret != 0 || res != 0) {
 +              return -1;
 +      }
 +
 +      if (data.dsize == 0) {
 +              (*reason) = NULL;
 +              return 0;
 +      }
 +
 +      (*reason) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize);
 +      if ((*reason) == NULL) {
 +              return -1;
 +      }
 +
 +      talloc_free(data.dptr);
 +
 +      return 0;
 +}
 +
  /*
    create a database
   */
@@@ -1721,10 -1683,9 +1721,10 @@@ struct ctdb_db_context *ctdb_attach(str
        }
  
        tdb_flags = persistent?TDB_DEFAULT:TDB_NOSYNC;
-       if (!ctdb->do_setsched) {
+       if (ctdb->valgrinding) {
                tdb_flags |= TDB_NOMMAP;
        }
 +      tdb_flags |= TDB_DISALLOW_NESTING;
  
        ctdb_db->ltdb = tdb_wrap_open(ctdb, ctdb_db->db_path, 0, tdb_flags, O_RDWR, 0);
        if (ctdb_db->ltdb == NULL) {
@@@ -1887,7 -1848,7 +1887,7 @@@ int ctdb_traverse(struct ctdb_db_contex
  /*
    called on each key during a catdb
   */
 -static int dumpdb_fn(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA data, void *p)
 +int ctdb_dumpdb_record(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA data, void *p)
  {
        int i;
        FILE *f = (FILE *)p;
   */
  int ctdb_dump_db(struct ctdb_db_context *ctdb_db, FILE *f)
  {
 -      return ctdb_traverse(ctdb_db, dumpdb_fn, f);
 +      return ctdb_traverse(ctdb_db, ctdb_dumpdb_record, f);
  }
  
  /*
@@@ -3666,11 -3627,6 +3666,6 @@@ int switch_from_server_to_client(struc
        close(ctdb->daemon.sd);
        ctdb->daemon.sd = -1;
  
-       /* the client does not need to be realtime */
-       if (ctdb->do_setsched) {
-               ctdb_restore_scheduler(ctdb);
-       }
        /* initialise ctdb */
        ret = ctdb_socket_connect(ctdb);
        if (ret != 0) {
diff --combined config/ctdb.init
index ff8b387377766fd269014dd919642674ce7b55cc,1bd329ea71fb8826e4e1089775ebdbd0ddfde283..2e25cf168c4238e61b06fe8854fafb723e0f5768
@@@ -103,46 -103,13 +103,46 @@@ build_ctdb_options () 
      maybe_set "--lvs --single-public-ip" "$CTDB_LVS_PUBLIC_IP"
      maybe_set "--script-log-level"       "$CTDB_SCRIPT_LOG_LEVEL"
      maybe_set "--syslog"                 "$CTDB_SYSLOG"               "yes"
 +    maybe_set "--max-persistent-check-errors" "$CTDB_MAX_PERSISTENT_CHECK_ERRORS"
 +}
 +
 +check_tdb () {
 +      local PDBASE=$1
 +
 +      local TDBTOOL_HAS_CHECK=`echo "help" | /usr/bin/tdbtool | grep check | wc -l`
 +
 +      test x"$TDBTOOL_HAS_CHECK" = x"1" && {
 +              #
 +              # Note tdbtool always exits with 0
 +              #
 +              local OK=`/usr/bin/tdbtool $PDBASE check | grep "Database integrity is OK" | wc -l`
 +              test x"$OK" = x"1" || {
 +                      return 1;
 +              }
 +
 +              return 0;
 +      }
 +
 +      /usr/bin/tdbdump $PDBASE >/dev/null 2>/dev/null || {
 +              return $?;
 +      }
 +
 +      return 0;
  }
  
  check_persistent_databases () {
      PERSISTENT_DB_DIR="${CTDB_DBDIR:-/var/ctdb}/persistent"
      mkdir -p $PERSISTENT_DB_DIR 2>/dev/null
 +    local ERRCOUNT=$CTDB_MAX_PERSISTENT_CHECK_ERRORS
 +
 +    test -z "$ERRCOUNT" && {
 +      ERRCOUNT="0"
 +    }
 +    test x"$ERRCOUNT" != x"0" && {
 +      return 0;
 +    }
      for PDBASE in `ls $PERSISTENT_DB_DIR/*.tdb.[0-9] 2>/dev/null`; do
 -      /usr/bin/tdbdump $PDBASE >/dev/null 2>/dev/null || {
 +      check_tdb $PDBASE || {
            echo "Persistent database $PDBASE is corrupted! CTDB will not start."
            return 1
        }
@@@ -186,7 -153,7 +186,7 @@@ start() 
      case $init_style in
        valgrind)
            valgrind -q --log-file=/var/log/ctdb_valgrind \
-               $ctdbd --nosetsched $CTDB_OPTIONS 
+               $ctdbd --valgrinding $CTDB_OPTIONS
            RETVAL=$?
            echo
            ;;
diff --combined include/ctdb_private.h
index fdd205d050f6dfab31c3d83843e992d33cbac4f4,de74bb88fdf29b7ffae0de4ad35cdc2aca73ab77..6107bdfc2d3286ae4b3eda95f05aa57419aaebe4
@@@ -129,7 -129,6 +129,7 @@@ struct ctdb_tunable 
        uint32_t vacuum_max_interval;
        uint32_t max_queue_depth_drop_msg;
        uint32_t use_status_events_for_monitoring;
 +      uint32_t allow_unhealthy_db_read;
  };
  
  /*
@@@ -406,11 -405,6 +406,11 @@@ struct ctdb_context 
        const char *name;
        const char *db_directory;
        const char *db_directory_persistent;
 +      const char *db_directory_state;
 +      struct tdb_wrap *db_persistent_health;
 +      uint32_t db_persistent_startup_generation;
 +      uint64_t db_persistent_check_errors;
 +      uint64_t max_persistent_check_errors;
        const char *transport;
        char *recovery_lock_file;
        int recovery_lock_fd;
        uint32_t recovery_master;
        struct ctdb_call_state *pending_calls;
        struct ctdb_client_ip *client_ip_list;
-       bool do_setsched;
        bool do_checkpublicip;
-       void *saved_scheduler_param;
        struct _trbt_tree_t *server_ids;        
        const char *event_script_dir;
        const char *notification_script;
        struct ctdb_log_state *log;
        int start_as_disabled;
        int start_as_stopped;
+       bool valgrinding;
        uint32_t event_script_timeouts; /* counting how many consecutive times an eventscript has timedout */
        uint32_t *recd_ping_count;
        TALLOC_CTX *release_ips_ctx; /* a context used to automatically drop all IPs if we fail to recover the node */
@@@ -483,7 -476,6 +482,7 @@@ struct ctdb_db_context 
        struct ctdb_traverse_local_handle *traverse;
        bool transaction_active;
        struct ctdb_vacuum_handle *vacuum_handle;
 +      char *unhealthy_reason;
  };
  
  
@@@ -633,8 -625,6 +632,8 @@@ enum ctdb_controls {CTDB_CONTROL_PROCES
                    CTDB_CONTROL_CLEAR_LOG               = 118,
                    CTDB_CONTROL_TRANS3_COMMIT           = 119,
                    CTDB_CONTROL_GET_DB_SEQNUM           = 120,
 +                  CTDB_CONTROL_DB_SET_HEALTHY          = 121,
 +                  CTDB_CONTROL_DB_GET_HEALTH           = 122,
  };    
  
  /*
@@@ -1252,8 -1242,7 +1251,7 @@@ void ctdb_call_resend_all(struct ctdb_c
  void ctdb_node_dead(struct ctdb_node *node);
  void ctdb_node_connected(struct ctdb_node *node);
  bool ctdb_blocking_freeze(struct ctdb_context *ctdb);
- void ctdb_set_scheduler(struct ctdb_context *ctdb);
- void ctdb_restore_scheduler(struct ctdb_context *ctdb);
+ void ctdb_high_priority(struct ctdb_context *ctdb);
  int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, 
                                 struct ctdb_req_control *c,
                                 TDB_DATA indata, 
@@@ -1424,7 -1413,7 +1422,7 @@@ int32_t ctdb_control_get_server_id_list
  int32_t ctdb_control_uptime(struct ctdb_context *ctdb, 
                      TDB_DATA *outdata);
  
 -int ctdb_attach_persistent(struct ctdb_context *ctdb);
 +int ctdb_attach_databases(struct ctdb_context *ctdb);
  
  int32_t ctdb_control_persistent_store(struct ctdb_context *ctdb, 
                                      struct ctdb_req_control *c, 
@@@ -1444,10 -1433,6 +1442,10 @@@ int32_t ctdb_control_transaction_start(
  int32_t ctdb_control_transaction_commit(struct ctdb_context *ctdb, uint32_t id);
  int32_t ctdb_control_transaction_cancel(struct ctdb_context *ctdb);
  int32_t ctdb_control_wipe_database(struct ctdb_context *ctdb, TDB_DATA indata);
 +int32_t ctdb_control_db_set_healthy(struct ctdb_context *ctdb, TDB_DATA indata);
 +int32_t ctdb_control_db_get_health(struct ctdb_context *ctdb,
 +                                 TDB_DATA indata,
 +                                 TDB_DATA *outdata);
  
  
  int ctdb_vacuum(struct ctdb_context *ctdb, int argc, const char **argv);
@@@ -1457,7 -1442,7 +1455,7 @@@ void ctdb_block_signal(int signum)
  void ctdb_unblock_signal(int signum);
  int32_t ctdb_monitoring_mode(struct ctdb_context *ctdb);
  int ctdb_set_child_logging(struct ctdb_context *ctdb);
+ void ctdb_lockdown_memory(struct ctdb_context *ctdb);
  
  typedef void (*client_async_callback)(struct ctdb_context *ctdb, uint32_t node_pnn, int32_t res, TDB_DATA outdata, void *callback_data);
  
@@@ -1554,12 -1539,4 +1552,12 @@@ int32_t ctdb_control_get_db_seqnum(stru
                                   TDB_DATA indata,
                                   TDB_DATA *outdata);
  
 +int ctdb_load_persistent_health(struct ctdb_context *ctdb,
 +                              struct ctdb_db_context *ctdb_db);
 +int ctdb_update_persistent_health(struct ctdb_context *ctdb,
 +                                struct ctdb_db_context *ctdb_db,
 +                                const char *reason,/* NULL means healthy */
 +                                int num_healthy_nodes);
 +int ctdb_recheck_persistent_health(struct ctdb_context *ctdb);
 +
  #endif
diff --combined server/ctdb_daemon.c
index 7f3128b4698465e782e5e92c38386cd29f08c48e,cc076fbc8390e795c8bd8649af9de51f90a44929..e96b369ece602a1e2126fff29b5cb93d442c5908
@@@ -354,16 -354,6 +354,16 @@@ static void daemon_request_call_from_cl
                return;
        }
  
 +      if (ctdb_db->unhealthy_reason) {
 +              /*
 +               * this is just a warning, as the tdb should be empty anyway,
 +               * and only persistent databases can be unhealthy, and those
 +               * don't use this code path
 +               */
 +              DEBUG(DEBUG_WARNING,("warn: db(%s) unhealty in daemon_request_call_from_client(): %s\n",
 +                                   ctdb_db->db_name, ctdb_db->unhealthy_reason));
 +      }
 +
        key.dptr = c->data;
        key.dsize = c->keylen;
  
@@@ -730,10 -720,7 +730,7 @@@ int ctdb_start_daemon(struct ctdb_conte
  
        DEBUG(DEBUG_ERR, ("Starting CTDBD as pid : %u\n", ctdbd_pid));
  
-       if (ctdb->do_setsched) {
-               /* try to set us up as realtime */
-               ctdb_set_scheduler(ctdb);
-       }
+       ctdb_high_priority(ctdb);
  
        /* ensure the socket is deleted on exit of the daemon */
        domain_socket_name = talloc_strdup(talloc_autofree_context(), ctdb->daemon.name);
                ctdb_fatal(ctdb, "transport failed to initialise");
        }
  
 -      /* attach to any existing persistent databases */
 -      if (ctdb_attach_persistent(ctdb) != 0) {
 -              ctdb_fatal(ctdb, "Failed to attach to persistent databases\n");         
 +      /* attach to existing databases */
 +      if (ctdb_attach_databases(ctdb) != 0) {
 +              ctdb_fatal(ctdb, "Failed to attach to databases\n");
        }
  
        /* start frozen, then let the first election sort things out */
                }
        }
  
+       ctdb_lockdown_memory(ctdb);
          
        /* go into a wait loop to allow other nodes to complete */
        event_loop_wait(ctdb->ev);
index 47a2d6a693c2d2c488391a86ecdf53cc80bee1a1,c24d4214d54f90ae3005b79dbc77eddb905be9a7..9b3e7e071d0ca0d3dfe423d873cbc06493eae7f4
  #include "system/network.h"
  #include "system/filesys.h"
  #include "system/dir.h"
 +#include "system/time.h"
  #include "../include/ctdb_private.h"
  #include "db_wrap.h"
  #include "lib/util/dlinklist.h"
  #include <ctype.h>
  
 +#define PERSISTENT_HEALTH_TDB "persistent_health.tdb"
 +
  /*
    this is the dummy null procedure that all databases support
  */
@@@ -191,333 -188,17 +191,333 @@@ static void ctdb_check_db_empty(struct 
        }
  }
  
 +int ctdb_load_persistent_health(struct ctdb_context *ctdb,
 +                              struct ctdb_db_context *ctdb_db)
 +{
 +      struct tdb_context *tdb = ctdb->db_persistent_health->tdb;
 +      char *old;
 +      char *reason = NULL;
 +      TDB_DATA key;
 +      TDB_DATA val;
 +
 +      key.dptr = discard_const_p(uint8_t, ctdb_db->db_name);
 +      key.dsize = strlen(ctdb_db->db_name);
 +
 +      old = ctdb_db->unhealthy_reason;
 +      ctdb_db->unhealthy_reason = NULL;
 +
 +      val = tdb_fetch(tdb, key);
 +      if (val.dsize > 0) {
 +              reason = talloc_strndup(ctdb_db,
 +                                      (const char *)val.dptr,
 +                                      val.dsize);
 +              if (reason == NULL) {
 +                      DEBUG(DEBUG_ALERT,(__location__ " talloc_strndup(%d) failed\n",
 +                                         (int)val.dsize));
 +                      ctdb_db->unhealthy_reason = old;
 +                      free(val.dptr);
 +                      return -1;
 +              }
 +      }
 +
 +      if (val.dptr) {
 +              free(val.dptr);
 +      }
 +
 +      talloc_free(old);
 +      ctdb_db->unhealthy_reason = reason;
 +      return 0;
 +}
 +
 +int ctdb_update_persistent_health(struct ctdb_context *ctdb,
 +                                struct ctdb_db_context *ctdb_db,
 +                                const char *given_reason,/* NULL means healthy */
 +                                int num_healthy_nodes)
 +{
 +      struct tdb_context *tdb = ctdb->db_persistent_health->tdb;
 +      int ret;
 +      TDB_DATA key;
 +      TDB_DATA val;
 +      char *new_reason = NULL;
 +      char *old_reason = NULL;
 +
 +      ret = tdb_transaction_start(tdb);
 +      if (ret != 0) {
 +              DEBUG(DEBUG_ALERT,(__location__ " tdb_transaction_start('%s') failed: %d - %s\n",
 +                                 tdb_name(tdb), ret, tdb_errorstr(tdb)));
 +              return -1;
 +      }
 +
 +      ret = ctdb_load_persistent_health(ctdb, ctdb_db);
 +      if (ret != 0) {
 +              DEBUG(DEBUG_ALERT,(__location__ " ctdb_load_persistent_health('%s') failed: %d\n",
 +                                 ctdb_db->db_name, ret));
 +              return -1;
 +      }
 +      old_reason = ctdb_db->unhealthy_reason;
 +
 +      key.dptr = discard_const_p(uint8_t, ctdb_db->db_name);
 +      key.dsize = strlen(ctdb_db->db_name);
 +
 +      if (given_reason) {
 +              new_reason = talloc_strdup(ctdb_db, given_reason);
 +              if (new_reason == NULL) {
 +                      DEBUG(DEBUG_ALERT,(__location__ " talloc_strdup(%s) failed\n",
 +                                        given_reason));
 +                      return -1;
 +              }
 +      } else if (old_reason && num_healthy_nodes == 0) {
 +              /*
 +               * If the reason indicates ok, but there where no healthy nodes
 +               * available, that it means, we have not recovered valid content
 +               * of the db. So if there's an old reason, prefix it with
 +               * "NO-HEALTHY-NODES - "
 +               */
 +              const char *prefix;
 +
 +#define _TMP_PREFIX "NO-HEALTHY-NODES - "
 +              ret = strncmp(_TMP_PREFIX, old_reason, strlen(_TMP_PREFIX));
 +              if (ret != 0) {
 +                      prefix = _TMP_PREFIX;
 +              } else {
 +                      prefix = "";
 +              }
 +              new_reason = talloc_asprintf(ctdb_db, "%s%s",
 +                                       prefix, old_reason);
 +              if (new_reason == NULL) {
 +                      DEBUG(DEBUG_ALERT,(__location__ " talloc_asprintf(%s%s) failed\n",
 +                                        prefix, old_reason));
 +                      return -1;
 +              }
 +#undef _TMP_PREFIX
 +      }
 +
 +      if (new_reason) {
 +              val.dptr = discard_const_p(uint8_t, new_reason);
 +              val.dsize = strlen(new_reason);
 +
 +              ret = tdb_store(tdb, key, val, TDB_REPLACE);
 +              if (ret != 0) {
 +                      tdb_transaction_cancel(tdb);
 +                      DEBUG(DEBUG_ALERT,(__location__ " tdb_store('%s', %s, %s) failed: %d - %s\n",
 +                                         tdb_name(tdb), ctdb_db->db_name, new_reason,
 +                                         ret, tdb_errorstr(tdb)));
 +                      talloc_free(new_reason);
 +                      return -1;
 +              }
 +              DEBUG(DEBUG_ALERT,("Updated db health for db(%s) to: %s\n",
 +                                 ctdb_db->db_name, new_reason));
 +      } else if (old_reason) {
 +              ret = tdb_delete(tdb, key);
 +              if (ret != 0) {
 +                      tdb_transaction_cancel(tdb);
 +                      DEBUG(DEBUG_ALERT,(__location__ " tdb_delete('%s', %s) failed: %d - %s\n",
 +                                         tdb_name(tdb), ctdb_db->db_name,
 +                                         ret, tdb_errorstr(tdb)));
 +                      talloc_free(new_reason);
 +                      return -1;
 +              }
 +              DEBUG(DEBUG_NOTICE,("Updated db health for db(%s): OK\n",
 +                                 ctdb_db->db_name));
 +      }
 +
 +      ret = tdb_transaction_commit(tdb);
 +      if (ret != TDB_SUCCESS) {
 +              DEBUG(DEBUG_ALERT,(__location__ " tdb_transaction_commit('%s') failed: %d - %s\n",
 +                                 tdb_name(tdb), ret, tdb_errorstr(tdb)));
 +              talloc_free(new_reason);
 +              return -1;
 +      }
 +
 +      talloc_free(old_reason);
 +      ctdb_db->unhealthy_reason = new_reason;
 +
 +      return 0;
 +}
 +
 +static int ctdb_backup_corrupted_tdb(struct ctdb_context *ctdb,
 +                                   struct ctdb_db_context *ctdb_db)
 +{
 +      time_t now = time(NULL);
 +      char *new_path;
 +      char *new_reason;
 +      int ret;
 +      struct tm *tm;
 +
 +      tm = gmtime(&now);
 +
 +      /* formatted like: foo.tdb.0.corrupted.20091204160825.0Z */
 +      new_path = talloc_asprintf(ctdb_db, "%s.corrupted."
 +                                 "%04u%02u%02u%02u%02u%02u.0Z",
 +                                 ctdb_db->db_path,
 +                                 tm->tm_year+1900, tm->tm_mon+1,
 +                                 tm->tm_mday, tm->tm_hour, tm->tm_min,
 +                                 tm->tm_sec);
 +      if (new_path == NULL) {
 +              DEBUG(DEBUG_CRIT,(__location__ " talloc_asprintf() failed\n"));
 +              return -1;
 +      }
 +
 +      new_reason = talloc_asprintf(ctdb_db,
 +                                   "ERROR - Backup of corrupted TDB in '%s'",
 +                                   new_path);
 +      if (new_reason == NULL) {
 +              DEBUG(DEBUG_CRIT,(__location__ " talloc_asprintf() failed\n"));
 +              return -1;
 +      }
 +      ret = ctdb_update_persistent_health(ctdb, ctdb_db, new_reason, 0);
 +      talloc_free(new_reason);
 +      if (ret != 0) {
 +              DEBUG(DEBUG_CRIT,(__location__
 +                               ": ctdb_backup_corrupted_tdb(%s) not implemented yet\n",
 +                               ctdb_db->db_path));
 +              return -1;
 +      }
 +
 +      ret = rename(ctdb_db->db_path, new_path);
 +      if (ret != 0) {
 +              DEBUG(DEBUG_CRIT,(__location__
 +                                ": ctdb_backup_corrupted_tdb(%s) rename to %s failed: %d - %s\n",
 +                                ctdb_db->db_path, new_path,
 +                                errno, strerror(errno)));
 +              talloc_free(new_path);
 +              return -1;
 +      }
 +
 +      DEBUG(DEBUG_CRIT,(__location__
 +                       ": ctdb_backup_corrupted_tdb(%s) renamed to %s\n",
 +                       ctdb_db->db_path, new_path));
 +      talloc_free(new_path);
 +      return 0;
 +}
 +
 +int ctdb_recheck_persistent_health(struct ctdb_context *ctdb)
 +{
 +      struct ctdb_db_context *ctdb_db;
 +      int ret;
 +      int ok = 0;
 +      int fail = 0;
 +
 +      for (ctdb_db = ctdb->db_list; ctdb_db; ctdb_db = ctdb_db->next) {
 +              if (!ctdb_db->persistent) {
 +                      continue;
 +              }
 +
 +              ret = ctdb_load_persistent_health(ctdb, ctdb_db);
 +              if (ret != 0) {
 +                      DEBUG(DEBUG_ALERT,(__location__
 +                                         " load persistent health for '%s' failed\n",
 +                                         ctdb_db->db_path));
 +                      return -1;
 +              }
 +
 +              if (ctdb_db->unhealthy_reason == NULL) {
 +                      ok++;
 +                      DEBUG(DEBUG_INFO,(__location__
 +                                 " persistent db '%s' healthy\n",
 +                                 ctdb_db->db_path));
 +                      continue;
 +              }
 +
 +              fail++;
 +              DEBUG(DEBUG_ALERT,(__location__
 +                                 " persistent db '%s' unhealthy: %s\n",
 +                                 ctdb_db->db_path,
 +                                 ctdb_db->unhealthy_reason));
 +      }
 +      DEBUG((fail!=0)?DEBUG_ALERT:DEBUG_NOTICE,
 +            ("ctdb_recheck_presistent_health: OK[%d] FAIL[%d]\n",
 +             ok, fail));
 +
 +      if (fail != 0) {
 +              return -1;
 +      }
 +
 +      return 0;
 +}
 +
 +
 +/*
 +  mark a database - as healthy
 + */
 +int32_t ctdb_control_db_set_healthy(struct ctdb_context *ctdb, TDB_DATA indata)
 +{
 +      uint32_t db_id = *(uint32_t *)indata.dptr;
 +      struct ctdb_db_context *ctdb_db;
 +      int ret;
 +      bool may_recover = false;
 +
 +      ctdb_db = find_ctdb_db(ctdb, db_id);
 +      if (!ctdb_db) {
 +              DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%x\n", db_id));
 +              return -1;
 +      }
 +
 +      if (ctdb_db->unhealthy_reason) {
 +              may_recover = true;
 +      }
 +
 +      ret = ctdb_update_persistent_health(ctdb, ctdb_db, NULL, 1);
 +      if (ret != 0) {
 +              DEBUG(DEBUG_ERR,(__location__
 +                               " ctdb_update_persistent_health(%s) failed\n",
 +                               ctdb_db->db_name));
 +              return -1;
 +      }
 +
 +      if (may_recover && !ctdb->done_startup) {
 +              DEBUG(DEBUG_ERR, (__location__ " db %s become healthy  - force recovery for startup\n",
 +                                ctdb_db->db_name));
 +              ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
 +      }
 +
 +      return 0;
 +}
 +
 +int32_t ctdb_control_db_get_health(struct ctdb_context *ctdb,
 +                                 TDB_DATA indata,
 +                                 TDB_DATA *outdata)
 +{
 +      uint32_t db_id = *(uint32_t *)indata.dptr;
 +      struct ctdb_db_context *ctdb_db;
 +      int ret;
 +
 +      ctdb_db = find_ctdb_db(ctdb, db_id);
 +      if (!ctdb_db) {
 +              DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%x\n", db_id));
 +              return -1;
 +      }
 +
 +      ret = ctdb_load_persistent_health(ctdb, ctdb_db);
 +      if (ret != 0) {
 +              DEBUG(DEBUG_ERR,(__location__
 +                               " ctdb_load_persistent_health(%s) failed\n",
 +                               ctdb_db->db_name));
 +              return -1;
 +      }
 +
 +      *outdata = tdb_null;
 +      if (ctdb_db->unhealthy_reason) {
 +              outdata->dptr = (uint8_t *)ctdb_db->unhealthy_reason;
 +              outdata->dsize = strlen(ctdb_db->unhealthy_reason)+1;
 +      }
 +
 +      return 0;
 +}
  
  /*
    attach to a database, handling both persistent and non-persistent databases
    return 0 on success, -1 on failure
   */
 -static int ctdb_local_attach(struct ctdb_context *ctdb, const char *db_name, bool persistent)
 +static int ctdb_local_attach(struct ctdb_context *ctdb, const char *db_name,
 +                           bool persistent, const char *unhealthy_reason)
  {
        struct ctdb_db_context *ctdb_db, *tmp_db;
        int ret;
        struct TDB_DATA key;
        unsigned tdb_flags;
 +      int mode = 0600;
 +      int remaining_tries = 0;
  
        ctdb_db = talloc_zero(ctdb, struct ctdb_db_context);
        CTDB_NO_MEMORY(ctdb, ctdb_db);
                }
        }
  
 -      if (ctdb->db_directory == NULL) {
 -              ctdb->db_directory = VARDIR "/ctdb";
 +      if (persistent) {
 +              if (unhealthy_reason) {
 +                      ret = ctdb_update_persistent_health(ctdb, ctdb_db,
 +                                                          unhealthy_reason, 0);
 +                      if (ret != 0) {
 +                              DEBUG(DEBUG_ALERT,(__location__ " ctdb_update_persistent_health('%s','%s') failed: %d\n",
 +                                                 ctdb_db->db_name, unhealthy_reason, ret));
 +                              talloc_free(ctdb_db);
 +                              return -1;
 +                      }
 +              }
 +
 +              if (ctdb->max_persistent_check_errors > 0) {
 +                      remaining_tries = 1;
 +              }
 +              if (ctdb->done_startup) {
 +                      remaining_tries = 0;
 +              }
 +
 +              ret = ctdb_load_persistent_health(ctdb, ctdb_db);
 +              if (ret != 0) {
 +                      DEBUG(DEBUG_ALERT,(__location__ " ctdb_load_persistent_health('%s') failed: %d\n",
 +                                 ctdb_db->db_name, ret));
 +                      talloc_free(ctdb_db);
 +                      return -1;
 +              }
        }
  
 -      /* make sure the db directory exists */
 -      if (mkdir(ctdb->db_directory, 0700) == -1 && errno != EEXIST) {
 -              DEBUG(DEBUG_CRIT,(__location__ " Unable to create ctdb directory '%s'\n", 
 -                       ctdb->db_directory));
 +      if (ctdb_db->unhealthy_reason && remaining_tries == 0) {
 +              DEBUG(DEBUG_ALERT,(__location__ "ERROR: tdb %s is marked as unhealthy: %s\n",
 +                                 ctdb_db->db_name, ctdb_db->unhealthy_reason));
                talloc_free(ctdb_db);
                return -1;
        }
  
 -      if (persistent && mkdir(ctdb->db_directory_persistent, 0700) == -1 && errno != EEXIST) {
 -              DEBUG(DEBUG_CRIT,(__location__ " Unable to create ctdb persistent directory '%s'\n", 
 -                       ctdb->db_directory_persistent));
 -              talloc_free(ctdb_db);
 -              return -1;
 +      if (ctdb_db->unhealthy_reason) {
 +              /* this is just a warning, but we want that in the log file! */
 +              DEBUG(DEBUG_ALERT,(__location__ "Warning: tdb %s is marked as unhealthy: %s\n",
 +                                 ctdb_db->db_name, ctdb_db->unhealthy_reason));
        }
  
        /* open the database */
                                           db_name, ctdb->pnn);
  
        tdb_flags = persistent? TDB_DEFAULT : TDB_CLEAR_IF_FIRST | TDB_NOSYNC;
-       if (!ctdb->do_setsched) {
+       if (ctdb->valgrinding) {
                tdb_flags |= TDB_NOMMAP;
        }
 +      tdb_flags |= TDB_DISALLOW_NESTING;
  
 +again:
        ctdb_db->ltdb = tdb_wrap_open(ctdb, ctdb_db->db_path, 
                                      ctdb->tunable.database_hash_size, 
                                      tdb_flags, 
 -                                    O_CREAT|O_RDWR, 0666);
 +                                    O_CREAT|O_RDWR, mode);
        if (ctdb_db->ltdb == NULL) {
 -              DEBUG(DEBUG_CRIT,("Failed to open tdb '%s'\n", ctdb_db->db_path));
 -              talloc_free(ctdb_db);
 -              return -1;
 +              struct stat st;
 +              int saved_errno = errno;
 +
 +              if (!persistent) {
 +                      DEBUG(DEBUG_CRIT,("Failed to open tdb '%s': %d - %s\n",
 +                                        ctdb_db->db_path,
 +                                        saved_errno,
 +                                        strerror(saved_errno)));
 +                      talloc_free(ctdb_db);
 +                      return -1;
 +              }
 +
 +              if (remaining_tries == 0) {
 +                      DEBUG(DEBUG_CRIT,(__location__
 +                                        "Failed to open persistent tdb '%s': %d - %s\n",
 +                                        ctdb_db->db_path,
 +                                        saved_errno,
 +                                        strerror(saved_errno)));
 +                      talloc_free(ctdb_db);
 +                      return -1;
 +              }
 +
 +              ret = stat(ctdb_db->db_path, &st);
 +              if (ret != 0) {
 +                      DEBUG(DEBUG_CRIT,(__location__
 +                                        "Failed to open persistent tdb '%s': %d - %s\n",
 +                                        ctdb_db->db_path,
 +                                        saved_errno,
 +                                        strerror(saved_errno)));
 +                      talloc_free(ctdb_db);
 +                      return -1;
 +              }
 +
 +              ret = ctdb_backup_corrupted_tdb(ctdb, ctdb_db);
 +              if (ret != 0) {
 +                      DEBUG(DEBUG_CRIT,(__location__
 +                                        "Failed to open persistent tdb '%s': %d - %s\n",
 +                                        ctdb_db->db_path,
 +                                        saved_errno,
 +                                        strerror(saved_errno)));
 +                      talloc_free(ctdb_db);
 +                      return -1;
 +              }
 +
 +              remaining_tries--;
 +              mode = st.st_mode;
 +              goto again;
        }
  
        if (!persistent) {
                ctdb_check_db_empty(ctdb_db);
 +      } else {
 +              ret = tdb_check(ctdb_db->ltdb->tdb, NULL, NULL);
 +              if (ret != 0) {
 +                      int fd;
 +                      struct stat st;
 +
 +                      DEBUG(DEBUG_CRIT,("tdb_check(%s) failed: %d - %s\n",
 +                                        ctdb_db->db_path, ret,
 +                                        tdb_errorstr(ctdb_db->ltdb->tdb)));
 +                      if (remaining_tries == 0) {
 +                              talloc_free(ctdb_db);
 +                              return -1;
 +                      }
 +
 +                      fd = tdb_fd(ctdb_db->ltdb->tdb);
 +                      ret = fstat(fd, &st);
 +                      if (ret != 0) {
 +                              DEBUG(DEBUG_CRIT,(__location__
 +                                                "Failed to fstat() persistent tdb '%s': %d - %s\n",
 +                                                ctdb_db->db_path,
 +                                                errno,
 +                                                strerror(errno)));
 +                              talloc_free(ctdb_db);
 +                              return -1;
 +                      }
 +
 +                      /* close the TDB */
 +                      talloc_free(ctdb_db->ltdb);
 +                      ctdb_db->ltdb = NULL;
 +
 +                      ret = ctdb_backup_corrupted_tdb(ctdb, ctdb_db);
 +                      if (ret != 0) {
 +                              DEBUG(DEBUG_CRIT,("Failed to backup corrupted tdb '%s'\n",
 +                                                ctdb_db->db_path));
 +                              talloc_free(ctdb_db);
 +                              return -1;
 +                      }
 +
 +                      remaining_tries--;
 +                      mode = st.st_mode;
 +                      goto again;
 +              }
        }
  
        DLIST_ADD(ctdb->db_list, ctdb_db);
@@@ -774,7 -345,7 +774,7 @@@ int32_t ctdb_control_db_attach(struct c
                return 0;
        }
  
 -      if (ctdb_local_attach(ctdb, db_name, persistent) != 0) {
 +      if (ctdb_local_attach(ctdb, db_name, persistent, NULL) != 0) {
                return -1;
        }
  
        outdata->dptr  = (uint8_t *)&db->db_id;
        outdata->dsize = sizeof(db->db_id);
  
+       /* Try to ensure it's locked in mem */
+       ctdb_lockdown_memory(ctdb);
        /* tell all the other nodes about this database */
        ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
                                 persistent?CTDB_CONTROL_DB_ATTACH_PERSISTENT:
  /*
    attach to all existing persistent databases
   */
 -int ctdb_attach_persistent(struct ctdb_context *ctdb)
 +static int ctdb_attach_persistent(struct ctdb_context *ctdb,
 +                                const char *unhealthy_reason)
  {
        DIR *d;
        struct dirent *de;
                }
                p[4] = 0;
  
 -              if (ctdb_local_attach(ctdb, s, true) != 0) {
 +              if (ctdb_local_attach(ctdb, s, true, unhealthy_reason) != 0) {
                        DEBUG(DEBUG_ERR,("Failed to attach to persistent database '%s'\n", de->d_name));
                        closedir(d);
                        talloc_free(s);
                        return -1;
                }
 +
                DEBUG(DEBUG_INFO,("Attached to persistent database %s\n", s));
  
                talloc_free(s);
        return 0;
  }
  
 +int ctdb_attach_databases(struct ctdb_context *ctdb)
 +{
 +      int ret;
 +      char *persistent_health_path = NULL;
 +      char *unhealthy_reason = NULL;
 +      bool first_try = true;
 +
 +      if (ctdb->db_directory == NULL) {
 +              ctdb->db_directory = VARDIR "/ctdb";
 +      }
 +      if (ctdb->db_directory_persistent == NULL) {
 +              ctdb->db_directory_persistent = VARDIR "/ctdb/persistent";
 +      }
 +      if (ctdb->db_directory_state == NULL) {
 +              ctdb->db_directory_state = VARDIR "/ctdb/state";
 +      }
 +
 +      /* make sure the db directory exists */
 +      ret = mkdir(ctdb->db_directory, 0700);
 +      if (ret == -1 && errno != EEXIST) {
 +              DEBUG(DEBUG_CRIT,(__location__ " Unable to create ctdb directory '%s'\n",
 +                       ctdb->db_directory));
 +              return -1;
 +      }
 +
 +      /* make sure the persistent db directory exists */
 +      ret = mkdir(ctdb->db_directory_persistent, 0700);
 +      if (ret == -1 && errno != EEXIST) {
 +              DEBUG(DEBUG_CRIT,(__location__ " Unable to create ctdb persistent directory '%s'\n",
 +                       ctdb->db_directory_persistent));
 +              return -1;
 +      }
 +
 +      /* make sure the internal state db directory exists */
 +      ret = mkdir(ctdb->db_directory_state, 0700);
 +      if (ret == -1 && errno != EEXIST) {
 +              DEBUG(DEBUG_CRIT,(__location__ " Unable to create ctdb state directory '%s'\n",
 +                       ctdb->db_directory_state));
 +              return -1;
 +      }
 +
 +      persistent_health_path = talloc_asprintf(ctdb, "%s/%s.%u",
 +                                               ctdb->db_directory_state,
 +                                               PERSISTENT_HEALTH_TDB,
 +                                               ctdb->pnn);
 +      if (persistent_health_path == NULL) {
 +              DEBUG(DEBUG_CRIT,(__location__ " talloc_asprintf() failed\n"));
 +              return -1;
 +      }
 +
 +again:
 +
 +      ctdb->db_persistent_health = tdb_wrap_open(ctdb, persistent_health_path,
 +                                                 0, TDB_DISALLOW_NESTING,
 +                                                 O_CREAT | O_RDWR, 0600);
 +      if (ctdb->db_persistent_health == NULL) {
 +              struct tdb_wrap *tdb;
 +
 +              if (!first_try) {
 +                      DEBUG(DEBUG_CRIT,("Failed to open tdb '%s': %d - %s\n",
 +                                        persistent_health_path,
 +                                        errno,
 +                                        strerror(errno)));
 +                      talloc_free(persistent_health_path);
 +                      talloc_free(unhealthy_reason);
 +                      return -1;
 +              }
 +              first_try = false;
 +
 +              unhealthy_reason = talloc_asprintf(ctdb, "WARNING - '%s' %s - %s",
 +                                                 persistent_health_path,
 +                                                 "was cleared after a failure",
 +                                                 "manual verification needed");
 +              if (unhealthy_reason == NULL) {
 +                      DEBUG(DEBUG_CRIT,(__location__ " talloc_asprintf() failed\n"));
 +                      talloc_free(persistent_health_path);
 +                      return -1;
 +              }
 +
 +              DEBUG(DEBUG_CRIT,("Failed to open tdb '%s' - retrying after CLEAR_IF_FIRST\n",
 +                                persistent_health_path));
 +              tdb = tdb_wrap_open(ctdb, persistent_health_path,
 +                                  0, TDB_CLEAR_IF_FIRST | TDB_DISALLOW_NESTING,
 +                                  O_CREAT | O_RDWR, 0600);
 +              if (tdb) {
 +                      DEBUG(DEBUG_CRIT,("Failed to open tdb '%s' - with CLEAR_IF_FIRST: %d - %s\n",
 +                                        persistent_health_path,
 +                                        errno,
 +                                        strerror(errno)));
 +                      talloc_free(persistent_health_path);
 +                      talloc_free(unhealthy_reason);
 +                      return -1;
 +              }
 +
 +              talloc_free(tdb);
 +              goto again;
 +      }
 +      ret = tdb_check(ctdb->db_persistent_health->tdb, NULL, NULL);
 +      if (ret != 0) {
 +              struct tdb_wrap *tdb;
 +
 +              talloc_free(ctdb->db_persistent_health);
 +              ctdb->db_persistent_health = NULL;
 +
 +              if (!first_try) {
 +                      DEBUG(DEBUG_CRIT,("tdb_check('%s') failed\n",
 +                                        persistent_health_path));
 +                      talloc_free(persistent_health_path);
 +                      talloc_free(unhealthy_reason);
 +                      return -1;
 +              }
 +              first_try = false;
 +
 +              unhealthy_reason = talloc_asprintf(ctdb, "WARNING - '%s' %s - %s",
 +                                                 persistent_health_path,
 +                                                 "was cleared after a failure",
 +                                                 "manual verification needed");
 +              if (unhealthy_reason == NULL) {
 +                      DEBUG(DEBUG_CRIT,(__location__ " talloc_asprintf() failed\n"));
 +                      talloc_free(persistent_health_path);
 +                      return -1;
 +              }
 +
 +              DEBUG(DEBUG_CRIT,("tdb_check('%s') failed - retrying after CLEAR_IF_FIRST\n",
 +                                persistent_health_path));
 +              tdb = tdb_wrap_open(ctdb, persistent_health_path,
 +                                  0, TDB_CLEAR_IF_FIRST | TDB_DISALLOW_NESTING,
 +                                  O_CREAT | O_RDWR, 0600);
 +              if (tdb) {
 +                      DEBUG(DEBUG_CRIT,("Failed to open tdb '%s' - with CLEAR_IF_FIRST: %d - %s\n",
 +                                        persistent_health_path,
 +                                        errno,
 +                                        strerror(errno)));
 +                      talloc_free(persistent_health_path);
 +                      talloc_free(unhealthy_reason);
 +                      return -1;
 +              }
 +
 +              talloc_free(tdb);
 +              goto again;
 +      }
 +      talloc_free(persistent_health_path);
 +
 +      ret = ctdb_attach_persistent(ctdb, unhealthy_reason);
 +      talloc_free(unhealthy_reason);
 +      if (ret != 0) {
 +              return ret;
 +      }
 +
 +      return 0;
 +}
 +
  /*
    called when a broadcast seqnum update comes in
   */
@@@ -1031,12 -451,6 +1034,12 @@@ int32_t ctdb_ltdb_update_seqnum(struct 
                return -1;
        }
  
 +      if (ctdb_db->unhealthy_reason) {
 +              DEBUG(DEBUG_ERR,("db(%s) unhealty in ctdb_ltdb_update_seqnum: %s\n",
 +                               ctdb_db->db_name, ctdb_db->unhealthy_reason));
 +              return -1;
 +      }
 +
        tdb_increment_seqnum_nonblock(ctdb_db->ltdb->tdb);
        ctdb_db->seqnum = tdb_get_seqnum(ctdb_db->ltdb->tdb);
        return 0;
diff --combined server/ctdb_recoverd.c
index 088ecbba0b13eb358e2558832f4be7eff8d724a7,45b59d128a42a1b63bc7d54bb5e042d72576131e..4b407c38a40b80d726bb8175267700a1ebb67c3f
@@@ -1010,19 -1010,16 +1010,19 @@@ static struct tdb_wrap *create_recdb(st
        unsigned tdb_flags;
  
        /* open up the temporary recovery database */
 -      name = talloc_asprintf(mem_ctx, "%s/recdb.tdb", ctdb->db_directory);
 +      name = talloc_asprintf(mem_ctx, "%s/recdb.tdb.%u",
 +                             ctdb->db_directory_state,
 +                             ctdb->pnn);
        if (name == NULL) {
                return NULL;
        }
        unlink(name);
  
        tdb_flags = TDB_NOLOCK;
-       if (!ctdb->do_setsched) {
+       if (ctdb->valgrinding) {
                tdb_flags |= TDB_NOMMAP;
        }
 +      tdb_flags |= TDB_DISALLOW_NESTING;
  
        recdb = tdb_wrap_open(mem_ctx, name, ctdb->tunable.database_hash_size, 
                              tdb_flags, O_RDWR|O_CREAT|O_EXCL, 0600);
@@@ -1326,23 -1323,6 +1326,23 @@@ static int do_recovery(struct ctdb_reco
                return -1;
        }
  
 +      /*
 +        update all nodes to have the same flags that we have
 +       */
 +      for (i=0;i<nodemap->num;i++) {
 +              if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
 +                      continue;
 +              }
 +
 +              ret = update_flags_on_all_nodes(ctdb, nodemap, i, nodemap->nodes[i].flags);
 +              if (ret != 0) {
 +                      DEBUG(DEBUG_ERR, (__location__ " Unable to update flags on all nodes for node %d\n", i));
 +                      return -1;
 +              }
 +      }
 +
 +      DEBUG(DEBUG_NOTICE, (__location__ " Recovery - updated flags\n"));
 +
        /* pick a new generation number */
        generation = new_generation();
  
diff --combined server/ctdb_vacuum.c
index 2cbee309186238879f40a1cd0391855a69479977,655ecca6cf4777335bdc72b6163a11327ba744ee..22f4127375208059765da9a974bb407a3b7e1b52
@@@ -435,9 -435,7 +435,9 @@@ static int ctdb_repack_tdb(struct tdb_c
                return -1;
        }
  
 -      tmp_db = tdb_open("tmpdb", tdb_hash_size(tdb), TDB_INTERNAL, O_RDWR|O_CREAT, 0);
 +      tmp_db = tdb_open("tmpdb", tdb_hash_size(tdb),
 +                        TDB_INTERNAL|TDB_DISALLOW_NESTING,
 +                        O_RDWR|O_CREAT, 0);
        if (tmp_db == NULL) {
                DEBUG(DEBUG_ERR,(__location__ " Failed to create tmp_db\n"));
                tdb_transaction_cancel(tdb);
@@@ -516,19 -514,19 +516,22 @@@ static int update_tuning_db(struct ctdb
        struct vacuum_tuning_data tdata;
        struct vacuum_tuning_data *tptr;
        char *vac_dbname;
+       int flags;
  
        vac_dbname = talloc_asprintf(tmp_ctx, "%s/%s.%u",
 -                                      ctdb_db->ctdb->db_directory, 
 -                                      TUNINGDBNAME, ctdb_db->ctdb->pnn);
 +                                   ctdb_db->ctdb->db_directory_state,
 +                                   TUNINGDBNAME, ctdb_db->ctdb->pnn);
        if (vac_dbname == NULL) {
                DEBUG(DEBUG_CRIT,(__location__ " Out of memory error while allocating '%s'\n", vac_dbname));
                talloc_free(tmp_ctx);
                return -1;
        }
  
 -      flags = ctdb_db->ctdb->valgrinding ? TDB_NOMMAP : 0;
 -      tune_tdb = tdb_open(vac_dbname, 0, flags, O_RDWR|O_CREAT, 0644);
++      flags  = ctdb_db->ctdb->valgrinding ? TDB_NOMMAP : 0;
++      flags |= TDB_DISALLOW_NESTING;
 +      tune_tdb = tdb_open(vac_dbname, 0,
-                           TDB_DISALLOW_NESTING,
++                          flags,
 +                          O_RDWR|O_CREAT, 0600);
        if (tune_tdb == NULL) {
                DEBUG(DEBUG_ERR,(__location__ " Failed to create/open %s\n", TUNINGDBNAME));
                talloc_free(tmp_ctx);
@@@ -681,6 -679,7 +684,7 @@@ static int get_vacuum_interval(struct c
        char *vac_dbname;
        uint interval = ctdb_db->ctdb->tunable.vacuum_default_interval;
        struct ctdb_context *ctdb = ctdb_db->ctdb;
+       int flags;
  
        vac_dbname = talloc_asprintf(tmp_ctx, "%s/%s.%u", ctdb->db_directory, TUNINGDBNAME, ctdb->pnn);
        if (vac_dbname == NULL) {
                return interval;
        }
  
 -      flags = ctdb_db->ctdb->valgrinding ? TDB_NOMMAP : 0;
 -      tdb = tdb_open(vac_dbname, 0, flags, O_RDWR|O_CREAT, 0644);
++      flags  = ctdb_db->ctdb->valgrinding ? TDB_NOMMAP : 0;
++      flags |= TDB_DISALLOW_NESTING;
 +      tdb = tdb_open(vac_dbname, 0,
-                      TDB_DISALLOW_NESTING,
-                      O_RDWR|O_CREAT, 0644);
++                     flags,
++                     O_RDWR|O_CREAT, 0600);
        if (!tdb) {
                DEBUG(DEBUG_ERR,("Unable to open/create database %s using default interval\n", vac_dbname));
                talloc_free(tmp_ctx);
diff --combined server/ctdbd.c
index e32aa6580fab8a8625de5ac5ff00c4a86d49c9ce,80c5b150f4bc005aa87e2fe0d2aaef32fc552f65..00f630b99ac406566aec90214fa7893ac4bbe812
@@@ -38,11 -38,10 +38,11 @@@ static struct 
        const char *recovery_lock_file;
        const char *db_dir;
        const char *db_dir_persistent;
 +      const char *db_dir_state;
        const char *public_interface;
        const char *single_public_ip;
        const char *node_ip;
-       int         no_setsched;
+       int         valgrinding;
        int         use_syslog;
        int         start_as_disabled;
        int         start_as_stopped;
@@@ -51,7 -50,6 +51,7 @@@
        int         lvs;
        int         script_log_level;
        int         no_publicipcheck;
 +      int         max_persistent_check_errors;
  } options = {
        .nlist = ETCDIR "/ctdb/nodes",
        .transport = "tcp",
@@@ -59,7 -57,6 +59,7 @@@
        .logfile = LOGDIR "/log.ctdb",
        .db_dir = VARDIR "/ctdb",
        .db_dir_persistent = VARDIR "/ctdb/persistent",
 +      .db_dir_state = VARDIR "/ctdb/state",
        .script_log_level = DEBUG_ERR,
  };
  
@@@ -129,9 -126,8 +129,9 @@@ int main(int argc, const char *argv[]
                { "transport", 0, POPT_ARG_STRING, &options.transport, 0, "protocol transport", NULL },
                { "dbdir", 0, POPT_ARG_STRING, &options.db_dir, 0, "directory for the tdb files", NULL },
                { "dbdir-persistent", 0, POPT_ARG_STRING, &options.db_dir_persistent, 0, "directory for persistent tdb files", NULL },
 +              { "dbdir-state", 0, POPT_ARG_STRING, &options.db_dir_state, 0, "directory for internal state tdb files", NULL },
                { "reclock", 0, POPT_ARG_STRING, &options.recovery_lock_file, 0, "location of recovery lock file", "filename" },
-               { "nosetsched", 0, POPT_ARG_NONE, &options.no_setsched, 0, "disable setscheduler SCHED_FIFO call", NULL },
+               { "valgrinding", 0, POPT_ARG_NONE, &options.valgrinding, 0, "make valgrind more effective", NULL },
                { "syslog", 0, POPT_ARG_NONE, &options.use_syslog, 0, "log messages to syslog", NULL },
                { "start-as-disabled", 0, POPT_ARG_NONE, &options.start_as_disabled, 0, "Node starts in disabled state", NULL },
                { "start-as-stopped", 0, POPT_ARG_NONE, &options.start_as_stopped, 0, "Node starts in stopped state", NULL },
                { "lvs", 0, POPT_ARG_NONE, &options.lvs, 0, "lvs is enabled on this node", NULL },
                { "script-log-level", 0, POPT_ARG_INT, &options.script_log_level, DEBUG_ERR, "log level of event script output", NULL },
                { "nopublicipcheck", 0, POPT_ARG_NONE, &options.no_publicipcheck, 0, "dont check we have/dont have the correct public ip addresses", NULL },
 +              { "max-persistent-check-errors", 0, POPT_ARG_INT,
 +                &options.max_persistent_check_errors, 0,
 +                "max allowed persistent check errors (default 0)", NULL },
                POPT_TABLEEND
        };
        int opt, ret;
                        exit(1);
                }
        }
 +      if (options.db_dir_state) {
 +              ret = ctdb_set_tdb_dir_state(ctdb, options.db_dir_state);
 +              if (ret == -1) {
 +                      DEBUG(DEBUG_ALERT,("ctdb_set_tdb_dir_state failed - %s\n", ctdb_errstr(ctdb)));
 +                      exit(1);
 +              }
 +      }
  
        if (options.public_interface) {
                ctdb->default_public_interface = talloc_strdup(ctdb, options.public_interface);
                }
        }
  
-       ctdb->do_setsched = !options.no_setsched;
+       ctdb->valgrinding = options.valgrinding;
  
        ctdb->do_checkpublicip = !options.no_publicipcheck;
  
 +      if (options.max_persistent_check_errors < 0) {
 +              ctdb->max_persistent_check_errors = 0xFFFFFFFFFFFFFFFFLL;
 +      } else {
 +              ctdb->max_persistent_check_errors = (uint64_t)options.max_persistent_check_errors;
 +      }
 +
        if (getenv("CTDB_BASE") == NULL) {
                /* setup a environment variable for the event scripts to use
                   to find the installation directory */