*/
static struct {
- const char *nlist;
- const char *transport;
- const char *myaddress;
const char *socketname;
- int self_connect;
- const char *db_dir;
int torture;
const char *events;
} ctdb_cmdline = {
- .nlist = ETCDIR "/ctdb/nodes",
- .transport = "tcp",
- .myaddress = NULL,
.socketname = CTDB_PATH,
- .self_connect = 0,
- .db_dir = VARDIR "/ctdb",
.torture = 0,
};
struct poptOption popt_ctdb_cmdline[] = {
{ NULL, 0, POPT_ARG_CALLBACK, (void *)ctdb_cmdline_callback },
- { "nlist", 0, POPT_ARG_STRING, &ctdb_cmdline.nlist, 0, "node list file", "filename" },
- { "listen", 0, POPT_ARG_STRING, &ctdb_cmdline.myaddress, 0, "address to listen on", "address" },
{ "socket", 0, POPT_ARG_STRING, &ctdb_cmdline.socketname, 0, "local socket name", "filename" },
- { "transport", 0, POPT_ARG_STRING, &ctdb_cmdline.transport, 0, "protocol transport", NULL },
- { "self-connect", 0, POPT_ARG_NONE, &ctdb_cmdline.self_connect, 0, "enable self connect", "boolean" },
{ "debug", 'd', POPT_ARG_INT, &LogLevel, 0, "debug level"},
- { "dbdir", 0, POPT_ARG_STRING, &ctdb_cmdline.db_dir, 0, "directory for the tdb files", NULL },
{ "torture", 0, POPT_ARG_NONE, &ctdb_cmdline.torture, 0, "enable nastiness in library", NULL },
{ "events", 0, POPT_ARG_STRING, NULL, OPT_EVENTSYSTEM, "event system", NULL },
{ NULL }
struct ctdb_context *ctdb;
int ret;
- if (ctdb_cmdline.nlist == NULL) {
- printf("You must provide a node list with --nlist\n");
- exit(1);
- }
-
/* initialise ctdb */
ctdb = ctdb_init(ev);
if (ctdb == NULL) {
exit(1);
}
- if (ctdb_cmdline.self_connect) {
- ctdb_set_flags(ctdb, CTDB_FLAG_SELF_CONNECT);
- }
if (ctdb_cmdline.torture) {
ctdb_set_flags(ctdb, CTDB_FLAG_TORTURE);
}
- ret = ctdb_set_transport(ctdb, ctdb_cmdline.transport);
- if (ret == -1) {
- printf("ctdb_set_transport failed - %s\n", ctdb_errstr(ctdb));
- exit(1);
- }
-
- /* tell ctdb what address to listen on */
- if (ctdb_cmdline.myaddress) {
- ret = ctdb_set_address(ctdb, ctdb_cmdline.myaddress);
- if (ret == -1) {
- printf("ctdb_set_address failed - %s\n", ctdb_errstr(ctdb));
- exit(1);
- }
- }
-
/* tell ctdb the socket address */
ret = ctdb_set_socketname(ctdb, ctdb_cmdline.socketname);
if (ret == -1) {
exit(1);
}
- /* tell ctdb what nodes are available */
- ret = ctdb_set_nlist(ctdb, ctdb_cmdline.nlist);
- if (ret == -1) {
- printf("ctdb_set_nlist failed - %s\n", ctdb_errstr(ctdb));
- exit(1);
- }
-
- if (ctdb_cmdline.db_dir) {
- ret = ctdb_set_tdb_dir(ctdb, ctdb_cmdline.db_dir);
- if (ret == -1) {
- printf("ctdb_set_tdb_dir failed - %s\n", ctdb_errstr(ctdb));
- exit(1);
- }
- }
-
return ctdb;
}
TDB_DATA data;
int res;
struct daemon_control_state *state;
+ TALLOC_CTX *tmp_ctx = talloc_new(client);
if (c->hdr.destnode == CTDB_CURRENT_NODE) {
c->hdr.destnode = client->ctdb->vnn;
}
talloc_set_destructor(state, daemon_control_destructor);
+
+ if (c->flags & CTDB_CTRL_FLAG_NOREPLY) {
+ talloc_steal(tmp_ctx, state);
+ }
data.dptr = &c->data[0];
data.dsize = c->datalen;
c->hdr.destnode));
}
- if (c->flags & CTDB_CTRL_FLAG_NOREPLY) {
- talloc_free(state);
- }
+ talloc_free(tmp_ctx);
}
/*
continue;
}
- /* it might have come alive again */
- if (!(node->flags & NODE_FLAGS_CONNECTED) && node->rx_cnt != 0) {
- ctdb_node_connected(node);
+ if (!(node->flags & NODE_FLAGS_CONNECTED)) {
+ /* it might have come alive again */
+ if (node->rx_cnt != 0) {
+ ctdb_node_connected(node);
+ }
continue;
}
+
if (node->rx_cnt == 0) {
node->dead_count++;
} else {
node->rx_cnt = 0;
if (node->dead_count >= CTDB_MONITORING_DEAD_COUNT) {
+ DEBUG(0,("dead count reached for node %u\n", node->vnn));
ctdb_node_dead(node);
ctdb_send_keepalive(ctdb, node->vnn);
/* maybe tell the transport layer to kill the
continue;
}
- if (node->tx_cnt == 0 && (node->flags & NODE_FLAGS_CONNECTED)) {
+ if (node->tx_cnt == 0) {
+ DEBUG(5,("sending keepalive to %u\n", node->vnn));
ctdb_send_keepalive(ctdb, node->vnn);
}
return 0;
}
+
+/*
+  try to lock the node list file.
+
+  Opens ctdb->node_list_file read/write and attempts a non-blocking
+  (F_SETLK) write lock on its first byte. Any descriptor remembered in
+  ctdb->node_list_fd from an earlier call is closed first.
+
+  keep == true:  on success the descriptor stays open in
+                 ctdb->node_list_fd so the lock continues to be held.
+  keep == false: the descriptor is closed again straight away; the call
+                 only reports whether the lock could be taken.
+
+  Returns true if the lock was obtained, false if the file could not be
+  opened or the lock could not be taken. On every failure path
+  ctdb->node_list_fd is left as -1.
+
+  This should only succeed in the recovery daemon on the recovery
+  master. Succeeding anywhere else is a bug.
+ */
+bool ctdb_lock_node_list(struct ctdb_context *ctdb, bool keep)
+{
+	struct flock lock;
+
+	/* forget any descriptor left over from a previous call */
+	if (ctdb->node_list_fd != -1) {
+		close(ctdb->node_list_fd);
+		ctdb->node_list_fd = -1;
+	}
+
+	ctdb->node_list_fd = open(ctdb->node_list_file, O_RDWR);
+	if (ctdb->node_list_fd == -1) {
+		DEBUG(0,("Unable to open %s - (%s)\n",
+			 ctdb->node_list_file, strerror(errno)));
+		return false;
+	}
+
+	/* exclusive lock covering just the first byte of the file */
+	lock.l_type = F_WRLCK;
+	lock.l_whence = SEEK_SET;
+	lock.l_start = 0;
+	lock.l_len = 1;
+	lock.l_pid = 0;
+
+	if (fcntl(ctdb->node_list_fd, F_SETLK, &lock) != 0) {
+		/* we did not get the lock, so do not leave an unlocked
+		   descriptor behind in the context. Preserve errno from
+		   fcntl() in case the caller wants to report it */
+		int saved_errno = errno;
+
+		close(ctdb->node_list_fd);
+		ctdb->node_list_fd = -1;
+		errno = saved_errno;
+		return false;
+	}
+
+	if (!keep) {
+		/* the caller only wanted to probe the lock */
+		close(ctdb->node_list_fd);
+		ctdb->node_list_fd = -1;
+	}
+
+	return true;
+}
return 0;
}
-
+/*
+ we are the recmaster, and recovery is needed - start a recovery run
+ */
static int do_recovery(struct ctdb_context *ctdb,
TALLOC_CTX *mem_ctx, uint32_t vnn, uint32_t num_active,
struct ctdb_node_map *nodemap, struct ctdb_vnn_map *vnnmap)
int i, j, ret;
uint32_t generation;
struct ctdb_dbid_map *dbmap;
- struct flock lock;
+
+ if (!ctdb_lock_node_list(ctdb, true)) {
+ DEBUG(0,("Unable to lock node list - aborting recovery\n"));
+ return -1;
+ }
/* set recovery mode to active on all nodes */
ret = set_recovery_mode(ctdb, nodemap, CTDB_RECOVERY_ACTIVE);
return -1;
}
- /* get the recmaster lock */
- if (ctdb->node_list_fd != -1) {
- close(ctdb->node_list_fd);
- }
-
- ctdb->node_list_fd = open(ctdb->node_list_file, O_RDWR);
- if (ctdb->node_list_fd == -1) {
- DEBUG(0,("Unable to open %s - aborting recovery (%s)\n",
- ctdb->node_list_file, strerror(errno)));
- return -1;
- }
-
- lock.l_type = F_WRLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- lock.l_len = 1;
- lock.l_pid = 0;
-
- if (fcntl(ctdb->node_list_fd, F_SETLK, &lock) != 0) {
- DEBUG(0,("Unable to lock %s - aborting recovery (%s)\n",
- ctdb->node_list_file, strerror(errno)));
- return -1;
- }
-
DEBUG(0, (__location__ " Recovery initiated\n"));
/* pick a new generation number */
}
static struct {
+ const char *nlist;
+ const char *transport;
+ const char *myaddress;
const char *public_address_list;
const char *public_interface;
const char *event_script;
const char *logfile;
+ const char *recovery_lock_file;
+ const char *db_dir;
+ int self_connect;
} options = {
+ .nlist = ETCDIR "/ctdb/nodes",
+ .transport = "tcp",
.event_script = ETCDIR "/ctdb/events",
- .logfile = VARDIR "/log/log.ctdb"
+ .logfile = VARDIR "/log/log.ctdb",
+ .db_dir = VARDIR "/ctdb",
+ .self_connect = 0,
};
{ "public-interface", 0, POPT_ARG_STRING, &options.public_interface, 0, "public interface", "interface"},
{ "event-script", 0, POPT_ARG_STRING, &options.event_script, 0, "event script", "filename" },
{ "logfile", 0, POPT_ARG_STRING, &options.logfile, 0, "log file location", "filename" },
+ { "nlist", 0, POPT_ARG_STRING, &options.nlist, 0, "node list file", "filename" },
+ { "listen", 0, POPT_ARG_STRING, &options.myaddress, 0, "address to listen on", "address" },
+ { "transport", 0, POPT_ARG_STRING, &options.transport, 0, "protocol transport", NULL },
+ { "self-connect", 0, POPT_ARG_NONE, &options.self_connect, 0, "enable self connect", "boolean" },
+ { "dbdir", 0, POPT_ARG_STRING, &options.db_dir, 0, "directory for the tdb files", NULL },
POPT_TABLEEND
};
int opt, ret;
ctdb = ctdb_cmdline_init(ev);
+ if (options.self_connect) {
+ ctdb_set_flags(ctdb, CTDB_FLAG_SELF_CONNECT);
+ }
+
+ ret = ctdb_set_transport(ctdb, options.transport);
+ if (ret == -1) {
+ printf("ctdb_set_transport failed - %s\n", ctdb_errstr(ctdb));
+ exit(1);
+ }
+
+ /* tell ctdb what address to listen on */
+ if (options.myaddress) {
+ ret = ctdb_set_address(ctdb, options.myaddress);
+ if (ret == -1) {
+ printf("ctdb_set_address failed - %s\n", ctdb_errstr(ctdb));
+ exit(1);
+ }
+ }
+
+ /* tell ctdb what nodes are available */
+ ret = ctdb_set_nlist(ctdb, options.nlist);
+ if (ret == -1) {
+ printf("ctdb_set_nlist failed - %s\n", ctdb_errstr(ctdb));
+ exit(1);
+ }
+
+ if (options.db_dir) {
+ ret = ctdb_set_tdb_dir(ctdb, options.db_dir);
+ if (ret == -1) {
+ printf("ctdb_set_tdb_dir failed - %s\n", ctdb_errstr(ctdb));
+ exit(1);
+ }
+ }
+
ret = ctdb_set_logfile(ctdb, options.logfile);
if (ret == -1) {
printf("ctdb_set_logfile to %s failed - %s\n", options.logfile, ctdb_errstr(ctdb));
#define CTDB_MONITORING_TIMEOUT 2
/* number of monitoring timeouts before a node is considered dead */
-#define CTDB_MONITORING_DEAD_COUNT 2
+#define CTDB_MONITORING_DEAD_COUNT 3
/* number of consecutive calls from the same node before we give them
void set_nonblocking(int fd);
void set_close_on_exec(int fd);
+bool ctdb_lock_node_list(struct ctdb_context *ctdb, bool keep);
+
#endif