X-Git-Url: http://git.samba.org/?a=blobdiff_plain;f=server%2Fctdb_recover.c;h=eb3bf0a50c7dca2f9cbb31304d64b0e6ed9a4c95;hb=1.3;hp=dc6533cb3f3db019f88f67762233c0ff46118ae7;hpb=aea77353c5996510f09c72cddfb54894ad9048b0;p=sahlberg%2Fctdb.git diff --git a/server/ctdb_recover.c b/server/ctdb_recover.c index dc6533cb..eb3bf0a5 100644 --- a/server/ctdb_recover.c +++ b/server/ctdb_recover.c @@ -18,7 +18,7 @@ along with this program; if not, see . */ #include "includes.h" -#include "lib/events/events.h" +#include "lib/tevent/tevent.h" #include "lib/tdb/include/tdb.h" #include "system/time.h" #include "system/network.h" @@ -31,15 +31,52 @@ /* lock all databases - mark only */ -static int ctdb_lock_all_databases_mark(struct ctdb_context *ctdb) +static int ctdb_lock_all_databases_mark(struct ctdb_context *ctdb, uint32_t priority) { struct ctdb_db_context *ctdb_db; - if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) { + + if ((priority < 1) || (priority > NUM_DB_PRIORITIES)) { + DEBUG(DEBUG_ERR,(__location__ " Illegal priority when trying to mark all databases Prio:%u\n", priority)); + return -1; + } + + if (ctdb->freeze_mode[priority] != CTDB_FREEZE_FROZEN) { DEBUG(DEBUG_ERR,("Attempt to mark all databases locked when not frozen\n")); return -1; } + /* The dual loop is a woraround for older versions of samba + that does not yet support the set-db-priority/lock order + call. So that we get basic deadlock avoiidance also for + these old versions of samba. + This code will be removed in the future. + */ + for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) { + if (ctdb_db->priority != priority) { + continue; + } + if (strstr(ctdb_db->db_name, "notify") != NULL) { + continue; + } + if (tdb_transaction_write_lock_mark(ctdb_db->ltdb->tdb) != 0) { + return -1; + } + if (tdb_lockall_mark(ctdb_db->ltdb->tdb) != 0) { + tdb_transaction_write_lock_unmark(ctdb_db->ltdb->tdb); + return -1; + } + } for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) { + if (ctdb_db->priority != priority) { + continue; + } + if (strstr(ctdb_db->db_name, "notify") == NULL) { + continue; + } + if (tdb_transaction_write_lock_mark(ctdb_db->ltdb->tdb) != 0) { + return -1; + } if (tdb_lockall_mark(ctdb_db->ltdb->tdb) != 0) { + tdb_transaction_write_lock_unmark(ctdb_db->ltdb->tdb); return -1; } } @@ -49,14 +86,24 @@ static int ctdb_lock_all_databases_mark(struct ctdb_context *ctdb) /* lock all databases - unmark only */ -static int ctdb_lock_all_databases_unmark(struct ctdb_context *ctdb) +static int ctdb_lock_all_databases_unmark(struct ctdb_context *ctdb, uint32_t priority) { struct ctdb_db_context *ctdb_db; - if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) { + + if ((priority < 1) || (priority > NUM_DB_PRIORITIES)) { + DEBUG(DEBUG_ERR,(__location__ " Illegal priority when trying to mark all databases Prio:%u\n", priority)); + return -1; + } + + if (ctdb->freeze_mode[priority] != CTDB_FREEZE_FROZEN) { DEBUG(DEBUG_ERR,("Attempt to unmark all databases locked when not frozen\n")); return -1; } for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) { + if (ctdb_db->priority != priority) { + continue; + } + tdb_transaction_write_lock_unmark(ctdb_db->ltdb->tdb); if (tdb_lockall_unmark(ctdb_db->ltdb->tdb) != 0) { return -1; } @@ -90,10 +137,13 @@ int ctdb_control_setvnnmap(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA indata, TDB_DATA *outdata) { struct ctdb_vnn_map_wire *map = (struct ctdb_vnn_map_wire *)indata.dptr; + int i; - if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) { - DEBUG(DEBUG_ERR,("Attempt to set vnnmap when not frozen\n")); - return -1; + for(i=1; i<=NUM_DB_PRIORITIES; i++) { + if (ctdb->freeze_mode[i] != CTDB_FREEZE_FROZEN) { + DEBUG(DEBUG_ERR,("Attempt to set vnnmap when not frozen\n")); + return -1; + } } talloc_free(ctdb->vnn_map); @@ -163,8 +213,47 @@ ctdb_control_getnodemap(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA ind node_map = (struct ctdb_node_map *)outdata->dptr; node_map->num = num_nodes; for (i=0; inodes[i]->address.address, &node_map->nodes[i].addr) == 0) { + if (parse_ip(ctdb->nodes[i]->address.address, + NULL, /* TODO: pass in the correct interface here*/ + 0, + &node_map->nodes[i].addr) == 0) + { + DEBUG(DEBUG_ERR, (__location__ " Failed to parse %s into a sockaddr\n", ctdb->nodes[i]->address.address)); + } + + node_map->nodes[i].pnn = ctdb->nodes[i]->pnn; + node_map->nodes[i].flags = ctdb->nodes[i]->flags; + } + + return 0; +} + +/* + get an old style ipv4-only nodemap +*/ +int +ctdb_control_getnodemapv4(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA indata, TDB_DATA *outdata) +{ + uint32_t i, num_nodes; + struct ctdb_node_mapv4 *node_map; + + CHECK_CONTROL_DATA_SIZE(0); + + num_nodes = ctdb->num_nodes; + + outdata->dsize = offsetof(struct ctdb_node_mapv4, nodes) + num_nodes*sizeof(struct ctdb_node_and_flagsv4); + outdata->dptr = (unsigned char *)talloc_zero_size(outdata, outdata->dsize); + if (!outdata->dptr) { + DEBUG(DEBUG_ALERT, (__location__ " Failed to allocate nodemap array\n")); + exit(1); + } + + node_map = (struct ctdb_node_mapv4 *)outdata->dptr; + node_map->num = num_nodes; + for (i=0; inodes[i]->address.address, 0, &node_map->nodes[i].sin) == 0) { DEBUG(DEBUG_ERR, (__location__ " Failed to parse %s into a sockaddr\n", ctdb->nodes[i]->address.address)); + return -1; } node_map->nodes[i].pnn = ctdb->nodes[i]->pnn; @@ -178,30 +267,50 @@ static void ctdb_reload_nodes_event(struct event_context *ev, struct timed_event *te, struct timeval t, void *private_data) { - int ret; + int i, num_nodes; struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context); - int ctdb_tcp_init(struct ctdb_context *); + TALLOC_CTX *tmp_ctx; + struct ctdb_node **nodes; - /* shut down the transport */ - if (ctdb->methods != NULL) { - ctdb->methods->shutdown(ctdb); - } + tmp_ctx = talloc_new(ctdb); - /* start the transport again */ + /* steal the old nodes file for a while */ + talloc_steal(tmp_ctx, ctdb->nodes); + nodes = ctdb->nodes; + ctdb->nodes = NULL; + num_nodes = ctdb->num_nodes; + ctdb->num_nodes = 0; + + /* load the new nodes file */ ctdb_load_nodes_file(ctdb); - ret = ctdb_tcp_init(ctdb); - if (ret != 0) { - DEBUG(DEBUG_CRIT, (__location__ " Failed to init TCP\n")); - exit(1); - } - if (ctdb->methods == NULL) { - DEBUG(DEBUG_ALERT,(__location__ " Can not restart transport. ctdb->methods==NULL\n")); - ctdb_fatal(ctdb, "can not reinitialize transport."); + for (i=0; inum_nodes; i++) { + /* keep any identical pre-existing nodes and connections */ + if ((i < num_nodes) && ctdb_same_address(&ctdb->nodes[i]->address, &nodes[i]->address)) { + talloc_free(ctdb->nodes[i]); + ctdb->nodes[i] = talloc_steal(ctdb->nodes, nodes[i]); + continue; + } + + if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) { + continue; + } + + /* any new or different nodes must be added */ + if (ctdb->methods->add_node(ctdb->nodes[i]) != 0) { + DEBUG(DEBUG_CRIT, (__location__ " methods->add_node failed at %d\n", i)); + ctdb_fatal(ctdb, "failed to add node. shutting down\n"); + } + if (ctdb->methods->connect_node(ctdb->nodes[i]) != 0) { + DEBUG(DEBUG_CRIT, (__location__ " methods->add_connect failed at %d\n", i)); + ctdb_fatal(ctdb, "failed to connect to node. shutting down\n"); + } } - ctdb->methods->initialise(ctdb); - ctdb->methods->start(ctdb); + /* tell the recovery daemon to reaload the nodes file too */ + ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELOAD_NODES, tdb_null); + + talloc_free(tmp_ctx); return; } @@ -240,10 +349,8 @@ static int traverse_pulldb(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, } params->pulldata = talloc_realloc_size(NULL, params->pulldata, rec->length + params->len); if (params->pulldata == NULL) { - DEBUG(DEBUG_ERR,(__location__ " Failed to expand pulldb_data to %u (%u records)\n", - rec->length + params->len, params->pulldata->count)); - params->failed = true; - return -1; + DEBUG(DEBUG_CRIT,(__location__ " Failed to expand pulldb_data to %u\n", rec->length + params->len)); + ctdb_fatal(params->ctdb, "failed to allocate memory for recovery. shutting down\n"); } params->pulldata->count++; memcpy(params->len+(uint8_t *)params->pulldata, rec, rec->length); @@ -263,11 +370,6 @@ int32_t ctdb_control_pull_db(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DAT struct pulldb_data params; struct ctdb_marshall_buffer *reply; - if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) { - DEBUG(DEBUG_DEBUG,("rejecting ctdb_control_pull_db when not frozen\n")); - return -1; - } - pull = (struct ctdb_control_pulldb *)indata.dptr; ctdb_db = find_ctdb_db(ctdb, pull->db_id); @@ -276,6 +378,11 @@ int32_t ctdb_control_pull_db(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DAT return -1; } + if (ctdb->freeze_mode[ctdb_db->priority] != CTDB_FREEZE_FROZEN) { + DEBUG(DEBUG_DEBUG,("rejecting ctdb_control_pull_db when not frozen\n")); + return -1; + } + reply = talloc_zero(outdata, struct ctdb_marshall_buffer); CTDB_NO_MEMORY(ctdb, reply); @@ -286,19 +393,25 @@ int32_t ctdb_control_pull_db(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DAT params.len = offsetof(struct ctdb_marshall_buffer, data); params.failed = false; - if (ctdb_lock_all_databases_mark(ctdb) != 0) { + if (ctdb_db->unhealthy_reason) { + /* this is just a warning, as the tdb should be empty anyway */ + DEBUG(DEBUG_WARNING,("db(%s) unhealty in ctdb_control_pull_db: %s\n", + ctdb_db->db_name, ctdb_db->unhealthy_reason)); + } + + if (ctdb_lock_all_databases_mark(ctdb, ctdb_db->priority) != 0) { DEBUG(DEBUG_ERR,(__location__ " Failed to get lock on entired db - failing\n")); return -1; } if (tdb_traverse_read(ctdb_db->ltdb->tdb, traverse_pulldb, ¶ms) == -1) { DEBUG(DEBUG_ERR,(__location__ " Failed to get traverse db '%s'\n", ctdb_db->db_name)); - ctdb_lock_all_databases_unmark(ctdb); + ctdb_lock_all_databases_unmark(ctdb, ctdb_db->priority); talloc_free(params.pulldata); return -1; } - ctdb_lock_all_databases_unmark(ctdb); + ctdb_lock_all_databases_unmark(ctdb, ctdb_db->priority); outdata->dptr = (uint8_t *)params.pulldata; outdata->dsize = params.len; @@ -316,11 +429,6 @@ int32_t ctdb_control_push_db(struct ctdb_context *ctdb, TDB_DATA indata) int i, ret; struct ctdb_rec_data *rec; - if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) { - DEBUG(DEBUG_DEBUG,("rejecting ctdb_control_push_db when not frozen\n")); - return -1; - } - if (indata.dsize < offsetof(struct ctdb_marshall_buffer, data)) { DEBUG(DEBUG_ERR,(__location__ " invalid data in pulldb reply\n")); return -1; @@ -332,7 +440,12 @@ int32_t ctdb_control_push_db(struct ctdb_context *ctdb, TDB_DATA indata) return -1; } - if (ctdb_lock_all_databases_mark(ctdb) != 0) { + if (ctdb->freeze_mode[ctdb_db->priority] != CTDB_FREEZE_FROZEN) { + DEBUG(DEBUG_DEBUG,("rejecting ctdb_control_push_db when not frozen\n")); + return -1; + } + + if (ctdb_lock_all_databases_mark(ctdb, ctdb_db->priority) != 0) { DEBUG(DEBUG_ERR,(__location__ " Failed to get lock on entired db - failing\n")); return -1; } @@ -371,11 +484,11 @@ int32_t ctdb_control_push_db(struct ctdb_context *ctdb, TDB_DATA indata) DEBUG(DEBUG_DEBUG,("finished push of %u records for dbid 0x%x\n", reply->count, reply->db_id)); - ctdb_lock_all_databases_unmark(ctdb); + ctdb_lock_all_databases_unmark(ctdb, ctdb_db->priority); return 0; failed: - ctdb_lock_all_databases_unmark(ctdb); + ctdb_lock_all_databases_unmark(ctdb, ctdb_db->priority); return -1; } @@ -409,25 +522,25 @@ int32_t ctdb_control_set_dmaster(struct ctdb_context *ctdb, TDB_DATA indata) struct ctdb_control_set_dmaster *p = (struct ctdb_control_set_dmaster *)indata.dptr; struct ctdb_db_context *ctdb_db; - if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) { - DEBUG(DEBUG_DEBUG,("rejecting ctdb_control_set_dmaster when not frozen\n")); - return -1; - } - ctdb_db = find_ctdb_db(ctdb, p->db_id); if (!ctdb_db) { DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%08x\n", p->db_id)); return -1; } - if (ctdb_lock_all_databases_mark(ctdb) != 0) { + if (ctdb->freeze_mode[ctdb_db->priority] != CTDB_FREEZE_FROZEN) { + DEBUG(DEBUG_DEBUG,("rejecting ctdb_control_set_dmaster when not frozen\n")); + return -1; + } + + if (ctdb_lock_all_databases_mark(ctdb, ctdb_db->priority) != 0) { DEBUG(DEBUG_ERR,(__location__ " Failed to get lock on entired db - failing\n")); return -1; } tdb_traverse(ctdb_db->ltdb->tdb, traverse_setdmaster, &p->dmaster); - ctdb_lock_all_databases_unmark(ctdb); + ctdb_lock_all_databases_unmark(ctdb, ctdb_db->priority); return 0; } @@ -440,6 +553,7 @@ struct ctdb_set_recmode_state { struct timed_event *te; struct fd_event *fde; pid_t child; + struct timeval start_time; }; /* @@ -452,7 +566,14 @@ static void ctdb_set_recmode_timeout(struct event_context *ev, struct timed_even struct ctdb_set_recmode_state *state = talloc_get_type(private_data, struct ctdb_set_recmode_state); - ctdb_request_control_reply(state->ctdb, state->c, NULL, -1, "timeout in ctdb_set_recmode"); + /* we consider this a success, not a failure, as we failed to + set the recovery lock which is what we wanted. This can be + caused by the cluster filesystem being very slow to + arbitrate locks immediately after a node failure. + */ + DEBUG(DEBUG_ERR,(__location__ " set_recmode child process hung/timedout CFS slow to grant locks? (allowing recmode set anyway)\n")); + state->ctdb->recovery_mode = state->recmode; + ctdb_request_control_reply(state->ctdb, state->c, NULL, 0, NULL); talloc_free(state); } @@ -461,6 +582,16 @@ static void ctdb_set_recmode_timeout(struct event_context *ev, struct timed_even */ static int set_recmode_destructor(struct ctdb_set_recmode_state *state) { + double l = timeval_elapsed(&state->start_time); + + CTDB_UPDATE_RECLOCK_LATENCY(state->ctdb, "daemon reclock", reclock.ctdbd, l); + + if (state->fd[0] != -1) { + state->fd[0] = -1; + } + if (state->fd[1] != -1) { + state->fd[1] = -1; + } kill(state->child, SIGKILL); return 0; } @@ -499,11 +630,45 @@ static void set_recmode_handler(struct event_context *ev, struct fd_event *fde, state->ctdb->recovery_mode = state->recmode; + /* release any deferred attach calls from clients */ + if (state->recmode == CTDB_RECOVERY_NORMAL) { + ctdb_process_deferred_attach(state->ctdb); + } + ctdb_request_control_reply(state->ctdb, state->c, NULL, 0, NULL); talloc_free(state); return; } +static void +ctdb_drop_all_ips_event(struct event_context *ev, struct timed_event *te, + struct timeval t, void *private_data) +{ + struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context); + + DEBUG(DEBUG_ERR,(__location__ " Been in recovery mode for too long. Dropping all IPS\n")); + talloc_free(ctdb->release_ips_ctx); + ctdb->release_ips_ctx = NULL; + + ctdb_release_all_ips(ctdb); +} + +/* + * Set up an event to drop all public ips if we remain in recovery for too + * long + */ +int ctdb_deferred_drop_all_ips(struct ctdb_context *ctdb) +{ + if (ctdb->release_ips_ctx != NULL) { + talloc_free(ctdb->release_ips_ctx); + } + ctdb->release_ips_ctx = talloc_new(ctdb); + CTDB_NO_MEMORY(ctdb, ctdb->release_ips_ctx); + + event_add_timed(ctdb->ev, ctdb->release_ips_ctx, timeval_current_ofs(ctdb->tunable.recovery_drop_all_ips, 0), ctdb_drop_all_ips_event, ctdb); + return 0; +} + /* set the recovery mode */ @@ -513,15 +678,20 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb, const char **errormsg) { uint32_t recmode = *(uint32_t *)indata.dptr; - int ret; + int i, ret; struct ctdb_set_recmode_state *state; pid_t parent = getpid(); - if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) { - DEBUG(DEBUG_ERR,("Attempt to change recovery mode to %u when not frozen\n", - recmode)); - (*errormsg) = "Cannot change recovery mode while not frozen"; - return -1; + /* if we enter recovery but stay in recovery for too long + we will eventually drop all our ip addresses + */ + if (recmode == CTDB_RECOVERY_NORMAL) { + talloc_free(ctdb->release_ips_ctx); + ctdb->release_ips_ctx = NULL; + } else { + if (ctdb_deferred_drop_all_ips(ctdb) != 0) { + DEBUG(DEBUG_ERR,("Failed to set up deferred drop all ips\n")); + } } if (recmode != ctdb->recovery_mode) { @@ -537,19 +707,35 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb, /* some special handling when ending recovery mode */ - /* force the databased to thaw */ - if (ctdb->freeze_handle) { - ctdb_control_thaw(ctdb); + /* force the databases to thaw */ + for (i=1; i<=NUM_DB_PRIORITIES; i++) { + if (ctdb->freeze_handles[i] != NULL) { + ctdb_control_thaw(ctdb, i); + } } state = talloc(ctdb, struct ctdb_set_recmode_state); CTDB_NO_MEMORY(ctdb, state); + state->start_time = timeval_current(); + state->fd[0] = -1; + state->fd[1] = -1; + + /* release any deferred attach calls from clients */ + if (recmode == CTDB_RECOVERY_NORMAL) { + ctdb_process_deferred_attach(ctdb); + } + + if (ctdb->tunable.verify_recovery_lock == 0) { + /* dont need to verify the reclock file */ + ctdb->recovery_mode = recmode; + return 0; + } + /* For the rest of what needs to be done, we need to do this in a child process since 1, the call to ctdb_recovery_lock() can block if the cluster filesystem is in the process of recovery. - 2, running of the script may take a while. */ ret = pipe(state->fd); if (ret != 0) { @@ -570,7 +756,8 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb, char cc = 0; close(state->fd[0]); - /* we should not be able to get the lock on the nodes list, + debug_extra = talloc_asprintf(NULL, "set_recmode:"); + /* we should not be able to get the lock on the reclock file, as it should be held by the recovery master */ if (ctdb_recovery_lock(ctdb, false)) { @@ -582,24 +769,32 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb, /* make sure we die when our parent dies */ while (kill(parent, 0) == 0 || errno != ESRCH) { sleep(5); + write(state->fd[1], &cc, 1); } _exit(0); } close(state->fd[1]); + set_close_on_exec(state->fd[0]); + + state->fd[1] = -1; talloc_set_destructor(state, set_recmode_destructor); - state->te = event_add_timed(ctdb->ev, state, timeval_current_ofs(3, 0), - ctdb_set_recmode_timeout, state); + DEBUG(DEBUG_DEBUG, (__location__ " Created PIPE FD:%d for setrecmode\n", state->fd[0])); + + state->te = event_add_timed(ctdb->ev, state, timeval_current_ofs(5, 0), + ctdb_set_recmode_timeout, state); state->fde = event_add_fd(ctdb->ev, state, state->fd[0], - EVENT_FD_READ|EVENT_FD_AUTOCLOSE, + EVENT_FD_READ, set_recmode_handler, (void *)state); + if (state->fde == NULL) { talloc_free(state); return -1; } + tevent_fd_set_auto_close(state->fde); state->ctdb = ctdb; state->recmode = recmode; @@ -619,9 +814,14 @@ bool ctdb_recovery_lock(struct ctdb_context *ctdb, bool keep) { struct flock lock; + if (keep) { + DEBUG(DEBUG_ERR, ("Take the recovery lock\n")); + } if (ctdb->recovery_lock_fd != -1) { close(ctdb->recovery_lock_fd); + ctdb->recovery_lock_fd = -1; } + ctdb->recovery_lock_fd = open(ctdb->recovery_lock_file, O_RDWR|O_CREAT, 0600); if (ctdb->recovery_lock_fd == -1) { DEBUG(DEBUG_ERR,("ctdb_recovery_lock: Unable to open %s - (%s)\n", @@ -651,6 +851,10 @@ bool ctdb_recovery_lock(struct ctdb_context *ctdb, bool keep) ctdb->recovery_lock_fd = -1; } + if (keep) { + DEBUG(DEBUG_NOTICE, ("Recovery lock taken successfully\n")); + } + DEBUG(DEBUG_NOTICE,("ctdb_recovery_lock: Got recovery lock on '%s'\n", ctdb->recovery_lock_file)); return true; @@ -767,9 +971,13 @@ static void ctdb_end_recovery_callback(struct ctdb_context *ctdb, int status, vo struct recovery_callback_state *state = talloc_get_type(p, struct recovery_callback_state); ctdb_enable_monitoring(ctdb); + CTDB_INCREMENT_STAT(ctdb, num_recoveries); if (status != 0) { DEBUG(DEBUG_ERR,(__location__ " recovered event script failed (status %d)\n", status)); + if (status == -ETIME) { + ctdb_ban_self(ctdb); + } } ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL); @@ -790,18 +998,20 @@ int32_t ctdb_control_end_recovery(struct ctdb_context *ctdb, DEBUG(DEBUG_NOTICE,("Recovery has finished\n")); + ctdb_persistent_finish_trans3_commits(ctdb); + state = talloc(ctdb, struct recovery_callback_state); CTDB_NO_MEMORY(ctdb, state); - state->c = talloc_steal(state, c); + state->c = c; ctdb_disable_monitoring(ctdb); - ret = ctdb_event_script_callback(ctdb, - timeval_current_ofs(ctdb->tunable.script_timeout, 0), - state, + ret = ctdb_event_script_callback(ctdb, state, ctdb_end_recovery_callback, - state, "recovered"); + state, + false, + CTDB_EVENT_RECOVERED, "%s", ""); if (ret != 0) { ctdb_enable_monitoring(ctdb); @@ -812,6 +1022,7 @@ int32_t ctdb_control_end_recovery(struct ctdb_context *ctdb, } /* tell the control that we will be reply asynchronously */ + state->c = talloc_steal(state, c); *async_reply = true; return 0; } @@ -851,11 +1062,11 @@ int32_t ctdb_control_start_recovery(struct ctdb_context *ctdb, ctdb_disable_monitoring(ctdb); - ret = ctdb_event_script_callback(ctdb, - timeval_current_ofs(ctdb->tunable.script_timeout, 0), - state, + ret = ctdb_event_script_callback(ctdb, state, ctdb_start_recovery_callback, - state, "startrecovery"); + state, false, + CTDB_EVENT_START_RECOVERY, + "%s", ""); if (ret != 0) { DEBUG(DEBUG_ERR,(__location__ " Failed to start recovery\n")); @@ -976,7 +1187,7 @@ static void ctdb_recd_ping_timeout(struct event_context *ev, struct timed_event struct ctdb_context *ctdb = talloc_get_type(p, struct ctdb_context); uint32_t *count = talloc_get_type(ctdb->recd_ping_count, uint32_t); - DEBUG(DEBUG_ERR, (__location__ " Recovery daemon ping timeout. Count : %u\n", *count)); + DEBUG(DEBUG_ERR, ("Recovery daemon ping timeout. Count : %u\n", *count)); if (*count < ctdb->tunable.recd_ping_failcount) { (*count)++; @@ -986,18 +1197,10 @@ static void ctdb_recd_ping_timeout(struct event_context *ev, struct timed_event return; } - DEBUG(DEBUG_ERR, (__location__ " Final timeout for recovery daemon ping. Shutting down ctdb daemon\n")); + DEBUG(DEBUG_ERR, ("Final timeout for recovery daemon ping. Restarting recovery daemon. (This can be caused if the cluster filesystem has hung)\n")); ctdb_stop_recoverd(ctdb); - ctdb_stop_keepalive(ctdb); - ctdb_stop_monitoring(ctdb); - ctdb_release_all_ips(ctdb); - if (ctdb->methods != NULL) { - ctdb->methods->shutdown(ctdb); - } - ctdb_event_script(ctdb, "shutdown"); - DEBUG(DEBUG_ERR, (__location__ " Recovery daemon ping timeout. Daemon has been shut down.\n")); - exit(0); + ctdb_start_recoverd(ctdb); } /* The recovery daemon will ping us at regular intervals. @@ -1020,3 +1223,78 @@ int32_t ctdb_control_recd_ping(struct ctdb_context *ctdb) return 0; } + + +int32_t ctdb_control_set_recmaster(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA indata) +{ + CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t)); + + ctdb->recovery_master = ((uint32_t *)(&indata.dptr[0]))[0]; + return 0; +} + + +struct stop_node_callback_state { + struct ctdb_req_control *c; +}; + +/* + called when the 'stopped' event script has finished + */ +static void ctdb_stop_node_callback(struct ctdb_context *ctdb, int status, void *p) +{ + struct stop_node_callback_state *state = talloc_get_type(p, struct stop_node_callback_state); + + if (status != 0) { + DEBUG(DEBUG_ERR,(__location__ " stopped event script failed (status %d)\n", status)); + ctdb->nodes[ctdb->pnn]->flags &= ~NODE_FLAGS_STOPPED; + if (status == -ETIME) { + ctdb_ban_self(ctdb); + } + } + + ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL); + talloc_free(state); +} + +int32_t ctdb_control_stop_node(struct ctdb_context *ctdb, struct ctdb_req_control *c, bool *async_reply) +{ + int ret; + struct stop_node_callback_state *state; + + DEBUG(DEBUG_INFO,(__location__ " Stopping node\n")); + + state = talloc(ctdb, struct stop_node_callback_state); + CTDB_NO_MEMORY(ctdb, state); + + state->c = talloc_steal(state, c); + + ctdb_disable_monitoring(ctdb); + + ret = ctdb_event_script_callback(ctdb, state, + ctdb_stop_node_callback, + state, false, + CTDB_EVENT_STOPPED, "%s", ""); + + if (ret != 0) { + ctdb_enable_monitoring(ctdb); + + DEBUG(DEBUG_ERR,(__location__ " Failed to stop node\n")); + talloc_free(state); + return -1; + } + + ctdb->nodes[ctdb->pnn]->flags |= NODE_FLAGS_STOPPED; + + *async_reply = true; + + return 0; +} + +int32_t ctdb_control_continue_node(struct ctdb_context *ctdb) +{ + DEBUG(DEBUG_INFO,(__location__ " Continue node\n")); + ctdb->nodes[ctdb->pnn]->flags &= ~NODE_FLAGS_STOPPED; + + return 0; +}