#include "cmdline.h"
#include "../include/ctdb.h"
#include "../include/ctdb_private.h"
+#include "db_wrap.h"
+#include "dlinklist.h"
struct ban_state {
	uint32_t node_flags;
	struct timed_event *send_election_te;
	struct timed_event *election_timeout;
+	/* list of in-flight vacuum-fetch runs; entries unlink themselves
+	   via vacuum_info_destructor.  NOTE(review): this struct is named
+	   ban_state but carries recovery-daemon state referenced as
+	   struct ctdb_recoverd elsewhere in the patch — confirm */
+	struct vacuum_info *vacuum_info;
};
#define CONTROL_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_timeout, 0)
#define MONITOR_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_interval, 0)
-struct async_data {
- uint32_t count;
- uint32_t fail_count;
-};
-
-static void async_callback(struct ctdb_client_control_state *state)
-{
- struct async_data *data = talloc_get_type(state->async.private_data, struct async_data);
-
- /* one more node has responded with recmode data */
- data->count--;
-
- /* if we failed to push the db, then return an error and let
- the main loop try again.
- */
- if (state->state != CTDB_CONTROL_DONE) {
- DEBUG(0,("Async operation failed with state %d\n", state->state));
- data->fail_count++;
- }
-}
-
-
-static void async_add(struct async_data *data, struct ctdb_client_control_state *state)
-{
- /* set up the callback functions */
- state->async.fn = async_callback;
- state->async.private_data = data;
-
- /* one more control to wait for to complete */
- data->count++;
-}
-
-
-/* wait for up to the maximum number of seconds allowed
- or until all nodes we expect a response from has replied
-*/
-static int async_wait(struct ctdb_context *ctdb, struct async_data *data)
-{
- while (data->count > 0) {
- event_loop_once(ctdb->ev);
- }
- if (data->fail_count != 0) {
- DEBUG(0,("Async wait failed - fail_count=%u\n", data->fail_count));
- return -1;
- }
- return 0;
-}
-
-
/*
unban a node
*/
{
struct ctdb_context *ctdb = rec->ctdb;
- DEBUG(0,("Unbanning node %u\n", pnn));
+ DEBUG(DEBUG_NOTICE,("Unbanning node %u\n", pnn));
if (!ctdb_validate_pnn(ctdb, pnn)) {
- DEBUG(0,("Bad pnn %u in ctdb_unban_node\n", pnn));
+ DEBUG(DEBUG_ERR,("Bad pnn %u in ctdb_unban_node\n", pnn));
return;
}
TDB_DATA data;
int ret;
- DEBUG(0,("Unanning remote node %u. Passing the ban request on to the remote node.\n", pnn));
+ DEBUG(DEBUG_NOTICE,("Unanning remote node %u. Passing the ban request on to the remote node.\n", pnn));
data.dptr = (uint8_t *)&pnn;
data.dsize = sizeof(uint32_t);
ret = ctdb_send_message(ctdb, pnn, CTDB_SRVID_UNBAN_NODE, data);
if (ret != 0) {
- DEBUG(0,("Failed to unban node %u\n", pnn));
+ DEBUG(DEBUG_ERR,("Failed to unban node %u\n", pnn));
return;
}
there is an election */
rec->node_flags &= ~NODE_FLAGS_BANNED;
- DEBUG(0,("Clearing ban flag on node %u\n", pnn));
+ DEBUG(DEBUG_INFO,("Clearing ban flag on node %u\n", pnn));
ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), pnn, 0, NODE_FLAGS_BANNED);
if (rec->banned_nodes[pnn] == NULL) {
- DEBUG(0,("No ban recorded for this node. ctdb_unban_node() request ignored\n"));
+ DEBUG(DEBUG_INFO,("No ban recorded for this node. ctdb_unban_node() request ignored\n"));
return;
}
struct ctdb_recoverd *rec = state->rec;
uint32_t pnn = state->banned_node;
- DEBUG(0,("Ban timeout. Node %u is now unbanned\n", pnn));
+ DEBUG(DEBUG_NOTICE,("Ban timeout. Node %u is now unbanned\n", pnn));
ctdb_unban_node(rec, pnn);
}
{
struct ctdb_context *ctdb = rec->ctdb;
- DEBUG(0,("Banning node %u for %u seconds\n", pnn, ban_time));
+ DEBUG(DEBUG_NOTICE,("Banning node %u for %u seconds\n", pnn, ban_time));
if (!ctdb_validate_pnn(ctdb, pnn)) {
- DEBUG(0,("Bad pnn %u in ctdb_ban_node\n", pnn));
+ DEBUG(DEBUG_ERR,("Bad pnn %u in ctdb_ban_node\n", pnn));
return;
}
if (0 == ctdb->tunable.enable_bans) {
- DEBUG(0,("Bans are disabled - ignoring ban of node %u\n", pnn));
+ DEBUG(DEBUG_INFO,("Bans are disabled - ignoring ban of node %u\n", pnn));
return;
}
TDB_DATA data;
int ret;
- DEBUG(0,("Banning remote node %u for %u seconds. Passing the ban request on to the remote node.\n", pnn, ban_time));
+ DEBUG(DEBUG_NOTICE,("Banning remote node %u for %u seconds. Passing the ban request on to the remote node.\n", pnn, ban_time));
b.pnn = pnn;
b.ban_time = ban_time;
ret = ctdb_send_message(ctdb, pnn, CTDB_SRVID_BAN_NODE, data);
if (ret != 0) {
- DEBUG(0,("Failed to ban node %u\n", pnn));
+ DEBUG(DEBUG_ERR,("Failed to ban node %u\n", pnn));
return;
}
return;
}
- DEBUG(0,("self ban - lowering our election priority\n"));
+ DEBUG(DEBUG_NOTICE,("self ban - lowering our election priority\n"));
ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), pnn, NODE_FLAGS_BANNED, 0);
/* banning ourselves - lower our election priority */
rec->node_flags |= NODE_FLAGS_BANNED;
if (rec->banned_nodes[pnn] != NULL) {
- DEBUG(0,("Re-banning an already banned node. Remove previous ban and set a new ban.\n"));
+ DEBUG(DEBUG_NOTICE,("Re-banning an already banned node. Remove previous ban and set a new ban.\n"));
talloc_free(rec->banned_nodes[pnn]);
rec->banned_nodes[pnn] = NULL;
}
enum monitor_result { MONITOR_OK, MONITOR_RECOVERY_NEEDED, MONITOR_ELECTION_NEEDED, MONITOR_FAILED};
+/*
+  run the "recovered" eventscript on all nodes
+ */
+static int run_recovered_eventscript(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
+{
+	int ret = 0;
+	TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
+
+	CTDB_NO_MEMORY(ctdb, tmp_ctx);
+
+	/* tell every active node that recovery has completed */
+	if (ctdb_client_async_control(ctdb, CTDB_CONTROL_END_RECOVERY,
+			list_of_active_nodes(ctdb, nodemap, tmp_ctx, true),
+			CONTROL_TIMEOUT(), false, tdb_null) != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Unable to run the 'recovered' event. Recovery failed.\n"));
+		ret = -1;
+	}
+	talloc_free(tmp_ctx);
+	return ret;
+}
-/* freeze all nodes */
-static enum monitor_result freeze_all_nodes(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
+/*
+  run the "startrecovery" eventscript on all nodes
+ */
+static int run_startrecovery_eventscript(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
{
-	struct async_data *async_data;
-	TALLOC_CTX *mem_ctx = talloc_new(ctdb);
-	struct ctdb_client_control_state *state;
-	int j;
-
-	async_data = talloc_zero(mem_ctx, struct async_data);
-	CTDB_NO_MEMORY_FATAL(ctdb, async_data);
+	TALLOC_CTX *tmp_ctx;
-	/* loop over all active nodes and send an async freeze call to
-	   them*/
-	for (j=0; j<nodemap->num; j++) {
-		if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
-			continue;
-		}
-		state = ctdb_ctrl_freeze_send(ctdb, mem_ctx,
-					CONTROL_TIMEOUT(),
-					nodemap->nodes[j].pnn);
-		if (state == NULL) {
-			/* we failed to send the control, treat this as
-			   an error and try again next iteration
-			*/
-			DEBUG(0,("Failed to call ctdb_ctrl_freeze_send during recovery\n"));
-			talloc_free(mem_ctx);
-			return MONITOR_RECOVERY_NEEDED;
-		}
-
-		async_add(async_data, state);
-	}
+	tmp_ctx = talloc_new(ctdb);
+	CTDB_NO_MEMORY(ctdb, tmp_ctx);
-	if (async_wait(ctdb, async_data) != 0) {
-		DEBUG(0,(__location__ " Failed async freeze call\n"));
-		talloc_free(mem_ctx);
-		return MONITOR_RECOVERY_NEEDED;
+	/* broadcast CTDB_CONTROL_START_RECOVERY to every active node and
+	   wait for all of them to complete before continuing */
+	if (ctdb_client_async_control(ctdb, CTDB_CONTROL_START_RECOVERY,
+			list_of_active_nodes(ctdb, nodemap, tmp_ctx, true),
+			CONTROL_TIMEOUT(), false, tdb_null) != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Unable to run the 'startrecovery' event. Recovery failed.\n"));
+		talloc_free(tmp_ctx);
+		return -1;
	}
-	talloc_free(mem_ctx);
-	return MONITOR_OK;
+	talloc_free(tmp_ctx);
+	return 0;
}
-
/*
  change recovery mode on all nodes
 */
static int set_recovery_mode(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t rec_mode)
{
-	int j, ret;
+	TDB_DATA data;
+	uint32_t *nodes;
+	TALLOC_CTX *tmp_ctx;
+
+	tmp_ctx = talloc_new(ctdb);
+	CTDB_NO_MEMORY(ctdb, tmp_ctx);
+
+	/* nodes is allocated on tmp_ctx and is freed together with it */
+	nodes = list_of_active_nodes(ctdb, nodemap, tmp_ctx, true);
	/* freeze all nodes */
	if (rec_mode == CTDB_RECOVERY_ACTIVE) {
-		ret = freeze_all_nodes(ctdb, nodemap);
-		if (ret != MONITOR_OK) {
-			DEBUG(0, (__location__ " Unable to freeze nodes. Recovery failed.\n"));
+		if (ctdb_client_async_control(ctdb, CTDB_CONTROL_FREEZE,
+				nodes, CONTROL_TIMEOUT(),
+				false, tdb_null) != 0) {
+			DEBUG(DEBUG_ERR, (__location__ " Unable to freeze nodes. Recovery failed.\n"));
+			talloc_free(tmp_ctx);
			return -1;
		}
	}
-	/* set recovery mode to active on all nodes */
-	for (j=0; j<nodemap->num; j++) {
-		/* dont change it for nodes that are unavailable */
-		if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
-			continue;
-		}
+	data.dsize = sizeof(uint32_t);
+	data.dptr = (unsigned char *)&rec_mode;
-		ret = ctdb_ctrl_setrecmode(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, rec_mode);
-		if (ret != 0) {
-			DEBUG(0, (__location__ " Unable to set recmode on node %u\n", nodemap->nodes[j].pnn));
-			return -1;
-		}
+	/* push the new recovery mode out to every active node */
+	if (ctdb_client_async_control(ctdb, CTDB_CONTROL_SET_RECMODE,
+			nodes, CONTROL_TIMEOUT(),
+			false, data) != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Unable to set recovery mode. Recovery failed.\n"));
+		talloc_free(tmp_ctx);
+		return -1;
+	}
-		if (rec_mode == CTDB_RECOVERY_NORMAL) {
-			ret = ctdb_ctrl_thaw(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn);
-			if (ret != 0) {
-				DEBUG(0, (__location__ " Unable to thaw node %u\n", nodemap->nodes[j].pnn));
-				return -1;
-			}
+	/* when leaving recovery, thaw the databases again */
+	if (rec_mode == CTDB_RECOVERY_NORMAL) {
+		if (ctdb_client_async_control(ctdb, CTDB_CONTROL_THAW,
+				nodes, CONTROL_TIMEOUT(),
+				false, tdb_null) != 0) {
+			DEBUG(DEBUG_ERR, (__location__ " Unable to thaw nodes. Recovery failed.\n"));
+			talloc_free(tmp_ctx);
+			return -1;
		}
	}
+	talloc_free(tmp_ctx);
	return 0;
}
 */
static int set_recovery_master(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t pnn)
{
-	int j, ret;
+	TDB_DATA data;
+	TALLOC_CTX *tmp_ctx;
-	/* set recovery master to pnn on all nodes */
-	for (j=0; j<nodemap->num; j++) {
-		/* dont change it for nodes that are unavailable */
-		if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
-			continue;
-		}
+	tmp_ctx = talloc_new(ctdb);
+	CTDB_NO_MEMORY(ctdb, tmp_ctx);
-		ret = ctdb_ctrl_setrecmaster(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, pnn);
-		if (ret != 0) {
-			DEBUG(0, (__location__ " Unable to set recmaster on node %u\n", nodemap->nodes[j].pnn));
-			return -1;
-		}
+	data.dsize = sizeof(uint32_t);
+	data.dptr = (unsigned char *)&pnn;
+
+	/* tell every active node who the new recovery master is */
+	if (ctdb_client_async_control(ctdb, CTDB_CONTROL_SET_RECMASTER,
+			list_of_active_nodes(ctdb, nodemap, tmp_ctx, true),
+			CONTROL_TIMEOUT(), false, data) != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Unable to set recmaster. Recovery failed.\n"));
+		talloc_free(tmp_ctx);
+		return -1;
	}
+	talloc_free(tmp_ctx);
	return 0;
}
ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn,
mem_ctx, &remote_dbmap);
if (ret != 0) {
- DEBUG(0, (__location__ " Unable to get dbids from node %u\n", pnn));
+ DEBUG(DEBUG_ERR, (__location__ " Unable to get dbids from node %u\n", pnn));
return -1;
}
ctdb_ctrl_getdbname(ctdb, CONTROL_TIMEOUT(), pnn, dbmap->dbs[db].dbid,
mem_ctx, &name);
if (ret != 0) {
- DEBUG(0, (__location__ " Unable to get dbname from node %u\n", pnn));
+ DEBUG(DEBUG_ERR, (__location__ " Unable to get dbname from node %u\n", pnn));
return -1;
}
ctdb_ctrl_createdb(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn,
mem_ctx, name, dbmap->dbs[db].persistent);
if (ret != 0) {
- DEBUG(0, (__location__ " Unable to create remote db:%s\n", name));
+ DEBUG(DEBUG_ERR, (__location__ " Unable to create remote db:%s\n", name));
return -1;
}
}
ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn,
mem_ctx, &remote_dbmap);
if (ret != 0) {
- DEBUG(0, (__location__ " Unable to get dbids from node %u\n", pnn));
+ DEBUG(DEBUG_ERR, (__location__ " Unable to get dbids from node %u\n", pnn));
return -1;
}
ctdb_ctrl_getdbname(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn,
remote_dbmap->dbs[db].dbid, mem_ctx, &name);
if (ret != 0) {
- DEBUG(0, (__location__ " Unable to get dbname from node %u\n",
+ DEBUG(DEBUG_ERR, (__location__ " Unable to get dbname from node %u\n",
nodemap->nodes[j].pnn));
return -1;
}
ctdb_ctrl_createdb(ctdb, CONTROL_TIMEOUT(), pnn, mem_ctx, name,
remote_dbmap->dbs[db].persistent);
if (ret != 0) {
- DEBUG(0, (__location__ " Unable to create local db:%s\n", name));
+ DEBUG(DEBUG_ERR, (__location__ " Unable to create local db:%s\n", name));
return -1;
}
ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), pnn, mem_ctx, dbmap);
if (ret != 0) {
- DEBUG(0, (__location__ " Unable to reread dbmap on node %u\n", pnn));
+ DEBUG(DEBUG_ERR, (__location__ " Unable to reread dbmap on node %u\n", pnn));
return -1;
}
}
/*
-  pull all the remote database contents into ours
+  pull the remote database contents from one node into the recdb
 */
-static int pull_all_remote_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
-				     uint32_t pnn, struct ctdb_dbid_map *dbmap, TALLOC_CTX *mem_ctx)
+static int pull_one_remote_database(struct ctdb_context *ctdb, uint32_t srcnode,
+				    struct tdb_wrap *recdb, uint32_t dbid)
{
-	int i, j, ret;
+	int ret;
+	TDB_DATA outdata;
+	struct ctdb_control_pulldb_reply *reply;
+	struct ctdb_rec_data *rec;
+	int i;
+	TALLOC_CTX *tmp_ctx = talloc_new(recdb);
-	/* pull all records from all other nodes across onto this node
-	   (this merges based on rsn)
-	*/
-	for (i=0;i<dbmap->num;i++) {
-		for (j=0; j<nodemap->num; j++) {
-			/* we dont need to merge with ourselves */
-			if (nodemap->nodes[j].pnn == pnn) {
-				continue;
+	ret = ctdb_ctrl_pulldb(ctdb, srcnode, dbid, CTDB_LMASTER_ANY, tmp_ctx,
+			       CONTROL_TIMEOUT(), &outdata);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " Unable to copy db from node %u\n", srcnode));
+		talloc_free(tmp_ctx);
+		return -1;
+	}
+
+	reply = (struct ctdb_control_pulldb_reply *)outdata.dptr;
+
+	/* sanity check the reply size before touching its fields */
+	if (outdata.dsize < offsetof(struct ctdb_control_pulldb_reply, data)) {
+		DEBUG(DEBUG_ERR,(__location__ " invalid data in pulldb reply\n"));
+		talloc_free(tmp_ctx);
+		return -1;
+	}
+
+	rec = (struct ctdb_rec_data *)&reply->data[0];
+
+	/* walk the marshalled records; rec->length is the full size of one
+	   record including its header */
+	for (i=0;
+	     i<reply->count;
+	     rec = (struct ctdb_rec_data *)(rec->length + (uint8_t *)rec), i++) {
+		TDB_DATA key, data;
+		struct ctdb_ltdb_header *hdr;
+		TDB_DATA existing;
+
+		key.dptr = &rec->data[0];
+		key.dsize = rec->keylen;
+		data.dptr = &rec->data[key.dsize];
+		data.dsize = rec->datalen;
+
+		hdr = (struct ctdb_ltdb_header *)data.dptr;
+
+		if (data.dsize < sizeof(struct ctdb_ltdb_header)) {
+			DEBUG(DEBUG_CRIT,(__location__ " bad ltdb record\n"));
+			talloc_free(tmp_ctx);
+			return -1;
+		}
+
+		/* fetch the existing record, if any */
+		existing = tdb_fetch(recdb->tdb, key);
+
+		if (existing.dptr != NULL) {
+			struct ctdb_ltdb_header header;
+			if (existing.dsize < sizeof(struct ctdb_ltdb_header)) {
+				DEBUG(DEBUG_CRIT,(__location__ " Bad record size %u from node %u\n",
+					 (unsigned)existing.dsize, srcnode));
+				free(existing.dptr);
+				talloc_free(tmp_ctx);
+				return -1;
			}
-			/* dont merge from nodes that are unavailable */
-			if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
+			header = *(struct ctdb_ltdb_header *)existing.dptr;
+			free(existing.dptr);
+			/* keep the existing copy unless the incoming record
+			   has a higher rsn, or the same rsn but a dmaster
+			   other than the recovery master.  NOTE(review):
+			   confirm this merge rule against the pulldb
+			   protocol documentation */
+			if (!(header.rsn < hdr->rsn ||
+			      (header.dmaster != ctdb->recovery_master && header.rsn == hdr->rsn))) {
				continue;
			}
-			ret = ctdb_ctrl_copydb(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn,
-					       pnn, dbmap->dbs[i].dbid, CTDB_LMASTER_ANY, mem_ctx);
-			if (ret != 0) {
-				DEBUG(0, (__location__ " Unable to copy db from node %u to node %u\n",
-					  nodemap->nodes[j].pnn, pnn));
-				return -1;
-			}
+		}
+
+		if (tdb_store(recdb->tdb, key, data, TDB_REPLACE) != 0) {
+			DEBUG(DEBUG_CRIT,(__location__ " Failed to store record\n"));
+			talloc_free(tmp_ctx);
+			return -1;
		}
	}
+	talloc_free(tmp_ctx);
+
	return 0;
}
-
/*
-  change the dmaster on all databases to point to us
+  pull all the remote database contents into the recdb
 */
-static int update_dmaster_on_our_databases(struct ctdb_context *ctdb, uint32_t pnn,
-					   struct ctdb_dbid_map *dbmap, TALLOC_CTX *mem_ctx)
+static int pull_remote_database(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
+				struct tdb_wrap *recdb, uint32_t dbid)
{
-	int i, ret;
+	int j;
-	/* update dmaster to point to this node for all databases/nodes */
-	for (i=0;i<dbmap->num;i++) {
-		ret = ctdb_ctrl_setdmaster(ctdb, CONTROL_TIMEOUT(), pnn,
-					   ctdb, dbmap->dbs[i].dbid, pnn);
-		if (ret != 0) {
-			DEBUG(0, (__location__ " Unable to set dmaster for node %u db:0x%08x\n",
-				  pnn, dbmap->dbs[i].dbid));
+	/* pull all records from all other nodes across onto this node
+	   (this merges based on rsn)
+	 */
+	for (j=0; j<nodemap->num; j++) {
+		/* dont merge from nodes that are unavailable */
+		if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
+			continue;
+		}
+		/* per-record rsn merging happens inside
+		   pull_one_remote_database() */
+		if (pull_one_remote_database(ctdb, nodemap->nodes[j].pnn, recdb, dbid) != 0) {
+			DEBUG(DEBUG_ERR,(__location__ " Failed to pull remote database from node %u\n",
+				 nodemap->nodes[j].pnn));
			return -1;
		}
	}
-
+
	return 0;
}
return 0;
}
-/*
- vacuum one database
- */
-static int vacuum_db(struct ctdb_context *ctdb, uint32_t db_id, struct ctdb_node_map *nodemap)
-{
- uint64_t max_rsn;
- int ret, i;
- TALLOC_CTX *mem_ctx = talloc_new(ctdb);
- struct async_data *async_data;
- struct ctdb_client_control_state *state;
-
- /* find max rsn on our local node for this db */
- ret = ctdb_ctrl_get_max_rsn(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, db_id, &max_rsn);
- if (ret != 0) {
- talloc_free(mem_ctx);
- return -1;
- }
-
- async_data = talloc_zero(mem_ctx, struct async_data);
- CTDB_NO_MEMORY_FATAL(ctdb, async_data);
-
- /* set rsn on non-empty records to max_rsn+1 */
- for (i=0;i<nodemap->num;i++) {
- if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
- continue;
- }
- state = ctdb_ctrl_set_rsn_nonempty_send(ctdb, async_data, CONTROL_TIMEOUT(), nodemap->nodes[i].pnn,
- db_id, max_rsn+1);
- if (state == NULL) {
- DEBUG(0,(__location__ " Failed to set rsn on node %u to %llu\n",
- nodemap->nodes[i].pnn, (unsigned long long)max_rsn+1));
- talloc_free(mem_ctx);
- return -1;
- }
- async_add(async_data, state);
- }
-
- if (async_wait(ctdb, async_data) != 0) {
- DEBUG(0,(__location__ " Failed async calls to set rsn nonempty\n"));
- talloc_free(mem_ctx);
- return -1;
- }
-
-
- /* delete records with rsn < max_rsn+1 on all nodes */
- for (i=0;i<nodemap->num;i++) {
- if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
- continue;
- }
- state = ctdb_ctrl_delete_low_rsn_send(ctdb, async_data, CONTROL_TIMEOUT(), nodemap->nodes[i].pnn,
- db_id, max_rsn+1);
- if (state == NULL) {
- DEBUG(0,(__location__ " Failed to delete records on node %u with rsn below %llu\n",
- nodemap->nodes[i].pnn, (unsigned long long)max_rsn+1));
- talloc_free(mem_ctx);
- return -1;
- }
- async_add(async_data, state);
- }
-
- if (async_wait(ctdb, async_data) != 0) {
- DEBUG(0,(__location__ " Failed async calls to delete low rsn\n"));
- talloc_free(mem_ctx);
- return -1;
- }
-
- return 0;
-}
-
-
-/*
- vacuum all attached databases
- */
-static int vacuum_all_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
- struct ctdb_dbid_map *dbmap)
-{
- int i;
-
- /* update dmaster to point to this node for all databases/nodes */
- for (i=0;i<dbmap->num;i++) {
- if (vacuum_db(ctdb, dbmap->dbs[i].dbid, nodemap) != 0) {
- return -1;
- }
- }
- return 0;
-}
-
-/*
- push out all our database contents to all other nodes
- */
-static int push_all_local_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
- uint32_t pnn, struct ctdb_dbid_map *dbmap, TALLOC_CTX *mem_ctx)
-{
- int i;
-
- /* push all records out to the nodes again */
- for (i=0;i<dbmap->num;i++) {
- int j, ret;
- TDB_DATA outdata;
- struct async_data *async_data;
- struct ctdb_client_control_state *state;
-
- DEBUG(3,("pulling dbid 0x%x from local node %u\n",
- dbmap->dbs[i].dbid, pnn));
-
- async_data = talloc_zero(mem_ctx, struct async_data);
- CTDB_NO_MEMORY_FATAL(ctdb, async_data);
-
- ret = ctdb_ctrl_pulldb(ctdb, pnn, dbmap->dbs[i].dbid,
- CTDB_LMASTER_ANY,
- async_data, CONTROL_TIMEOUT(), &outdata);
- if (ret != 0) {
- DEBUG(0,(__location__ " ctdb_control for pulldb failed\n"));
- return -1;
- }
-
- for (j=0; j<nodemap->num; j++) {
- /* we dont need to push to ourselves */
- if (nodemap->nodes[j].pnn == pnn) {
- continue;
- }
- /* dont push to nodes that are unavailable */
- if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
- continue;
- }
-
- DEBUG(3,("starting async push of dbid 0x%x to %u\n",
- dbmap->dbs[i].dbid,
- nodemap->nodes[j].pnn));
-
- state = ctdb_ctrl_pushdb_send(ctdb,
- nodemap->nodes[j].pnn,
- dbmap->dbs[i].dbid, async_data,
- CONTROL_TIMEOUT(), outdata);
- if (state == NULL) {
- DEBUG(0,(__location__ " async control for pushdb for dbid 0x%08x to node %u failed\n", dbmap->dbs[i].dbid, nodemap->nodes[j].pnn));
- talloc_free(async_data);
- return -1;
- }
-
- async_add(async_data, state);
- }
-
- if (async_wait(ctdb, async_data) != 0) {
- DEBUG(0,("Async push of database 0x%08x failed\n", dbmap->dbs[i].dbid));
- talloc_free(async_data);
- return -1;
- }
-
- talloc_free(async_data);
- }
-
- return 0;
-}
-
/*
ensure all nodes have the same vnnmap we do
ret = ctdb_ctrl_setvnnmap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, mem_ctx, vnnmap);
if (ret != 0) {
- DEBUG(0, (__location__ " Unable to set vnnmap for node %u\n", pnn));
+ DEBUG(DEBUG_ERR, (__location__ " Unable to set vnnmap for node %u\n", pnn));
return -1;
}
}
TALLOC_CTX *mem_ctx = talloc_new(ctdb);
if (data.dsize != sizeof(*b)) {
- DEBUG(0,("Bad data in ban_handler\n"));
+ DEBUG(DEBUG_ERR,("Bad data in ban_handler\n"));
talloc_free(mem_ctx);
return;
}
if (b->pnn != ctdb->pnn) {
- DEBUG(0,("Got a ban request for pnn:%u but our pnn is %u. Ignoring ban request\n", b->pnn, ctdb->pnn));
+ DEBUG(DEBUG_ERR,("Got a ban request for pnn:%u but our pnn is %u. Ignoring ban request\n", b->pnn, ctdb->pnn));
return;
}
- DEBUG(0,("Node %u has been banned for %u seconds\n",
+ DEBUG(DEBUG_NOTICE,("Node %u has been banned for %u seconds\n",
b->pnn, b->ban_time));
ctdb_ban_node(rec, b->pnn, b->ban_time);
uint32_t pnn;
if (data.dsize != sizeof(uint32_t)) {
- DEBUG(0,("Bad data in unban_handler\n"));
+ DEBUG(DEBUG_ERR,("Bad data in unban_handler\n"));
talloc_free(mem_ctx);
return;
}
pnn = *(uint32_t *)data.dptr;
if (pnn != ctdb->pnn) {
- DEBUG(0,("Got an unban request for pnn:%u but our pnn is %u. Ignoring unban request\n", pnn, ctdb->pnn));
+ DEBUG(DEBUG_ERR,("Got an unban request for pnn:%u but our pnn is %u. Ignoring unban request\n", pnn, ctdb->pnn));
return;
}
- DEBUG(0,("Node %u has been unbanned.\n", pnn));
+ DEBUG(DEBUG_NOTICE,("Node %u has been unbanned.\n", pnn));
ctdb_unban_node(rec, pnn);
talloc_free(mem_ctx);
}
+/*
+  state for one in-flight vacuum-fetch run: a batch of records sent to us
+  by a remote node, migrated back here one record at a time
+ */
+struct vacuum_info {
+	struct vacuum_info *next, *prev;	/* linked on rec->vacuum_info */
+	struct ctdb_recoverd *rec;
+	uint32_t srcnode;			/* node the records came from */
+	struct ctdb_db_context *ctdb_db;
+	struct ctdb_control_pulldb_reply *recs;	/* remaining records; count decremented as we go */
+	struct ctdb_rec_data *r;		/* cursor into recs */
+};
+
+static void vacuum_fetch_next(struct vacuum_info *v);
+
+/*
+  called when a vacuum fetch has completed - just free it and do the next one
+ */
+static void vacuum_fetch_callback(struct ctdb_client_call_state *state)
+{
+	struct vacuum_info *v = talloc_get_type(state->async.private, struct vacuum_info);
+	talloc_free(state);
+	vacuum_fetch_next(v);
+}
+
+
+/*
+  process the next element from the vacuum list
+*/
+static void vacuum_fetch_next(struct vacuum_info *v)
+{
+	struct ctdb_call call;
+	struct ctdb_rec_data *r;
+
+	while (v->recs->count) {
+		struct ctdb_client_call_state *state;
+		TDB_DATA data;
+		struct ctdb_ltdb_header *hdr;
+
+		ZERO_STRUCT(call);
+		call.call_id = CTDB_NULL_FUNC;
+		call.flags = CTDB_IMMEDIATE_MIGRATION;
+
+		/* advance the cursor past this record before processing it */
+		r = v->r;
+		v->r = (struct ctdb_rec_data *)(r->length + (uint8_t *)r);
+		v->recs->count--;
+
+		call.key.dptr = &r->data[0];
+		call.key.dsize = r->keylen;
+
+		/* ensure we don't block this daemon - just skip a record if we can't get
+		   the chainlock */
+		if (tdb_chainlock_nonblock(v->ctdb_db->ltdb->tdb, call.key) != 0) {
+			continue;
+		}
+
+		data = tdb_fetch(v->ctdb_db->ltdb->tdb, call.key);
+		if (data.dptr == NULL) {
+			tdb_chainunlock(v->ctdb_db->ltdb->tdb, call.key);
+			continue;
+		}
+
+		/* skip records too small to carry an ltdb header */
+		if (data.dsize < sizeof(struct ctdb_ltdb_header)) {
+			free(data.dptr);
+			tdb_chainunlock(v->ctdb_db->ltdb->tdb, call.key);
+			continue;
+		}
+
+		hdr = (struct ctdb_ltdb_header *)data.dptr;
+		if (hdr->dmaster == v->rec->ctdb->pnn) {
+			/* its already local */
+			free(data.dptr);
+			tdb_chainunlock(v->ctdb_db->ltdb->tdb, call.key);
+			continue;
+		}
+
+		free(data.dptr);
+
+		/* migrate the record to us with a null call; the callback
+		   resumes at the next record when the call completes */
+		state = ctdb_call_send(v->ctdb_db, &call);
+		tdb_chainunlock(v->ctdb_db->ltdb->tdb, call.key);
+		if (state == NULL) {
+			DEBUG(DEBUG_ERR,(__location__ " Failed to setup vacuum fetch call\n"));
+			talloc_free(v);
+			return;
+		}
+		state->async.fn = vacuum_fetch_callback;
+		state->async.private = v;
+		/* only one migration outstanding at a time */
+		return;
+	}
+
+	talloc_free(v);
+}
+
+
+/*
+  destroy a vacuum info structure
+ */
+static int vacuum_info_destructor(struct vacuum_info *v)
+{
+	/* unlink from the recovery daemon's list of active vacuum runs */
+	DLIST_REMOVE(v->rec->vacuum_info, v);
+	return 0;
+}
+
+
+/*
+  handler for vacuum fetch
+
+  a remote node has sent us a blob of records (a ctdb_control_pulldb_reply)
+  that it wants migrated back to this node; queue them on a vacuum_info
+  and start migrating them one at a time
+*/
+static void vacuum_fetch_handler(struct ctdb_context *ctdb, uint64_t srvid,
+				 TDB_DATA data, void *private_data)
+{
+	struct ctdb_recoverd *rec = talloc_get_type(private_data, struct ctdb_recoverd);
+	struct ctdb_control_pulldb_reply *recs;
+	int ret, i;
+	TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
+	const char *name;
+	struct ctdb_dbid_map *dbmap=NULL;
+	bool persistent = false;
+	struct ctdb_db_context *ctdb_db;
+	struct ctdb_rec_data *r;
+	uint32_t srcnode;
+	struct vacuum_info *v;
+
+	recs = (struct ctdb_control_pulldb_reply *)data.dptr;
+	r = (struct ctdb_rec_data *)&recs->data[0];
+
+	if (recs->count == 0) {
+		talloc_free(tmp_ctx);
+		return;
+	}
+
+	/* the sender stores its pnn in the reqid field of the first
+	   record - NOTE(review): confirm against the sending side */
+	srcnode = r->reqid;
+
+	for (v=rec->vacuum_info;v;v=v->next) {
+		if (srcnode == v->srcnode && recs->db_id == v->ctdb_db->db_id) {
+			/* we're already working on records from this node */
+			talloc_free(tmp_ctx);
+			return;
+		}
+	}
+
+	/* work out if the database is persistent */
+	ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, tmp_ctx, &dbmap);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Unable to get dbids from local node\n"));
+		talloc_free(tmp_ctx);
+		return;
+	}
+
+	for (i=0;i<dbmap->num;i++) {
+		if (dbmap->dbs[i].dbid == recs->db_id) {
+			persistent = dbmap->dbs[i].persistent;
+			break;
+		}
+	}
+	if (i == dbmap->num) {
+		DEBUG(DEBUG_ERR, (__location__ " Unable to find db_id 0x%x on local node\n", recs->db_id));
+		talloc_free(tmp_ctx);
+		return;
+	}
+
+	/* find the name of this database */
+	if (ctdb_ctrl_getdbname(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, recs->db_id, tmp_ctx, &name) != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to get name of db 0x%x\n", recs->db_id));
+		talloc_free(tmp_ctx);
+		return;
+	}
+
+	/* attach to it */
+	ctdb_db = ctdb_attach(ctdb, name, persistent);
+	if (ctdb_db == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to attach to database '%s'\n", name));
+		talloc_free(tmp_ctx);
+		return;
+	}
+
+	v = talloc_zero(rec, struct vacuum_info);
+	if (v == NULL) {
+		DEBUG(DEBUG_CRIT,(__location__ " Out of memory\n"));
+		talloc_free(tmp_ctx);
+		return;
+	}
+
+	v->rec = rec;
+	v->srcnode = srcnode;
+	v->ctdb_db = ctdb_db;
+	/* take our own copy of the records - 'data' belongs to the caller */
+	v->recs = talloc_memdup(v, recs, data.dsize);
+	if (v->recs == NULL) {
+		DEBUG(DEBUG_CRIT,(__location__ " Out of memory\n"));
+		talloc_free(v);
+		talloc_free(tmp_ctx);
+		return;
+	}
+	v->r = 	(struct ctdb_rec_data *)&v->recs->data[0];
+
+	DLIST_ADD(rec->vacuum_info, v);
+
+	talloc_set_destructor(v, vacuum_info_destructor);
+
+	vacuum_fetch_next(v);
+
+	/* dbmap and name were only needed temporarily - don't leak them */
+	talloc_free(tmp_ctx);
+}
+
/*
called when ctdb_wait_timeout should finish
if (rec->last_culprit != culprit ||
timeval_elapsed(&rec->first_recover_time) > ctdb->tunable.recovery_grace_period) {
- DEBUG(0,("New recovery culprit %u\n", culprit));
+ DEBUG(DEBUG_NOTICE,("New recovery culprit %u\n", culprit));
/* either a new node is the culprit, or we've decided to forgive them */
rec->last_culprit = culprit;
rec->first_recover_time = timeval_current();
ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn,
mem_ctx, &remote_nodemap);
if (ret != 0) {
- DEBUG(0, (__location__ " Unable to get nodemap from remote node %u\n",
+ DEBUG(DEBUG_ERR, (__location__ " Unable to get nodemap from remote node %u\n",
nodemap->nodes[j].pnn));
ctdb_set_culprit(rec, nodemap->nodes[j].pnn);
talloc_free(mem_ctx);
/* Update our local copy of the flags in the recovery
daemon.
*/
- DEBUG(0,("Remote node %u had flags 0x%x, local had 0x%x - updating local\n",
+ DEBUG(DEBUG_NOTICE,("Remote node %u had flags 0x%x, local had 0x%x - updating local\n",
nodemap->nodes[j].pnn, remote_nodemap->nodes[j].flags,
nodemap->nodes[j].flags));
nodemap->nodes[j].flags = remote_nodemap->nodes[j].flags;
this is a good reason to do a new election.
*/
if ((c.old_flags ^ c.new_flags) & NODE_FLAGS_BANNED) {
- DEBUG(0,("Remote node %u had different BANNED flags 0x%x, local had 0x%x - trigger a re-election\n",
+ DEBUG(DEBUG_NOTICE,("Remote node %u had different BANNED flags 0x%x, local had 0x%x - trigger a re-election\n",
nodemap->nodes[j].pnn, c.new_flags,
c.old_flags));
talloc_free(mem_ctx);
return generation;
}
+
+/*
+  create a temporary working database
+ */
+static struct tdb_wrap *create_recdb(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx)
+{
+	struct tdb_wrap *recdb = NULL;
+	char *path;
+
+	/* the temporary recovery db lives in the normal db directory */
+	path = talloc_asprintf(mem_ctx, "%s/recdb.tdb", ctdb->db_directory);
+	if (path != NULL) {
+		/* remove any stale copy from a previous recovery run */
+		unlink(path);
+		recdb = tdb_wrap_open(mem_ctx, path, ctdb->tunable.database_hash_size,
+				      TDB_NOLOCK, O_RDWR|O_CREAT|O_EXCL, 0600);
+		if (recdb == NULL) {
+			DEBUG(DEBUG_CRIT,(__location__ " Failed to create temp recovery database '%s'\n", path));
+		}
+		talloc_free(path);
+	}
+
+	return recdb;
+}
+
+
+/*
+  a traverse function for pulling all relevant records from recdb
+ */
+struct recdb_data {
+	struct ctdb_context *ctdb;
+	struct ctdb_control_pulldb_reply *recdata; /* growing marshalled blob */
+	uint32_t len;		/* bytes of recdata used so far */
+	bool failed;		/* set on marshalling failure; aborts traverse */
+};
+
+static int traverse_recdb(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *p)
+{
+	struct recdb_data *params = (struct recdb_data *)p;
+	struct ctdb_rec_data *rec;
+	struct ctdb_ltdb_header *hdr;
+	struct ctdb_control_pulldb_reply *tmp;
+
+	/* skip empty records */
+	if (data.dsize <= sizeof(struct ctdb_ltdb_header)) {
+		return 0;
+	}
+
+	/* update the dmaster field to point to us */
+	hdr = (struct ctdb_ltdb_header *)data.dptr;
+	hdr->dmaster = params->ctdb->pnn;
+
+	/* add the record to the blob ready to send to the nodes */
+	rec = ctdb_marshall_record(params->recdata, 0, key, NULL, data);
+	if (rec == NULL) {
+		params->failed = true;
+		return -1;
+	}
+	/* grow via a temporary so that on failure we neither lose the old
+	   blob pointer nor dereference the NULL realloc result in the
+	   DEBUG message below */
+	tmp = talloc_realloc_size(NULL, params->recdata, rec->length + params->len);
+	if (tmp == NULL) {
+		DEBUG(DEBUG_CRIT,(__location__ " Failed to expand recdata to %u (%u records)\n",
+			 rec->length + params->len, params->recdata->count));
+		talloc_free(rec);
+		params->failed = true;
+		return -1;
+	}
+	params->recdata = tmp;
+	params->recdata->count++;
+	memcpy(params->len+(uint8_t *)params->recdata, rec, rec->length);
+	params->len += rec->length;
+	talloc_free(rec);
+
+	return 0;
+}
+
+/*
+  push the recdb database out to all nodes
+ */
+static int push_recdb_database(struct ctdb_context *ctdb, uint32_t dbid,
+			       struct tdb_wrap *recdb, struct ctdb_node_map *nodemap)
+{
+	struct recdb_data params;
+	struct ctdb_control_pulldb_reply *recdata;
+	TDB_DATA outdata;
+	TALLOC_CTX *tmp_ctx;
+
+	tmp_ctx = talloc_new(ctdb);
+	CTDB_NO_MEMORY(ctdb, tmp_ctx);
+
+	/* initial reply header; traverse_recdb() grows this blob (and may
+	   reparent it via talloc_realloc) */
+	recdata = talloc_zero(recdb, struct ctdb_control_pulldb_reply);
+	CTDB_NO_MEMORY(ctdb, recdata);
+
+	recdata->db_id = dbid;
+
+	params.ctdb = ctdb;
+	params.recdata = recdata;
+	params.len = offsetof(struct ctdb_control_pulldb_reply, data);
+	params.failed = false;
+
+	if (tdb_traverse_read(recdb->tdb, traverse_recdb, &params) == -1) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to traverse recdb database\n"));
+		talloc_free(params.recdata);
+		talloc_free(tmp_ctx);
+		return -1;
+	}
+
+	if (params.failed) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to traverse recdb database\n"));
+		talloc_free(params.recdata);
+		talloc_free(tmp_ctx);
+		return -1;
+	}
+
+	recdata = params.recdata;
+
+	outdata.dptr = (void *)recdata;
+	outdata.dsize = params.len;
+
+	/* broadcast the marshalled records to every active node */
+	if (ctdb_client_async_control(ctdb, CTDB_CONTROL_PUSH_DB,
+			list_of_active_nodes(ctdb, nodemap, tmp_ctx, true),
+			CONTROL_TIMEOUT(), false, outdata) != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to push recdb records to nodes for db 0x%x\n", dbid));
+		talloc_free(recdata);
+		talloc_free(tmp_ctx);
+		return -1;
+	}
+
+	DEBUG(DEBUG_NOTICE, (__location__ " Recovery - pushed remote database 0x%x of size %u\n",
+		  dbid, recdata->count));
+
+	talloc_free(recdata);
+	talloc_free(tmp_ctx);
+
+	return 0;
+}
+
+
+/*
+  go through a full recovery on one database
+ */
+static int recover_database(struct ctdb_recoverd *rec,
+			    TALLOC_CTX *mem_ctx,
+			    uint32_t dbid,
+			    uint32_t pnn,
+			    struct ctdb_node_map *nodemap,
+			    uint32_t transaction_id)
+{
+	struct tdb_wrap *recdb;
+	int ret;
+	struct ctdb_context *ctdb = rec->ctdb;
+	TDB_DATA data;
+	struct ctdb_control_wipe_database w;
+
+	/* temporary local db that collects the merged records */
+	recdb = create_recdb(ctdb, mem_ctx);
+	if (recdb == NULL) {
+		return -1;
+	}
+
+	/* pull all remote databases onto the recdb */
+	ret = pull_remote_database(ctdb, nodemap, recdb, dbid);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Unable to pull remote database 0x%x\n", dbid));
+		/* free the recdb here too, consistent with the other error paths */
+		talloc_free(recdb);
+		return -1;
+	}
+
+	DEBUG(DEBUG_NOTICE, (__location__ " Recovery - pulled remote database 0x%x\n", dbid));
+
+	/* wipe all the remote databases. This is safe as we are in a transaction */
+	w.db_id = dbid;
+	w.transaction_id = transaction_id;
+
+	data.dptr = (void *)&w;
+	data.dsize = sizeof(w);
+
+	if (ctdb_client_async_control(ctdb, CTDB_CONTROL_WIPE_DATABASE,
+			list_of_active_nodes(ctdb, nodemap, recdb, true),
+			CONTROL_TIMEOUT(), false, data) != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Unable to wipe database. Recovery failed.\n"));
+		talloc_free(recdb);
+		return -1;
+	}
+
+	/* push out the correct database. This sets the dmaster and skips
+	   the empty records */
+	ret = push_recdb_database(ctdb, dbid, recdb, nodemap);
+	if (ret != 0) {
+		talloc_free(recdb);
+		return -1;
+	}
+
+	/* all done with this database */
+	talloc_free(recdb);
+
+	return 0;
+}
+
/*
we are the recmaster, and recovery is needed - start a recovery run
int i, j, ret;
uint32_t generation;
struct ctdb_dbid_map *dbmap;
+ TDB_DATA data;
- DEBUG(0, (__location__ " Starting do_recovery\n"));
+ DEBUG(DEBUG_NOTICE, (__location__ " Starting do_recovery\n"));
/* if recovery fails, force it again */
rec->need_recovery = true;
ctdb_set_culprit(rec, culprit);
if (rec->culprit_counter > 2*nodemap->num) {
- DEBUG(0,("Node %u has caused %u recoveries in %.0f seconds - banning it for %u seconds\n",
+ DEBUG(DEBUG_NOTICE,("Node %u has caused %u recoveries in %.0f seconds - banning it for %u seconds\n",
culprit, rec->culprit_counter, timeval_elapsed(&rec->first_recover_time),
ctdb->tunable.recovery_ban_period));
ctdb_ban_node(rec, culprit, ctdb->tunable.recovery_ban_period);
if (!ctdb_recovery_lock(ctdb, true)) {
ctdb_set_culprit(rec, pnn);
- DEBUG(0,("Unable to get recovery lock - aborting recovery\n"));
+ DEBUG(DEBUG_ERR,("Unable to get recovery lock - aborting recovery\n"));
+ return -1;
+ }
+
+ DEBUG(DEBUG_NOTICE, (__location__ " Recovery initiated due to problem with node %u\n", culprit));
+
+ /* get a list of all databases */
+ ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), pnn, mem_ctx, &dbmap);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, (__location__ " Unable to get dbids from node :%u\n", pnn));
+ return -1;
+ }
+
+ /* we do the db creation before we set the recovery mode, so the freeze happens
+ on all databases we will be dealing with. */
+
+ /* verify that we have all the databases any other node has */
+ ret = create_missing_local_databases(ctdb, nodemap, pnn, &dbmap, mem_ctx);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, (__location__ " Unable to create missing local databases\n"));
+ return -1;
+ }
+
+ /* verify that all other nodes have all our databases */
+ ret = create_missing_remote_databases(ctdb, nodemap, pnn, dbmap, mem_ctx);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, (__location__ " Unable to create missing remote databases\n"));
return -1;
}
+ DEBUG(DEBUG_NOTICE, (__location__ " Recovery - created remote databases\n"));
+
+
/* set recovery mode to active on all nodes */
ret = set_recovery_mode(ctdb, nodemap, CTDB_RECOVERY_ACTIVE);
if (ret!=0) {
- DEBUG(0, (__location__ " Unable to set recovery mode to active on cluster\n"));
+ DEBUG(DEBUG_ERR, (__location__ " Unable to set recovery mode to active on cluster\n"));
return -1;
}
- DEBUG(0, (__location__ " Recovery initiated due to problem with node %u\n", culprit));
+ /* execute the "startrecovery" event script on all nodes */
+ ret = run_startrecovery_eventscript(ctdb, nodemap);
+ if (ret!=0) {
+ DEBUG(DEBUG_ERR, (__location__ " Unable to run the 'startrecovery' event on cluster\n"));
+ return -1;
+ }
/* pick a new generation number */
generation = new_generation();
vnnmap->generation = generation;
ret = ctdb_ctrl_setvnnmap(ctdb, CONTROL_TIMEOUT(), pnn, mem_ctx, vnnmap);
if (ret != 0) {
- DEBUG(0, (__location__ " Unable to set vnnmap for node %u\n", pnn));
- return -1;
- }
-
- /* get a list of all databases */
- ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), pnn, mem_ctx, &dbmap);
- if (ret != 0) {
- DEBUG(0, (__location__ " Unable to get dbids from node :%u\n", pnn));
- return -1;
- }
-
-
- /* verify that all other nodes have all our databases */
- ret = create_missing_remote_databases(ctdb, nodemap, pnn, dbmap, mem_ctx);
- if (ret != 0) {
- DEBUG(0, (__location__ " Unable to create missing remote databases\n"));
- return -1;
- }
-
- /* verify that we have all the databases any other node has */
- ret = create_missing_local_databases(ctdb, nodemap, pnn, &dbmap, mem_ctx);
- if (ret != 0) {
- DEBUG(0, (__location__ " Unable to create missing local databases\n"));
- return -1;
- }
-
- /* verify that all other nodes have all our databases */
- ret = create_missing_remote_databases(ctdb, nodemap, pnn, dbmap, mem_ctx);
- if (ret != 0) {
- DEBUG(0, (__location__ " Unable to create missing remote databases\n"));
+ DEBUG(DEBUG_ERR, (__location__ " Unable to set vnnmap for node %u\n", pnn));
return -1;
}
+ data.dptr = (void *)&generation;
+ data.dsize = sizeof(uint32_t);
- DEBUG(0, (__location__ " Recovery - created remote databases\n"));
-
- /* pull all remote databases onto the local node */
- ret = pull_all_remote_databases(ctdb, nodemap, pnn, dbmap, mem_ctx);
- if (ret != 0) {
- DEBUG(0, (__location__ " Unable to pull remote databases\n"));
+ if (ctdb_client_async_control(ctdb, CTDB_CONTROL_TRANSACTION_START,
+ list_of_active_nodes(ctdb, nodemap, mem_ctx, true),
+ CONTROL_TIMEOUT(), false, data) != 0) {
+ DEBUG(DEBUG_ERR, (__location__ " Unable to start transactions. Recovery failed.\n"));
return -1;
}
- DEBUG(0, (__location__ " Recovery - pulled remote databases\n"));
+ DEBUG(DEBUG_NOTICE,(__location__ " started transactions on all nodes\n"));
- /* repoint all local database records to the local node as
- being dmaster
- */
- ret = update_dmaster_on_our_databases(ctdb, pnn, dbmap, mem_ctx);
- if (ret != 0) {
- DEBUG(0, (__location__ " Unable to update dmaster on all databases\n"));
- return -1;
+ for (i=0;i<dbmap->num;i++) {
+ if (recover_database(rec, mem_ctx, dbmap->dbs[i].dbid, pnn, nodemap, generation) != 0) {
+ DEBUG(DEBUG_ERR, (__location__ " Failed to recover database 0x%x\n", dbmap->dbs[i].dbid));
+ return -1;
+ }
}
- DEBUG(0, (__location__ " Recovery - updated dmaster on our databases\n"));
+ DEBUG(DEBUG_NOTICE, (__location__ " Recovery - starting database commits\n"));
-
- /* push all local databases to the remote nodes */
- ret = push_all_local_databases(ctdb, nodemap, pnn, dbmap, mem_ctx);
- if (ret != 0) {
- DEBUG(0, (__location__ " Unable to push local databases\n"));
+ /* commit all the changes */
+ if (ctdb_client_async_control(ctdb, CTDB_CONTROL_TRANSACTION_COMMIT,
+ list_of_active_nodes(ctdb, nodemap, mem_ctx, true),
+ CONTROL_TIMEOUT(), false, data) != 0) {
+ DEBUG(DEBUG_ERR, (__location__ " Unable to commit recovery changes. Recovery failed.\n"));
return -1;
}
- DEBUG(0, (__location__ " Recovery - pushed remote databases\n"));
+ DEBUG(DEBUG_NOTICE, (__location__ " Recovery - committed databases\n"));
+
/* build a new vnn map with all the currently active and
unbanned nodes */
/* update to the new vnnmap on all nodes */
ret = update_vnnmap_on_all_nodes(ctdb, nodemap, pnn, vnnmap, mem_ctx);
if (ret != 0) {
- DEBUG(0, (__location__ " Unable to update vnnmap on all nodes\n"));
+ DEBUG(DEBUG_ERR, (__location__ " Unable to update vnnmap on all nodes\n"));
return -1;
}
- DEBUG(0, (__location__ " Recovery - updated vnnmap\n"));
+ DEBUG(DEBUG_NOTICE, (__location__ " Recovery - updated vnnmap\n"));
/* update recmaster to point to us for all nodes */
ret = set_recovery_master(ctdb, nodemap, pnn);
if (ret!=0) {
- DEBUG(0, (__location__ " Unable to set recovery master\n"));
+ DEBUG(DEBUG_ERR, (__location__ " Unable to set recovery master\n"));
return -1;
}
- DEBUG(0, (__location__ " Recovery - updated recmaster\n"));
+ DEBUG(DEBUG_NOTICE, (__location__ " Recovery - updated recmaster\n"));
/*
update all nodes to have the same flags that we have
*/
ret = update_flags_on_all_nodes(ctdb, nodemap);
if (ret != 0) {
- DEBUG(0, (__location__ " Unable to update flags on all nodes\n"));
+ DEBUG(DEBUG_ERR, (__location__ " Unable to update flags on all nodes\n"));
return -1;
}
- DEBUG(0, (__location__ " Recovery - updated flags\n"));
-
- /*
- run a vacuum operation on empty records
- */
- ret = vacuum_all_databases(ctdb, nodemap, dbmap);
- if (ret != 0) {
- DEBUG(0, (__location__ " Unable to vacuum all databases\n"));
- return -1;
- }
-
- DEBUG(0, (__location__ " Recovery - vacuumed all databases\n"));
+ DEBUG(DEBUG_NOTICE, (__location__ " Recovery - updated flags\n"));
/*
if enabled, tell nodes to takeover their public IPs
rec->need_takeover_run = false;
ret = ctdb_takeover_run(ctdb, nodemap);
if (ret != 0) {
- DEBUG(0, (__location__ " Unable to setup public takeover addresses\n"));
+ DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses\n"));
return -1;
}
- DEBUG(1, (__location__ " Recovery - done takeover\n"));
+ DEBUG(DEBUG_INFO, (__location__ " Recovery - done takeover\n"));
}
- for (i=0;i<dbmap->num;i++) {
- DEBUG(2,("Recovered database with db_id 0x%08x\n", dbmap->dbs[i].dbid));
+ /* execute the "recovered" event script on all nodes */
+ ret = run_recovered_eventscript(ctdb, nodemap);
+ if (ret!=0) {
+ DEBUG(DEBUG_ERR, (__location__ " Unable to run the 'recovered' event on cluster\n"));
+ return -1;
}
/* disable recovery mode */
ret = set_recovery_mode(ctdb, nodemap, CTDB_RECOVERY_NORMAL);
if (ret!=0) {
- DEBUG(0, (__location__ " Unable to set recovery mode to normal on cluster\n"));
+ DEBUG(DEBUG_ERR, (__location__ " Unable to set recovery mode to normal on cluster\n"));
return -1;
}
has been reconfigured */
ctdb_send_message(ctdb, CTDB_BROADCAST_CONNECTED, CTDB_SRVID_RECONFIGURE, tdb_null);
- DEBUG(0, (__location__ " Recovery complete\n"));
+ DEBUG(DEBUG_NOTICE, (__location__ " Recovery complete\n"));
rec->need_recovery = false;
We now wait for rerecovery_timeout before we allow
another recovery to take place.
*/
- DEBUG(0, (__location__ " New recoveries supressed for the rerecovery timeout\n"));
+ DEBUG(DEBUG_NOTICE, (__location__ " New recoveries supressed for the rerecovery timeout\n"));
ctdb_wait_timeout(ctdb, ctdb->tunable.rerecovery_timeout);
- DEBUG(0, (__location__ " Rerecovery timeout elapsed. Recovery reactivated.\n"));
+ DEBUG(DEBUG_NOTICE, (__location__ " Rerecovery timeout elapsed. Recovery reactivated.\n"));
return 0;
}
ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, rec, &nodemap);
if (ret != 0) {
- DEBUG(0,(__location__ " unable to get election data\n"));
+ DEBUG(DEBUG_ERR,(__location__ " unable to get election data\n"));
return;
}
*/
ret = ctdb_ctrl_setrecmaster(ctdb, CONTROL_TIMEOUT(), pnn, pnn);
if (ret != 0) {
- DEBUG(0, (__location__ " failed to send recmaster election request\n"));
+ DEBUG(DEBUG_ERR, (__location__ " failed to send recmaster election request\n"));
return -1;
}
ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, tmp_ctx, &nodemap);
if (ret != 0) {
- DEBUG(0,(__location__ " failed to get nodemap to unban all nodes\n"));
+ DEBUG(DEBUG_ERR,(__location__ " failed to get nodemap to unban all nodes\n"));
return;
}
ret = send_election_request(rec, ctdb_get_pnn(rec->ctdb));
if (ret != 0) {
- DEBUG(0,("Failed to send election request!\n"));
+ DEBUG(DEBUG_ERR,("Failed to send election request!\n"));
}
talloc_free(rec->send_election_te);
/* ok, let that guy become recmaster then */
ret = ctdb_ctrl_setrecmaster(ctdb, CONTROL_TIMEOUT(), ctdb_get_pnn(ctdb), em->pnn);
if (ret != 0) {
- DEBUG(0, (__location__ " failed to send recmaster election request"));
+ DEBUG(DEBUG_ERR, (__location__ " failed to send recmaster election request"));
talloc_free(mem_ctx);
return;
}
/* set all nodes to recovery mode to stop all internode traffic */
ret = set_recovery_mode(ctdb, nodemap, CTDB_RECOVERY_ACTIVE);
if (ret!=0) {
- DEBUG(0, (__location__ " Unable to set recovery mode to active on cluster\n"));
+ DEBUG(DEBUG_ERR, (__location__ " Unable to set recovery mode to active on cluster\n"));
return;
}
ret = send_election_request(rec, pnn);
if (ret!=0) {
- DEBUG(0, (__location__ " failed to initiate recmaster election"));
+ DEBUG(DEBUG_ERR, (__location__ " failed to initiate recmaster election"));
return;
}
struct ctdb_recoverd *rec = talloc_get_type(private_data, struct ctdb_recoverd);
if (data.dsize != sizeof(*c)) {
- DEBUG(0,(__location__ "Invalid data in ctdb_node_flag_change\n"));
+ DEBUG(DEBUG_ERR,(__location__ "Invalid data in ctdb_node_flag_change\n"));
return;
}
ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, tmp_ctx, &nodemap);
if (ret != 0) {
- DEBUG(0,(__location__ "ctdb_ctrl_getnodemap failed in monitor_handler\n"));
+ DEBUG(DEBUG_ERR,(__location__ "ctdb_ctrl_getnodemap failed in monitor_handler\n"));
talloc_free(tmp_ctx);
return;
}
}
if (i == nodemap->num) {
- DEBUG(0,(__location__ "Flag change for non-existant node %u\n", c->pnn));
+ DEBUG(DEBUG_CRIT,(__location__ "Flag change for non-existant node %u\n", c->pnn));
talloc_free(tmp_ctx);
return;
}
}
if (nodemap->nodes[i].flags != c->new_flags) {
- DEBUG(0,("Node %u has changed flags - now 0x%x was 0x%x\n", c->pnn, c->new_flags, c->old_flags));
+ DEBUG(DEBUG_NOTICE,("Node %u has changed flags - now 0x%x was 0x%x\n", c->pnn, c->new_flags, c->old_flags));
}
nodemap->nodes[i].flags = c->new_flags;
status field
*/
if (state->status != CTDB_RECOVERY_NORMAL) {
- DEBUG(0, (__location__ " Node:%u was in recovery mode. Restart recovery process\n", state->c->hdr.destnode));
+ DEBUG(DEBUG_NOTICE, (__location__ " Node:%u was in recovery mode. Restart recovery process\n", state->c->hdr.destnode));
rmdata->status = MONITOR_RECOVERY_NEEDED;
}
/* we failed to send the control, treat this as
an error and try again next iteration
*/
- DEBUG(0,("Failed to call ctdb_ctrl_getrecmode_send during monitoring\n"));
+ DEBUG(DEBUG_ERR,("Failed to call ctdb_ctrl_getrecmode_send during monitoring\n"));
talloc_free(mem_ctx);
return MONITOR_FAILED;
}
status field
*/
if (state->status != rmdata->pnn) {
- DEBUG(0,("Node %d does not agree we are the recmaster. Need a new recmaster election\n", state->c->hdr.destnode));
+ DEBUG(DEBUG_ERR,("Node %d does not agree we are the recmaster. Need a new recmaster election\n", state->c->hdr.destnode));
rmdata->status = MONITOR_ELECTION_NEEDED;
}
/* we failed to send the control, treat this as
an error and try again next iteration
*/
- DEBUG(0,("Failed to call ctdb_ctrl_getrecmaster_send during monitoring\n"));
+ DEBUG(DEBUG_ERR,("Failed to call ctdb_ctrl_getrecmaster_send during monitoring\n"));
talloc_free(mem_ctx);
return MONITOR_FAILED;
}
struct ctdb_all_public_ips *ips;
char c;
- DEBUG(0,("monitor_cluster starting\n"));
+ DEBUG(DEBUG_NOTICE,("monitor_cluster starting\n"));
rec = talloc_zero(ctdb, struct ctdb_recoverd);
CTDB_NO_MEMORY_FATAL(ctdb, rec);
/* and one for when nodes are unbanned */
ctdb_set_message_handler(ctdb, CTDB_SRVID_UNBAN_NODE, unban_handler, rec);
+
+ /* register a message port for vacuum fetch */
+ ctdb_set_message_handler(ctdb, CTDB_SRVID_VACUUM_FETCH, vacuum_fetch_handler, rec);
again:
if (mem_ctx) {
}
mem_ctx = talloc_new(ctdb);
if (!mem_ctx) {
- DEBUG(0,("Failed to create temporary context\n"));
+ DEBUG(DEBUG_CRIT,(__location__ " Failed to create temporary context\n"));
exit(-1);
}
/* we only check for recovery once every second */
ctdb_wait_timeout(ctdb, ctdb->tunable.recover_interval);
+ /* verify that the main daemon is still running */
+ if (kill(ctdb->ctdbd_pid, 0) != 0) {
+ DEBUG(DEBUG_CRIT,("CTDB daemon is no longer available. Shutting down recovery daemon\n"));
+ exit(-1);
+ }
+
if (rec->election_timeout) {
/* an election is in progress */
goto again;
map from the local node. thats why we have the hardcoded value 20
*/
if (rec->culprit_counter > 20) {
- DEBUG(0,("Node %u has caused %u failures in %.0f seconds - banning it for %u seconds\n",
+ DEBUG(DEBUG_NOTICE,("Node %u has caused %u failures in %.0f seconds - banning it for %u seconds\n",
rec->last_culprit, rec->culprit_counter, timeval_elapsed(&rec->first_recover_time),
ctdb->tunable.recovery_ban_period));
ctdb_ban_node(rec, rec->last_culprit, ctdb->tunable.recovery_ban_period);
/* get relevant tunables */
ret = ctdb_ctrl_get_all_tunables(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &ctdb->tunable);
if (ret != 0) {
- DEBUG(0,("Failed to get tunables - retrying\n"));
+ DEBUG(DEBUG_ERR,("Failed to get tunables - retrying\n"));
goto again;
}
pnn = ctdb_ctrl_getpnn(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE);
if (pnn == (uint32_t)-1) {
- DEBUG(0,("Failed to get local pnn - retrying\n"));
+ DEBUG(DEBUG_ERR,("Failed to get local pnn - retrying\n"));
goto again;
}
/* get the vnnmap */
ret = ctdb_ctrl_getvnnmap(ctdb, CONTROL_TIMEOUT(), pnn, mem_ctx, &vnnmap);
if (ret != 0) {
- DEBUG(0, (__location__ " Unable to get vnnmap from node %u\n", pnn));
+ DEBUG(DEBUG_ERR, (__location__ " Unable to get vnnmap from node %u\n", pnn));
goto again;
}
/* get number of nodes */
ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), pnn, mem_ctx, &nodemap);
if (ret != 0) {
- DEBUG(0, (__location__ " Unable to get nodemap from node %u\n", pnn));
+ DEBUG(DEBUG_ERR, (__location__ " Unable to get nodemap from node %u\n", pnn));
goto again;
}
/* check which node is the recovery master */
ret = ctdb_ctrl_getrecmaster(ctdb, mem_ctx, CONTROL_TIMEOUT(), pnn, &recmaster);
if (ret != 0) {
- DEBUG(0, (__location__ " Unable to get recmaster from node %u\n", pnn));
+ DEBUG(DEBUG_ERR, (__location__ " Unable to get recmaster from node %u\n", pnn));
goto again;
}
if (recmaster == (uint32_t)-1) {
- DEBUG(0,(__location__ " Initial recovery master set - forcing election\n"));
+ DEBUG(DEBUG_NOTICE,(__location__ " Initial recovery master set - forcing election\n"));
force_election(rec, mem_ctx, pnn, nodemap);
goto again;
}
if (nodemap->nodes[pnn].flags & NODE_FLAGS_BANNED) {
if (rec->banned_nodes[pnn] == NULL) {
if (recmaster == pnn) {
- DEBUG(0,("Local ctdb daemon on recmaster thinks this node is BANNED but the recovery master disagrees. Unbanning the node\n"));
+ DEBUG(DEBUG_NOTICE,("Local ctdb daemon on recmaster thinks this node is BANNED but the recovery master disagrees. Unbanning the node\n"));
ctdb_unban_node(rec, pnn);
} else {
- DEBUG(0,("Local ctdb daemon on non-recmaster thinks this node is BANNED but the recovery master disagrees. Re-banning the node\n"));
+ DEBUG(DEBUG_NOTICE,("Local ctdb daemon on non-recmaster thinks this node is BANNED but the recovery master disagrees. Re-banning the node\n"));
ctdb_ban_node(rec, pnn, ctdb->tunable.recovery_ban_period);
ctdb_set_culprit(rec, pnn);
}
} else {
if (rec->banned_nodes[pnn] != NULL) {
if (recmaster == pnn) {
- DEBUG(0,("Local ctdb daemon on recmaster does not think this node is BANNED but the recovery master disagrees. Unbanning the node\n"));
+ DEBUG(DEBUG_NOTICE,("Local ctdb daemon on recmaster does not think this node is BANNED but the recovery master disagrees. Unbanning the node\n"));
ctdb_unban_node(rec, pnn);
} else {
- DEBUG(0,("Local ctdb daemon on non-recmaster does not think this node is BANNED but the recovery master disagrees. Re-banning the node\n"));
+ DEBUG(DEBUG_NOTICE,("Local ctdb daemon on non-recmaster does not think this node is BANNED but the recovery master disagrees. Re-banning the node\n"));
ctdb_ban_node(rec, pnn, ctdb->tunable.recovery_ban_period);
ctdb_set_culprit(rec, pnn);
}
if (j == nodemap->num) {
- DEBUG(0, ("Recmaster node %u not in list. Force reelection\n", recmaster));
+ DEBUG(DEBUG_ERR, ("Recmaster node %u not in list. Force reelection\n", recmaster));
force_election(rec, mem_ctx, pnn, nodemap);
goto again;
}
/* if recovery master is disconnected we must elect a new recmaster */
if (nodemap->nodes[j].flags & NODE_FLAGS_DISCONNECTED) {
- DEBUG(0, ("Recmaster node %u is disconnected. Force reelection\n", nodemap->nodes[j].pnn));
+ DEBUG(DEBUG_NOTICE, ("Recmaster node %u is disconnected. Force reelection\n", nodemap->nodes[j].pnn));
force_election(rec, mem_ctx, pnn, nodemap);
goto again;
}
ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn,
mem_ctx, &remote_nodemap);
if (ret != 0) {
- DEBUG(0, (__location__ " Unable to get nodemap from recovery master %u\n",
+ DEBUG(DEBUG_ERR, (__location__ " Unable to get nodemap from recovery master %u\n",
nodemap->nodes[j].pnn));
goto again;
}
if (remote_nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
- DEBUG(0, ("Recmaster node %u no longer available. Force reelection\n", nodemap->nodes[j].pnn));
+ DEBUG(DEBUG_NOTICE, ("Recmaster node %u no longer available. Force reelection\n", nodemap->nodes[j].pnn));
force_election(rec, mem_ctx, pnn, nodemap);
goto again;
}
if (ctdb->vnn != NULL) {
ret = ctdb_ctrl_get_public_ips(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, mem_ctx, &ips);
if (ret != 0) {
- DEBUG(0, ("Unable to get public ips from node %u\n", i));
+ DEBUG(DEBUG_ERR, ("Unable to get public ips from node %u\n", i));
goto again;
}
for (j=0; j<ips->num; j++) {
*/
if (ips->ips[j].pnn == pnn) {
if (!ctdb_sys_have_ip(ips->ips[j].sin)) {
- DEBUG(0,("Public address '%s' is missing and we should serve this ip\n", inet_ntoa(ips->ips[j].sin.sin_addr)));
+ DEBUG(DEBUG_CRIT,("Public address '%s' is missing and we should serve this ip\n", inet_ntoa(ips->ips[j].sin.sin_addr)));
ret = ctdb_ctrl_freeze(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE);
if (ret != 0) {
- DEBUG(0,(__location__ " Failed to freeze node due to public ip address mismatches\n"));
+ DEBUG(DEBUG_ERR,(__location__ " Failed to freeze node due to public ip address mismatches\n"));
goto again;
}
ret = ctdb_ctrl_setrecmode(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, CTDB_RECOVERY_ACTIVE);
if (ret != 0) {
- DEBUG(0,(__location__ " Failed to activate recovery mode due to public ip address mismatches\n"));
+ DEBUG(DEBUG_ERR,(__location__ " Failed to activate recovery mode due to public ip address mismatches\n"));
goto again;
}
}
} else {
if (ctdb_sys_have_ip(ips->ips[j].sin)) {
- DEBUG(0,("We are still serving a public address '%s' that we should not be serving.\n", inet_ntoa(ips->ips[j].sin.sin_addr)));
+ DEBUG(DEBUG_CRIT,("We are still serving a public address '%s' that we should not be serving.\n", inet_ntoa(ips->ips[j].sin.sin_addr)));
ret = ctdb_ctrl_freeze(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE);
if (ret != 0) {
- DEBUG(0,(__location__ " Failed to freeze node due to public ip address mismatches\n"));
+ DEBUG(DEBUG_ERR,(__location__ " Failed to freeze node due to public ip address mismatches\n"));
goto again;
}
ret = ctdb_ctrl_setrecmode(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, CTDB_RECOVERY_ACTIVE);
if (ret != 0) {
- DEBUG(0,(__location__ " Failed to activate recovery mode due to public ip address mismatches\n"));
+ DEBUG(DEBUG_ERR,(__location__ " Failed to activate recovery mode due to public ip address mismatches\n"));
goto again;
}
}
/* ensure our local copies of flags are right */
ret = update_local_flags(rec, nodemap);
if (ret == MONITOR_ELECTION_NEEDED) {
- DEBUG(0,("update_local_flags() called for a re-election.\n"));
+ DEBUG(DEBUG_NOTICE,("update_local_flags() called for a re-election.\n"));
force_election(rec, mem_ctx, pnn, nodemap);
goto again;
}
if (ret != MONITOR_OK) {
- DEBUG(0,("Unable to update local flags\n"));
+ DEBUG(DEBUG_ERR,("Unable to update local flags\n"));
goto again;
}
ctdb->nodes[j]->pnn,
ctdb->nodes,
&ctdb->nodes[j]->public_ips)) {
- DEBUG(0,("Failed to read public ips from node : %u\n",
+ DEBUG(DEBUG_ERR,("Failed to read public ips from node : %u\n",
ctdb->nodes[j]->pnn));
goto again;
}
/* we should have the reclock - check its not stale */
if (ctdb->recovery_lock_fd == -1) {
- DEBUG(0,("recovery master doesn't have the recovery lock\n"));
+ DEBUG(DEBUG_CRIT,("recovery master doesn't have the recovery lock\n"));
do_recovery(rec, mem_ctx, pnn, num_active, nodemap, vnnmap, ctdb->pnn);
goto again;
}
if (read(ctdb->recovery_lock_fd, &c, 1) == -1) {
- DEBUG(0,("failed read from recovery_lock_fd - %s\n", strerror(errno)));
+ DEBUG(DEBUG_CRIT,("failed read from recovery_lock_fd - %s\n", strerror(errno)));
close(ctdb->recovery_lock_fd);
ctdb->recovery_lock_fd = -1;
do_recovery(rec, mem_ctx, pnn, num_active, nodemap, vnnmap, ctdb->pnn);
ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn,
mem_ctx, &remote_nodemap);
if (ret != 0) {
- DEBUG(0, (__location__ " Unable to get nodemap from remote node %u\n",
+ DEBUG(DEBUG_ERR, (__location__ " Unable to get nodemap from remote node %u\n",
nodemap->nodes[j].pnn));
goto again;
}
then this is a good reason to try recovery
*/
if (remote_nodemap->num != nodemap->num) {
- DEBUG(0, (__location__ " Remote node:%u has different node count. %u vs %u of the local node\n",
+ DEBUG(DEBUG_ERR, (__location__ " Remote node:%u has different node count. %u vs %u of the local node\n",
nodemap->nodes[j].pnn, remote_nodemap->num, nodemap->num));
do_recovery(rec, mem_ctx, pnn, num_active, nodemap, vnnmap, nodemap->nodes[j].pnn);
goto again;
*/
for (i=0;i<nodemap->num;i++) {
if (remote_nodemap->nodes[i].pnn != nodemap->nodes[i].pnn) {
- DEBUG(0, (__location__ " Remote node:%u has different nodemap pnn for %d (%u vs %u).\n",
+ DEBUG(DEBUG_ERR, (__location__ " Remote node:%u has different nodemap pnn for %d (%u vs %u).\n",
nodemap->nodes[j].pnn, i,
remote_nodemap->nodes[i].pnn, nodemap->nodes[i].pnn));
do_recovery(rec, mem_ctx, pnn, num_active, nodemap,
}
if ((remote_nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) !=
(nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE)) {
- DEBUG(0, (__location__ " Remote node:%u has different nodemap flag for %d (0x%x vs 0x%x)\n",
+ DEBUG(DEBUG_ERR, (__location__ " Remote node:%u has different nodemap flag for %d (0x%x vs 0x%x)\n",
nodemap->nodes[j].pnn, i,
remote_nodemap->nodes[i].flags, nodemap->nodes[i].flags));
do_recovery(rec, mem_ctx, pnn, num_active, nodemap,
as there are active nodes or we will have to do a recovery
*/
if (vnnmap->size != num_active) {
- DEBUG(0, (__location__ " The vnnmap count is different from the number of active nodes. %u vs %u\n",
+ DEBUG(DEBUG_ERR, (__location__ " The vnnmap count is different from the number of active nodes. %u vs %u\n",
vnnmap->size, num_active));
do_recovery(rec, mem_ctx, pnn, num_active, nodemap, vnnmap, ctdb->pnn);
goto again;
}
}
if (i == vnnmap->size) {
- DEBUG(0, (__location__ " Node %u is active in the nodemap but did not exist in the vnnmap\n",
+ DEBUG(DEBUG_ERR, (__location__ " Node %u is active in the nodemap but did not exist in the vnnmap\n",
nodemap->nodes[j].pnn));
do_recovery(rec, mem_ctx, pnn, num_active, nodemap, vnnmap, nodemap->nodes[j].pnn);
goto again;
ret = ctdb_ctrl_getvnnmap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn,
mem_ctx, &remote_vnnmap);
if (ret != 0) {
- DEBUG(0, (__location__ " Unable to get vnnmap from remote node %u\n",
+ DEBUG(DEBUG_ERR, (__location__ " Unable to get vnnmap from remote node %u\n",
nodemap->nodes[j].pnn));
goto again;
}
/* verify the vnnmap generation is the same */
if (vnnmap->generation != remote_vnnmap->generation) {
- DEBUG(0, (__location__ " Remote node %u has different generation of vnnmap. %u vs %u (ours)\n",
+ DEBUG(DEBUG_ERR, (__location__ " Remote node %u has different generation of vnnmap. %u vs %u (ours)\n",
nodemap->nodes[j].pnn, remote_vnnmap->generation, vnnmap->generation));
do_recovery(rec, mem_ctx, pnn, num_active, nodemap, vnnmap, nodemap->nodes[j].pnn);
goto again;
/* verify the vnnmap size is the same */
if (vnnmap->size != remote_vnnmap->size) {
- DEBUG(0, (__location__ " Remote node %u has different size of vnnmap. %u vs %u (ours)\n",
+ DEBUG(DEBUG_ERR, (__location__ " Remote node %u has different size of vnnmap. %u vs %u (ours)\n",
nodemap->nodes[j].pnn, remote_vnnmap->size, vnnmap->size));
do_recovery(rec, mem_ctx, pnn, num_active, nodemap, vnnmap, nodemap->nodes[j].pnn);
goto again;
/* verify the vnnmap is the same */
for (i=0;i<vnnmap->size;i++) {
if (remote_vnnmap->map[i] != vnnmap->map[i]) {
- DEBUG(0, (__location__ " Remote node %u has different vnnmap.\n",
+ DEBUG(DEBUG_ERR, (__location__ " Remote node %u has different vnnmap.\n",
nodemap->nodes[j].pnn));
do_recovery(rec, mem_ctx, pnn, num_active, nodemap,
vnnmap, nodemap->nodes[j].pnn);
/* we might need to change who has what IP assigned */
if (rec->need_takeover_run) {
rec->need_takeover_run = false;
+
+ /* execute the "startrecovery" event script on all nodes */
+ ret = run_startrecovery_eventscript(ctdb, nodemap);
+ if (ret!=0) {
+ DEBUG(DEBUG_ERR, (__location__ " Unable to run the 'startrecovery' event on cluster\n"));
+ do_recovery(rec, mem_ctx, pnn, num_active, nodemap,
+ vnnmap, ctdb->pnn);
+ }
+
ret = ctdb_takeover_run(ctdb, nodemap);
if (ret != 0) {
- DEBUG(0, (__location__ " Unable to setup public takeover addresses - starting recovery\n"));
+ DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses - starting recovery\n"));
+ do_recovery(rec, mem_ctx, pnn, num_active, nodemap,
+ vnnmap, ctdb->pnn);
+ }
+
+ /* execute the "recovered" event script on all nodes */
+ ret = run_recovered_eventscript(ctdb, nodemap);
+ if (ret!=0) {
+ DEBUG(DEBUG_ERR, (__location__ " Unable to run the 'recovered' event on cluster\n"));
do_recovery(rec, mem_ctx, pnn, num_active, nodemap,
vnnmap, ctdb->pnn);
}
/*
  fd event handler invoked when the parent ctdb daemon dies
  (presumably registered on a pipe/fd to the parent — confirm at the
  registration site). The recovery daemon must not outlive the main
  daemon, so exit immediately without running any cleanup.
 */
static void ctdb_recoverd_parent(struct event_context *ev, struct fd_event *fde,
uint16_t flags, void *private_data)
{
- DEBUG(0,("recovery daemon parent died - exiting\n"));
+ DEBUG(DEBUG_ALERT,("recovery daemon parent died - exiting\n"));
_exit(1);
}
return -1;
}
+ ctdb->ctdbd_pid = getpid();
+
ctdb->recoverd_pid = fork();
if (ctdb->recoverd_pid == -1) {
return -1;
srandom(getpid() ^ time(NULL));
+ /* the recovery daemon does not need to be realtime */
+ if (ctdb->do_setsched) {
+ ctdb_restore_scheduler(ctdb);
+ }
+
/* initialise ctdb */
ret = ctdb_socket_connect(ctdb);
if (ret != 0) {
- DEBUG(0, (__location__ " Failed to init ctdb\n"));
+ DEBUG(DEBUG_ALERT, (__location__ " Failed to init ctdb\n"));
exit(1);
}
monitor_cluster(ctdb);
- DEBUG(0,("ERROR: ctdb_recoverd finished!?\n"));
+ DEBUG(DEBUG_ALERT,("ERROR: ctdb_recoverd finished!?\n"));
return -1;
}
return;
}
- DEBUG(0,("Shutting down recovery daemon\n"));
+ DEBUG(DEBUG_NOTICE,("Shutting down recovery daemon\n"));
kill(ctdb->recoverd_pid, SIGTERM);
}