struct ban_state **banned_nodes;
struct timeval priority_time;
bool need_takeover_run;
+ bool need_recovery;
};
#define CONTROL_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_timeout, 0)
uint32_t generation;
struct ctdb_dbid_map *dbmap;
+ /* if recovery fails, force it again */
+ rec->need_recovery = true;
+
if (rec->last_culprit != culprit ||
timeval_elapsed(&rec->first_recover_time) > ctdb->tunable.recovery_grace_period) {
/* either a new node is the culprit, or we've decided to forgive them */
ret = ctdb_takeover_run(ctdb, nodemap);
if (ret != 0) {
DEBUG(0, (__location__ " Unable to setup public takeover addresses\n"));
- rec->need_takeover_run = true;
return -1;
}
DEBUG(1, (__location__ " Recovery - done takeover\n"));
DEBUG(0, (__location__ " Recovery complete\n"));
+ rec->need_recovery = false;
+
/* We just finished a recovery successfully.
We now wait for rerecovery_timeout before we allow
another recovery to take place.
}
+ if (rec->need_recovery) {
+ /* a previous recovery didn't finish */
+ do_recovery(rec, mem_ctx, pnn, num_active, nodemap, vnnmap, nodemap->nodes[j].pnn);
+ goto again;
+ }
+
/* verify that all active nodes are in normal mode
and not in recovery mode
*/
ret = ctdb_takeover_run(ctdb, nodemap);
if (ret != 0) {
DEBUG(0, (__location__ " Unable to setup public takeover addresses - starting recovery\n"));
- rec->need_takeover_run = true;
do_recovery(rec, mem_ctx, pnn, num_active, nodemap,
vnnmap, nodemap->nodes[j].pnn);
}