struct timeval last_reported_time;
};
+struct ctdb_recovery_lock_handle;
+
/*
private state of recovery daemon
*/
uint32_t *force_rebalance_nodes;
struct ctdb_node_capabilities *caps;
bool frozen_on_inactive;
- struct ctdb_cluster_mutex_handle *recovery_lock_handle;
+ struct ctdb_recovery_lock_handle *recovery_lock_handle;
};
#define CONTROL_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_timeout, 0)
return (rec->recovery_lock_handle != NULL);
}
-struct hold_reclock_state {
+struct ctdb_recovery_lock_handle { /* state of one attempt to take the recovery lock; kept in rec->recovery_lock_handle once the lock is held */
bool done; /* the lock-taking code runs tevent_loop_once() until this becomes true */
bool locked; /* examined after done is set; if false the attempt fails and this struct is freed */
double latency; /* passed to ctdb_ctrl_report_recd_lock_latency() after the lock has been taken */
+ struct ctdb_cluster_mutex_handle *h; /* handle returned by ctdb_cluster_mutex(); stored here once the lock has been taken */
};
static void take_reclock_handler(char status,
double latency,
void *private_data)
{
- struct hold_reclock_state *s =
- (struct hold_reclock_state *) private_data;
+ struct ctdb_recovery_lock_handle *s =
+ (struct ctdb_recovery_lock_handle *) private_data;
switch (status) {
case '0':
{
struct ctdb_context *ctdb = rec->ctdb;
struct ctdb_cluster_mutex_handle *h;
- struct hold_reclock_state s = {
- .done = false,
- .locked = false,
- .latency = 0,
+ struct ctdb_recovery_lock_handle *s;
+
+ s = talloc_zero(rec, struct ctdb_recovery_lock_handle);
+ if (s == NULL) {
+ DBG_ERR("Memory allocation error\n");
+ return false;
};
- h = ctdb_cluster_mutex(rec, ctdb, ctdb->recovery_lock, 0,
- take_reclock_handler, &s,
- lost_reclock_handler, rec);
+ h = ctdb_cluster_mutex(s,
+ ctdb,
+ ctdb->recovery_lock,
+ 0,
+ take_reclock_handler,
+ s,
+ lost_reclock_handler,
+ rec);
if (h == NULL) {
+ talloc_free(s);
return false;
}
- while (!s.done) {
+ while (! s->done) {
tevent_loop_once(ctdb->ev);
}
- if (! s.locked) {
- talloc_free(h);
+ if (! s->locked) {
+ talloc_free(s);
return false;
}
- rec->recovery_lock_handle = h;
- ctdb_ctrl_report_recd_lock_latency(ctdb, CONTROL_TIMEOUT(),
- s.latency);
+ rec->recovery_lock_handle = s;
+ s->h = h;
+ ctdb_ctrl_report_recd_lock_latency(ctdb,
+ CONTROL_TIMEOUT(),
+ s->latency);
return true;
}
goto fail;
}
- if (ctdb->recovery_lock != NULL) {
+ if (ctdb->recovery_lock != NULL) {
if (ctdb_recovery_have_lock(rec)) {
- DEBUG(DEBUG_NOTICE, ("Already holding recovery lock\n"));
+ D_NOTICE("Already holding recovery lock\n");
} else {
- DEBUG(DEBUG_NOTICE, ("Attempting to take recovery lock (%s)\n",
- ctdb->recovery_lock));
- if (!ctdb_recovery_lock(rec)) {
- if (ctdb->runstate == CTDB_RUNSTATE_FIRST_RECOVERY) {
- /* If ctdb is trying first recovery, it's
- * possible that current node does not know
- * yet who the recmaster is.
+ bool ok;
+
+ D_NOTICE("Attempting to take recovery lock (%s)\n",
+ ctdb->recovery_lock);
+
+ ok = ctdb_recovery_lock(rec);
+ if (! ok) {
+ D_ERR("Unable to take recovery lock\n");
+
+ if (pnn != rec->recmaster) {
+ D_NOTICE("Recovery master changed to %u,"
+ " aborting recovery\n",
+ rec->recmaster);
+ rec->need_recovery = false;
+ goto fail;
+ }
+
+ if (ctdb->runstate ==
+ CTDB_RUNSTATE_FIRST_RECOVERY) {
+ /*
+ * First recovery? Perhaps
+ * current node does not yet
+ * know who the recmaster is.
*/
- DEBUG(DEBUG_ERR, ("Unable to get recovery lock"
- " - retrying recovery\n"));
+ D_ERR("Retrying recovery\n");
goto fail;
}
- DEBUG(DEBUG_ERR,("Unable to get recovery lock - aborting recovery "
- "and ban ourself for %u seconds\n",
- ctdb->tunable.recovery_ban_period));
- ctdb_ban_node(rec, pnn, ctdb->tunable.recovery_ban_period);
+ D_ERR("Abort recovery, "
+ "ban this node for %u seconds\n",
+ ctdb->tunable.recovery_ban_period);
+ ctdb_ban_node(rec,
+ pnn,
+ ctdb->tunable.recovery_ban_period);
goto fail;
}
- DEBUG(DEBUG_NOTICE,
- ("Recovery lock taken successfully by recovery daemon\n"));
+ D_NOTICE("Recovery lock taken successfully\n");
}
}