rddata.dptr = (uint8_t *)&rd;
rddata.dsize = sizeof(rd);
+ if (status == -ETIME) {
+ ctdb->event_script_timeouts++;
+
+ if (ctdb->event_script_timeouts > ctdb->tunable.script_ban_count) {
+ DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Making node unhealthy\n", ctdb->tunable.script_ban_count));
+ } else {
+ /* Timeout count has not exceeded script_ban_count: report success so this timeout does not mark the node unhealthy. */
+ status = 0;
+ }
+ }
+
if (status != 0 && !(node->flags & NODE_FLAGS_UNHEALTHY)) {
DEBUG(DEBUG_NOTICE,("monitor event failed - disabling node\n"));
node->flags |= NODE_FLAGS_UNHEALTHY;
if (status != 0) {
DEBUG(DEBUG_ERR,(__location__ " recovered event script failed (status %d)\n", status));
+ if (status == -ETIME) {
+ ctdb_ban_self(ctdb);
+ }
}
ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
if (status != 0) {
DEBUG(DEBUG_ERR,(__location__ " stopped event script failed (status %d)\n", status));
ctdb->nodes[ctdb->pnn]->flags &= ~NODE_FLAGS_STOPPED;
+ if (status == -ETIME) {
+ ctdb_ban_self(ctdb);
+ }
}
ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
struct ctdb_tcp_array *tcparray;
if (status != 0) {
+ if (status == -ETIME) {
+ ctdb_ban_self(ctdb);
+ }
DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
ctdb_addr_to_str(state->addr),
state->vnn->iface));
talloc_get_type(private_data, struct takeover_callback_state);
TDB_DATA data;
+ if (status == -ETIME) {
+ ctdb_ban_self(ctdb);
+ }
+
/* send a message to all clients of this node telling them
that the cluster has been reconfigured and they should
release any sockets on this IP */
DEBUG(DEBUG_ERR,("Event script timed out : %s %s count : %u pid : %d\n",
call_names[state->call], state->options, ctdb->event_script_timeouts, state->child));
+ state->cb_status = -ETIME;
+
if (kill(state->child, 0) != 0) {
DEBUG(DEBUG_ERR,("Event script child process already dead, errno %s(%d)\n", strerror(errno), errno));
state->child = 0;
- state->cb_status = -ETIME;
- talloc_free(state);
- return;
- }
-
- if (state->call == CTDB_EVENT_MONITOR) {
- /* if it is a monitor event, we allow it to "hang" a few times
- before we declare it a failure and ban ourself (and make
- ourself unhealthy)
- */
- DEBUG(DEBUG_ERR, (__location__ " eventscript for monitor event timedout.\n"));
-
- ctdb->event_script_timeouts++;
-
- if (ctdb->event_script_timeouts > ctdb->tunable.script_ban_count) {
- DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Making node unhealthy\n", ctdb->tunable.script_ban_count));
- state->cb_status = -ETIME;
- } else {
- state->cb_status = 0;
- }
- } else if (state->call == CTDB_EVENT_STARTUP) {
- DEBUG(DEBUG_ERR, (__location__ " eventscript for startup event timedout.\n"));
- state->cb_status = -ETIME;
- } else {
- /* if it is not a monitor or a startup event we ban ourself
- immediately
- */
- DEBUG(DEBUG_ERR, (__location__ " eventscript for NON-monitor/NON-startup event timedout. Immediately banning ourself for %d seconds\n", ctdb->tunable.recovery_ban_period));
-
- ctdb_ban_self(ctdb);
-
- state->cb_status = -ETIME;
}
if (state->call == CTDB_EVENT_MONITOR || state->call == CTDB_EVENT_STATUS) {
struct ctdb_monitor_script_status *script;
- if (ctdb->current_monitor_status_ctx == NULL) {
- talloc_free(state);
- return;
- }
+ if (ctdb->current_monitor_status_ctx != NULL) {
+ script = ctdb->current_monitor_status_ctx->scripts;
+ if (script != NULL) {
+ script->status = state->cb_status;
+ }
- script = ctdb->current_monitor_status_ctx->scripts;
- if (script != NULL) {
- script->status = state->cb_status;
+ ctdb_control_event_script_finished(ctdb);
}
-
- ctdb_control_event_script_finished(ctdb);
}
talloc_free(state);
while (status.done == false && event_loop_once(ctdb->ev) == 0) /* noop */;
+ if (status.status == -ETIME) {
+ DEBUG(DEBUG_ERR, (__location__ " eventscript for '%s' timedout."
+ " Immediately banning ourself for %d seconds\n",
+ call_names[call],
+ ctdb->tunable.recovery_ban_period));
+ ctdb_ban_self(ctdb);
+ }
+
return status.status;
}