{ "MonitorInterval", 15, offsetof(struct ctdb_tunable, monitor_interval) },
{ "TickleUpdateInterval",20, offsetof(struct ctdb_tunable, tickle_update_interval) },
{ "EventScriptTimeout", 20, offsetof(struct ctdb_tunable, script_timeout) },
- { "EventScriptBanCount", 3, offsetof(struct ctdb_tunable, script_ban_count) },
+ { "EventScriptBanCount", 5, offsetof(struct ctdb_tunable, script_ban_count) },
{ "RecoveryGracePeriod", 60, offsetof(struct ctdb_tunable, recovery_grace_period) },
{ "RecoveryBanPeriod", 300, offsetof(struct ctdb_tunable, recovery_ban_period) },
{ "DatabaseHashSize", 10000, offsetof(struct ctdb_tunable, database_hash_size) },
DEBUG(DEBUG_ERR,("Event script timed out : %s count : %u\n", state->options, ctdb->event_script_timeouts));
- talloc_free(state);
- callback(ctdb, -1, private_data);
-
- ctdb->event_script_timeouts++;
- if (ctdb->event_script_timeouts > ctdb->tunable.script_ban_count) {
- ctdb->event_script_timeouts = 0;
- DEBUG(DEBUG_ERR, ("Maximum timeout count reached for eventscript. Banning self for %d seconds\n", ctdb->tunable.recovery_ban_period));
+ if (!strcmp(state->options, "monitor")) {
+ /* A monitor event is allowed to "hang" a few times (up to the
+ EventScriptBanCount tunable) before we declare it a failure and
+ ban ourselves (and make ourselves unhealthy).
+ */
+ DEBUG(DEBUG_ERR, (__location__ " eventscript for monitor event timedout.\n"));
+
+ ctdb->event_script_timeouts++;
+ if (ctdb->event_script_timeouts > ctdb->tunable.script_ban_count) {
+ ctdb->event_script_timeouts = 0;
+ DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Banning self for %d seconds\n", ctdb->tunable.script_ban_count, ctdb->tunable.recovery_ban_period));
+ ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
+ callback(ctdb, -1, private_data);
+ } else {
+ callback(ctdb, 0, private_data);
+ }
+ } else if (!strcmp(state->options, "startup")) {
+ DEBUG(DEBUG_ERR, (__location__ " eventscript for startup event timedout.\n"));
+ callback(ctdb, -1, private_data);
+ } else {
+ /* for any event other than monitor or startup we ban ourselves immediately */
+ DEBUG(DEBUG_ERR, (__location__ " eventscript for NON-monitor/NON-startup event timedout. Immediately banning ourself for %d seconds\n", ctdb->tunable.recovery_ban_period));
ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
+ callback(ctdb, -1, private_data);
}
+
+ talloc_free(state);
}
/*