X-Git-Url: http://git.samba.org/?a=blobdiff_plain;f=server%2Fctdb_recoverd.c;h=631f53e89bd4d04908b4b3abc8b1da761d86163a;hb=0fedef0ffba4178126eee9544c5e2db52f5db893;hp=dc1a59d06792fd4e076bad754498dac41c08fd23;hpb=85e5e760cc91eb3157d3a88996ce474491646726;p=sahlberg%2Fctdb.git diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c index dc1a59d0..631f53e8 100644 --- a/server/ctdb_recoverd.c +++ b/server/ctdb_recoverd.c @@ -70,6 +70,7 @@ struct ctdb_recoverd { #define CONTROL_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_timeout, 0) #define MONITOR_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_interval, 0) +static void ctdb_restart_recd(struct event_context *ev, struct timed_event *te, struct timeval t, void *private_data); /* ban a node for a period of time @@ -438,7 +439,8 @@ static int create_missing_remote_databases(struct ctdb_context *ctdb, struct ctd return -1; } ctdb_ctrl_createdb(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, - mem_ctx, name, dbmap->dbs[db].persistent); + mem_ctx, name, + dbmap->dbs[db].flags & CTDB_DB_FLAGS_PERSISTENT); if (ret != 0) { DEBUG(DEBUG_ERR, (__location__ " Unable to create remote db:%s\n", name)); return -1; @@ -501,7 +503,7 @@ static int create_missing_local_databases(struct ctdb_context *ctdb, struct ctdb return -1; } ctdb_ctrl_createdb(ctdb, CONTROL_TIMEOUT(), pnn, mem_ctx, name, - remote_dbmap->dbs[db].persistent); + remote_dbmap->dbs[db].flags & CTDB_DB_FLAGS_PERSISTENT); if (ret != 0) { DEBUG(DEBUG_ERR, (__location__ " Unable to create local db:%s\n", name)); return -1; @@ -713,6 +715,7 @@ static void vacuum_fetch_next(struct vacuum_info *v) ZERO_STRUCT(call); call.call_id = CTDB_NULL_FUNC; call.flags = CTDB_IMMEDIATE_MIGRATION; + call.flags |= CTDB_CALL_FLAG_VACUUM_MIGRATION; r = v->r; v->r = (struct ctdb_rec_data *)(r->length + (uint8_t *)r); @@ -821,7 +824,7 @@ static void vacuum_fetch_handler(struct ctdb_context *ctdb, uint64_t srvid, for (i=0;inum;i++) { if (dbmap->dbs[i].dbid == recs->db_id) { - persistent = dbmap->dbs[i].persistent; + persistent = dbmap->dbs[i].flags & CTDB_DB_FLAGS_PERSISTENT; break; } } @@ -839,7 +842,7 @@ static void vacuum_fetch_handler(struct ctdb_context *ctdb, uint64_t srvid, } /* attach to it */ - ctdb_db = ctdb_attach(ctdb, name, persistent, 0); + ctdb_db = ctdb_attach(ctdb, CONTROL_TIMEOUT(), name, persistent, 0); if (ctdb_db == NULL) { DEBUG(DEBUG_ERR,(__location__ " Failed to attach to database '%s'\n", name)); talloc_free(tmp_ctx); @@ -1065,6 +1068,7 @@ static int traverse_recdb(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, hdr = (struct ctdb_ltdb_header *)data.dptr; if (!params->persistent) { hdr->dmaster = params->ctdb->pnn; + hdr->flags |= CTDB_REC_FLAG_MIGRATED_WITH_DATA; } /* add the record to the blob ready to send to the nodes */ @@ -1277,10 +1281,12 @@ static int ctdb_reload_remote_public_ips(struct ctdb_context *ctdb, return -1; } - if (rec->ip_check_disable_ctx == NULL) { - if (verify_remote_ip_allocation(ctdb, ctdb->nodes[j]->known_public_ips)) { - DEBUG(DEBUG_ERR,("Node %d has inconsistent public ip allocation and needs update.\n", ctdb->nodes[j]->pnn)); - rec->need_takeover_run = true; + if (ctdb->tunable.disable_ip_failover == 0) { + if (rec->ip_check_disable_ctx == NULL) { + if (verify_remote_ip_allocation(ctdb, ctdb->nodes[j]->known_public_ips)) { + DEBUG(DEBUG_ERR,("Node %d has inconsistent public ip allocation and needs update.\n", ctdb->nodes[j]->pnn)); + rec->need_takeover_run = true; + } } } @@ -1382,8 +1388,10 @@ static int do_recovery(struct ctdb_recoverd *rec, DEBUG(DEBUG_ERR,("Taking out recovery lock from recovery daemon\n")); start_time = timeval_current(); if (!ctdb_recovery_lock(ctdb, true)) { - ctdb_set_culprit(rec, pnn); - DEBUG(DEBUG_ERR,("Unable to get recovery lock - aborting recovery\n")); + DEBUG(DEBUG_ERR,("Unable to get recovery lock - aborting recovery " + "and ban ourself for %u seconds\n", + ctdb->tunable.recovery_ban_period)); + ctdb_ban_node(rec, pnn, ctdb->tunable.recovery_ban_period); return -1; } ctdb_ctrl_report_recd_lock_latency(ctdb, CONTROL_TIMEOUT(), timeval_elapsed(&start_time)); @@ -1508,7 +1516,7 @@ static int do_recovery(struct ctdb_recoverd *rec, for (i=0;inum;i++) { ret = recover_database(rec, mem_ctx, dbmap->dbs[i].dbid, - dbmap->dbs[i].persistent, + dbmap->dbs[i].flags & CTDB_DB_FLAGS_PERSISTENT, pnn, nodemap, generation); if (ret != 0) { DEBUG(DEBUG_ERR, (__location__ " Failed to recover database 0x%x\n", dbmap->dbs[i].dbid)); @@ -1622,15 +1630,15 @@ static int do_recovery(struct ctdb_recoverd *rec, if (ret != 0) { DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n", culprit)); + rec->need_takeover_run = true; return -1; } rec->need_takeover_run = false; ret = ctdb_takeover_run(ctdb, nodemap); if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses\n")); - return -1; + DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses. ctdb_takeover_run() failed.\n")); + rec->need_takeover_run = true; } - DEBUG(DEBUG_NOTICE, (__location__ " Recovery - takeip finished\n")); /* execute the "recovered" event script on all nodes */ ret = run_recovered_eventscript(ctdb, nodemap, "do_recovery"); @@ -1674,9 +1682,9 @@ static int do_recovery(struct ctdb_recoverd *rec, We now wait for rerecovery_timeout before we allow another recovery to take place. */ - DEBUG(DEBUG_NOTICE, (__location__ " New recoveries supressed for the rerecovery timeout\n")); + DEBUG(DEBUG_NOTICE, ("Just finished a recovery. New recoveries will now be supressed for the rerecovery timeout (%d seconds)\n", ctdb->tunable.rerecovery_timeout)); ctdb_wait_timeout(ctdb, ctdb->tunable.rerecovery_timeout); - DEBUG(DEBUG_NOTICE, (__location__ " Rerecovery timeout elapsed. Recovery reactivated.\n")); + DEBUG(DEBUG_NOTICE, ("The rerecovery timeout has elapsed. We now allow recoveries to trigger again.\n")); return 0; } @@ -2043,8 +2051,7 @@ static void process_ipreallocate_requests(struct ctdb_context *ctdb, struct ctdb if (ret == 0) { ret = ctdb_takeover_run(ctdb, rec->nodemap); if (ret != 0) { - DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n", - culprit)); + DEBUG(DEBUG_ERR,("Failed to reallocate addresses: ctdb_takeover_run() failed.\n")); rec->need_takeover_run = true; } } @@ -2493,7 +2500,7 @@ static enum monitor_result verify_recmaster(struct ctdb_recoverd *rec, struct ct /* called to check that the local allocation of public ip addresses is ok. */ -static int verify_local_ip_allocation(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, uint32_t pnn) +static int verify_local_ip_allocation(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, uint32_t pnn, struct ctdb_node_map *nodemap) { TALLOC_CTX *mem_ctx = talloc_new(NULL); struct ctdb_control_get_ifaces *ifaces = NULL; @@ -2571,7 +2578,7 @@ static int verify_local_ip_allocation(struct ctdb_context *ctdb, struct ctdb_rec /* skip the check if we have started but not finished recovery */ if (timeval_compare(&uptime1->last_recovery_finished, &uptime1->last_recovery_started) != 1) { - DEBUG(DEBUG_NOTICE, (__location__ " in the middle of recovery or ip reallocation. skipping public ip address check\n")); + DEBUG(DEBUG_INFO, (__location__ " in the middle of recovery or ip reallocation. skipping public ip address check\n")); talloc_free(mem_ctx); return 0; @@ -2584,20 +2591,28 @@ static int verify_local_ip_allocation(struct ctdb_context *ctdb, struct ctdb_rec and we dont have ones we shouldnt have. if we find an inconsistency we set recmode to active on the local node and wait for the recmaster - to do a full blown recovery + to do a full blown recovery. + also if the pnn is -1 and we are healthy and can host the ip + we also request a ip reallocation. */ - for (j=0; jnum; j++) { - if (ips->ips[j].pnn == pnn) { - if (!ctdb_sys_have_ip(&ips->ips[j].addr)) { - DEBUG(DEBUG_CRIT,("Public address '%s' is missing and we should serve this ip\n", - ctdb_addr_to_str(&ips->ips[j].addr))); - need_takeover_run = true; - } - } else { - if (ctdb_sys_have_ip(&ips->ips[j].addr)) { - DEBUG(DEBUG_CRIT,("We are still serving a public address '%s' that we should not be serving.\n", - ctdb_addr_to_str(&ips->ips[j].addr))); + if (ctdb->tunable.disable_ip_failover == 0) { + for (j=0; jnum; j++) { + if (ips->ips[j].pnn == -1 && nodemap->nodes[pnn].flags == 0) { + DEBUG(DEBUG_CRIT,("Public address '%s' is not assigned and we could serve this ip\n", + ctdb_addr_to_str(&ips->ips[j].addr))); need_takeover_run = true; + } else if (ips->ips[j].pnn == pnn) { + if (!ctdb_sys_have_ip(&ips->ips[j].addr)) { + DEBUG(DEBUG_CRIT,("Public address '%s' is missing and we should serve this ip\n", + ctdb_addr_to_str(&ips->ips[j].addr))); + need_takeover_run = true; + } + } else { + if (ctdb_sys_have_ip(&ips->ips[j].addr)) { + DEBUG(DEBUG_CRIT,("We are still serving a public address '%s' that we should not be serving.\n", + ctdb_addr_to_str(&ips->ips[j].addr))); + need_takeover_run = true; + } } } } @@ -2758,7 +2773,7 @@ static int check_recovery_lock(struct ctdb_context *ctdb) return -1; } - state->child = fork(); + state->child = ctdb_fork(ctdb); if (state->child == (pid_t)-1) { DEBUG(DEBUG_CRIT,(__location__ " fork() failed in check_reclock child\n")); close(state->fd[0]); @@ -2774,6 +2789,7 @@ static int check_recovery_lock(struct ctdb_context *ctdb) close(state->fd[0]); state->fd[0] = -1; + debug_extra = talloc_asprintf(NULL, "recovery-lock:"); if (pread(ctdb->recovery_lock_fd, &cc, 1, 0) == -1) { DEBUG(DEBUG_CRIT,("failed read from recovery_lock_fd - %s\n", strerror(errno))); cc = RECLOCK_FAILED; @@ -3008,10 +3024,6 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, rec->reallocate_callers = NULL; } } - /* if there are takeovers requested, perform it and notify the waiters */ - if (rec->reallocate_callers) { - process_ipreallocate_requests(ctdb, rec); - } if (rec->recmaster == (uint32_t)-1) { DEBUG(DEBUG_NOTICE,(__location__ " Initial recovery master set - forcing election\n")); @@ -3115,9 +3127,9 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, /* verify that we have all ip addresses we should have and we dont * have addresses we shouldnt have. */ - if (ctdb->do_checkpublicip) { + if (ctdb->tunable.disable_ip_failover == 0) { if (rec->ip_check_disable_ctx == NULL) { - if (verify_local_ip_allocation(ctdb, rec, pnn) != 0) { + if (verify_local_ip_allocation(ctdb, rec, pnn, nodemap) != 0) { DEBUG(DEBUG_ERR, (__location__ " Public IPs were inconsistent.\n")); } } @@ -3198,6 +3210,11 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, } } + /* if there are takeovers requested, perform it and notify the waiters */ + if (rec->reallocate_callers) { + process_ipreallocate_requests(ctdb, rec); + } + /* get the nodemap for all active remote nodes */ remote_nodemaps = talloc_array(mem_ctx, struct ctdb_node_map *, nodemap->num); @@ -3386,8 +3403,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, if (ret != 0) { DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n", culprit)); - ctdb_set_culprit(rec, culprit); - do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap); + rec->need_takeover_run = true; return; } @@ -3402,9 +3418,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, ret = ctdb_takeover_run(ctdb, nodemap); if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses - starting recovery\n")); - ctdb_set_culprit(rec, ctdb->pnn); - do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap); + DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses. Try again later\n")); return; } @@ -3510,18 +3524,12 @@ static void ctdb_check_recd(struct event_context *ev, struct timed_event *te, struct ctdb_context *ctdb = talloc_get_type(p, struct ctdb_context); if (kill(ctdb->recoverd_pid, 0) != 0) { - DEBUG(DEBUG_ERR,("Recovery daemon (pid:%d) is no longer running. Shutting down main daemon\n", (int)ctdb->recoverd_pid)); + DEBUG(DEBUG_ERR,("Recovery daemon (pid:%d) is no longer running. Trying to restart recovery daemon.\n", (int)ctdb->recoverd_pid)); - ctdb_stop_recoverd(ctdb); - ctdb_stop_keepalive(ctdb); - ctdb_stop_monitoring(ctdb); - ctdb_release_all_ips(ctdb); - if (ctdb->methods != NULL) { - ctdb->methods->shutdown(ctdb); - } - ctdb_event_script(ctdb, CTDB_EVENT_SHUTDOWN); + event_add_timed(ctdb->ev, ctdb, timeval_zero(), + ctdb_restart_recd, ctdb); - exit(10); + return; } event_add_timed(ctdb->ev, ctdb, @@ -3584,7 +3592,7 @@ int ctdb_start_recoverd(struct ctdb_context *ctdb) srandom(getpid() ^ time(NULL)); - if (switch_from_server_to_client(ctdb) != 0) { + if (switch_from_server_to_client(ctdb, "recoverd") != 0) { DEBUG(DEBUG_CRIT, (__location__ "ERROR: failed to switch recovery daemon into client mode. shutting down.\n")); exit(1); } @@ -3623,3 +3631,13 @@ void ctdb_stop_recoverd(struct ctdb_context *ctdb) DEBUG(DEBUG_NOTICE,("Shutting down recovery daemon\n")); kill(ctdb->recoverd_pid, SIGTERM); } + +static void ctdb_restart_recd(struct event_context *ev, struct timed_event *te, + struct timeval t, void *private_data) +{ + struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context); + + DEBUG(DEBUG_ERR,("Restarting recovery daemon\n")); + ctdb_stop_recoverd(ctdb); + ctdb_start_recoverd(ctdb); +}