X-Git-Url: http://git.samba.org/?a=blobdiff_plain;f=server%2Fctdb_recoverd.c;h=631f53e89bd4d04908b4b3abc8b1da761d86163a;hb=0fedef0ffba4178126eee9544c5e2db52f5db893;hp=dc1a59d06792fd4e076bad754498dac41c08fd23;hpb=85e5e760cc91eb3157d3a88996ce474491646726;p=sahlberg%2Fctdb.git

diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index dc1a59d0..631f53e8 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -70,6 +70,7 @@ struct ctdb_recoverd {
 #define CONTROL_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_timeout, 0)
 #define MONITOR_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_interval, 0)
 
+static void ctdb_restart_recd(struct event_context *ev, struct timed_event *te, struct timeval t, void *private_data);
 
 /*
   ban a node for a period of time
@@ -438,7 +439,8 @@ static int create_missing_remote_databases(struct ctdb_context *ctdb, struct ctd
 				return -1;
 			}
 			ctdb_ctrl_createdb(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, 
-					   mem_ctx, name, dbmap->dbs[db].persistent);
+					   mem_ctx, name,
+					   dbmap->dbs[db].flags & CTDB_DB_FLAGS_PERSISTENT);
 			if (ret != 0) {
 				DEBUG(DEBUG_ERR, (__location__ " Unable to create remote db:%s\n", name));
 				return -1;
@@ -501,7 +503,7 @@ static int create_missing_local_databases(struct ctdb_context *ctdb, struct ctdb
 				return -1;
 			}
 			ctdb_ctrl_createdb(ctdb, CONTROL_TIMEOUT(), pnn, mem_ctx, name, 
-					   remote_dbmap->dbs[db].persistent);
+					   remote_dbmap->dbs[db].flags & CTDB_DB_FLAGS_PERSISTENT);
 			if (ret != 0) {
 				DEBUG(DEBUG_ERR, (__location__ " Unable to create local db:%s\n", name));
 				return -1;
@@ -713,6 +715,7 @@ static void vacuum_fetch_next(struct vacuum_info *v)
 		ZERO_STRUCT(call);
 		call.call_id = CTDB_NULL_FUNC;
 		call.flags = CTDB_IMMEDIATE_MIGRATION;
+		call.flags |= CTDB_CALL_FLAG_VACUUM_MIGRATION;
 
 		r = v->r;
 		v->r = (struct ctdb_rec_data *)(r->length + (uint8_t *)r);
@@ -821,7 +824,7 @@ static void vacuum_fetch_handler(struct ctdb_context *ctdb, uint64_t srvid,
 
 	for (i=0;i<dbmap->num;i++) {
 		if (dbmap->dbs[i].dbid == recs->db_id) {
-			persistent = dbmap->dbs[i].persistent;
+			persistent = dbmap->dbs[i].flags & CTDB_DB_FLAGS_PERSISTENT;
 			break;
 		}
 	}
@@ -839,7 +842,7 @@ static void vacuum_fetch_handler(struct ctdb_context *ctdb, uint64_t srvid,
 	}
 
 	/* attach to it */
-	ctdb_db = ctdb_attach(ctdb, name, persistent, 0);
+	ctdb_db = ctdb_attach(ctdb, CONTROL_TIMEOUT(), name, persistent, 0);
 	if (ctdb_db == NULL) {
 		DEBUG(DEBUG_ERR,(__location__ " Failed to attach to database '%s'\n", name));
 		talloc_free(tmp_ctx);
@@ -1065,6 +1068,7 @@ static int traverse_recdb(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data,
 	hdr = (struct ctdb_ltdb_header *)data.dptr;
 	if (!params->persistent) {
 		hdr->dmaster = params->ctdb->pnn;
+		hdr->flags |= CTDB_REC_FLAG_MIGRATED_WITH_DATA;
 	}
 
 	/* add the record to the blob ready to send to the nodes */
@@ -1277,10 +1281,12 @@ static int ctdb_reload_remote_public_ips(struct ctdb_context *ctdb,
 			return -1;
 		}
 
-		if (rec->ip_check_disable_ctx == NULL) {
-			if (verify_remote_ip_allocation(ctdb, ctdb->nodes[j]->known_public_ips)) {
-				DEBUG(DEBUG_ERR,("Node %d has inconsistent public ip allocation and needs update.\n", ctdb->nodes[j]->pnn));
-				rec->need_takeover_run = true;
+		if (ctdb->tunable.disable_ip_failover == 0) {
+			if (rec->ip_check_disable_ctx == NULL) {
+				if (verify_remote_ip_allocation(ctdb, ctdb->nodes[j]->known_public_ips)) {
+					DEBUG(DEBUG_ERR,("Node %d has inconsistent public ip allocation and needs update.\n", ctdb->nodes[j]->pnn));
+					rec->need_takeover_run = true;
+				}
 			}
 		}
 
@@ -1382,8 +1388,10 @@ static int do_recovery(struct ctdb_recoverd *rec,
 		DEBUG(DEBUG_ERR,("Taking out recovery lock from recovery daemon\n"));
 		start_time = timeval_current();
 		if (!ctdb_recovery_lock(ctdb, true)) {
-			ctdb_set_culprit(rec, pnn);
-			DEBUG(DEBUG_ERR,("Unable to get recovery lock - aborting recovery\n"));
+			DEBUG(DEBUG_ERR,("Unable to get recovery lock - aborting recovery "
+					 "and ban ourself for %u seconds\n",
+					 ctdb->tunable.recovery_ban_period));
+			ctdb_ban_node(rec, pnn, ctdb->tunable.recovery_ban_period);
 			return -1;
 		}
 		ctdb_ctrl_report_recd_lock_latency(ctdb, CONTROL_TIMEOUT(), timeval_elapsed(&start_time));
@@ -1508,7 +1516,7 @@ static int do_recovery(struct ctdb_recoverd *rec,
 	for (i=0;i<dbmap->num;i++) {
 		ret = recover_database(rec, mem_ctx,
 				       dbmap->dbs[i].dbid,
-				       dbmap->dbs[i].persistent,
+				       dbmap->dbs[i].flags & CTDB_DB_FLAGS_PERSISTENT,
 				       pnn, nodemap, generation);
 		if (ret != 0) {
 			DEBUG(DEBUG_ERR, (__location__ " Failed to recover database 0x%x\n", dbmap->dbs[i].dbid));
@@ -1622,15 +1630,15 @@ static int do_recovery(struct ctdb_recoverd *rec,
 	if (ret != 0) {
 		DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n",
 				 culprit));
+		rec->need_takeover_run = true;
 		return -1;
 	}
 	rec->need_takeover_run = false;
 	ret = ctdb_takeover_run(ctdb, nodemap);
 	if (ret != 0) {
-		DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses\n"));
-		return -1;
+		DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses. ctdb_takeover_run() failed.\n"));
+		rec->need_takeover_run = true;
 	}
-	DEBUG(DEBUG_NOTICE, (__location__ " Recovery - takeip finished\n"));
 
 	/* execute the "recovered" event script on all nodes */
 	ret = run_recovered_eventscript(ctdb, nodemap, "do_recovery");
@@ -1674,9 +1682,9 @@ static int do_recovery(struct ctdb_recoverd *rec,
 	   We now wait for rerecovery_timeout before we allow 
 	   another recovery to take place.
 	*/
-	DEBUG(DEBUG_NOTICE, (__location__ " New recoveries supressed for the rerecovery timeout\n"));
+	DEBUG(DEBUG_NOTICE, ("Just finished a recovery. New recoveries will now be supressed for the rerecovery timeout (%d seconds)\n", ctdb->tunable.rerecovery_timeout));
 	ctdb_wait_timeout(ctdb, ctdb->tunable.rerecovery_timeout);
-	DEBUG(DEBUG_NOTICE, (__location__ " Rerecovery timeout elapsed. Recovery reactivated.\n"));
+	DEBUG(DEBUG_NOTICE, ("The rerecovery timeout has elapsed. We now allow recoveries to trigger again.\n"));
 
 	return 0;
 }
@@ -2043,8 +2051,7 @@ static void process_ipreallocate_requests(struct ctdb_context *ctdb, struct ctdb
 	if (ret == 0) {
 		ret = ctdb_takeover_run(ctdb, rec->nodemap);
 		if (ret != 0) {
-			DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n",
-					 culprit));
+			DEBUG(DEBUG_ERR,("Failed to reallocate addresses: ctdb_takeover_run() failed.\n"));
 			rec->need_takeover_run = true;
 		}
 	}
@@ -2493,7 +2500,7 @@ static enum monitor_result verify_recmaster(struct ctdb_recoverd *rec, struct ct
 
 /* called to check that the local allocation of public ip addresses is ok.
 */
-static int verify_local_ip_allocation(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, uint32_t pnn)
+static int verify_local_ip_allocation(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, uint32_t pnn, struct ctdb_node_map *nodemap)
 {
 	TALLOC_CTX *mem_ctx = talloc_new(NULL);
 	struct ctdb_control_get_ifaces *ifaces = NULL;
@@ -2571,7 +2578,7 @@ static int verify_local_ip_allocation(struct ctdb_context *ctdb, struct ctdb_rec
 	/* skip the check if we have started but not finished recovery */
 	if (timeval_compare(&uptime1->last_recovery_finished,
 			    &uptime1->last_recovery_started) != 1) {
-		DEBUG(DEBUG_NOTICE, (__location__ " in the middle of recovery or ip reallocation. skipping public ip address check\n"));
+		DEBUG(DEBUG_INFO, (__location__ " in the middle of recovery or ip reallocation. skipping public ip address check\n"));
 		talloc_free(mem_ctx);
 
 		return 0;
@@ -2584,20 +2591,28 @@ static int verify_local_ip_allocation(struct ctdb_context *ctdb, struct ctdb_rec
 	   and we dont have ones we shouldnt have.
 	   if we find an inconsistency we set recmode to
 	   active on the local node and wait for the recmaster
-	   to do a full blown recovery
+	   to do a full blown recovery.
+	   also, if the pnn is -1 (address not assigned) and we are healthy and
+	   can host the ip, we request an ip reallocation.
 	*/
-	for (j=0; j<ips->num; j++) {
-		if (ips->ips[j].pnn == pnn) {
-			if (!ctdb_sys_have_ip(&ips->ips[j].addr)) {
-				DEBUG(DEBUG_CRIT,("Public address '%s' is missing and we should serve this ip\n",
-					ctdb_addr_to_str(&ips->ips[j].addr)));
-				need_takeover_run = true;
-			}
-		} else {
-			if (ctdb_sys_have_ip(&ips->ips[j].addr)) {
-				DEBUG(DEBUG_CRIT,("We are still serving a public address '%s' that we should not be serving.\n", 
-					ctdb_addr_to_str(&ips->ips[j].addr)));
+	if (ctdb->tunable.disable_ip_failover == 0) {
+		for (j=0; j<ips->num; j++) {
+			if (ips->ips[j].pnn == -1 && nodemap->nodes[pnn].flags == 0) {
+				DEBUG(DEBUG_CRIT,("Public address '%s' is not assigned and we could serve this ip\n",
+						ctdb_addr_to_str(&ips->ips[j].addr)));
 				need_takeover_run = true;
+			} else if (ips->ips[j].pnn == pnn) {
+				if (!ctdb_sys_have_ip(&ips->ips[j].addr)) {
+					DEBUG(DEBUG_CRIT,("Public address '%s' is missing and we should serve this ip\n",
+						ctdb_addr_to_str(&ips->ips[j].addr)));
+					need_takeover_run = true;
+				}
+			} else {
+				if (ctdb_sys_have_ip(&ips->ips[j].addr)) {
+					DEBUG(DEBUG_CRIT,("We are still serving a public address '%s' that we should not be serving.\n", 
+						ctdb_addr_to_str(&ips->ips[j].addr)));
+					need_takeover_run = true;
+				}
 			}
 		}
 	}
@@ -2758,7 +2773,7 @@ static int check_recovery_lock(struct ctdb_context *ctdb)
 		return -1;
 	}
 
-	state->child = fork();
+	state->child = ctdb_fork(ctdb);
 	if (state->child == (pid_t)-1) {
 		DEBUG(DEBUG_CRIT,(__location__ " fork() failed in check_reclock child\n"));
 		close(state->fd[0]);
@@ -2774,6 +2789,7 @@ static int check_recovery_lock(struct ctdb_context *ctdb)
 		close(state->fd[0]);
 		state->fd[0] = -1;
 
+		debug_extra = talloc_asprintf(NULL, "recovery-lock:");
 		if (pread(ctdb->recovery_lock_fd, &cc, 1, 0) == -1) {
 			DEBUG(DEBUG_CRIT,("failed read from recovery_lock_fd - %s\n", strerror(errno)));
 			cc = RECLOCK_FAILED;
@@ -3008,10 +3024,6 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
 			rec->reallocate_callers = NULL;
 		}
 	}
-	/* if there are takeovers requested, perform it and notify the waiters */
-	if (rec->reallocate_callers) {
-		process_ipreallocate_requests(ctdb, rec);
-	}
 
 	if (rec->recmaster == (uint32_t)-1) {
 		DEBUG(DEBUG_NOTICE,(__location__ " Initial recovery master set - forcing election\n"));
@@ -3115,9 +3127,9 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
 	/* verify that we have all ip addresses we should have and we dont
 	 * have addresses we shouldnt have.
 	 */ 
-	if (ctdb->do_checkpublicip) {
+	if (ctdb->tunable.disable_ip_failover == 0) {
 		if (rec->ip_check_disable_ctx == NULL) {
-			if (verify_local_ip_allocation(ctdb, rec, pnn) != 0) {
+			if (verify_local_ip_allocation(ctdb, rec, pnn, nodemap) != 0) {
 				DEBUG(DEBUG_ERR, (__location__ " Public IPs were inconsistent.\n"));
 			}
 		}
@@ -3198,6 +3210,11 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
 		}
 	}
 
+	/* if there are takeovers requested, perform it and notify the waiters */
+	if (rec->reallocate_callers) {
+		process_ipreallocate_requests(ctdb, rec);
+	}
+
 	/* get the nodemap for all active remote nodes
 	 */
 	remote_nodemaps = talloc_array(mem_ctx, struct ctdb_node_map *, nodemap->num);
@@ -3386,8 +3403,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
 		if (ret != 0) {
 			DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n",
 					 culprit));
-			ctdb_set_culprit(rec, culprit);
-			do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap);
+			rec->need_takeover_run = true;
 			return;
 		}
 
@@ -3402,9 +3418,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
 
 		ret = ctdb_takeover_run(ctdb, nodemap);
 		if (ret != 0) {
-			DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses - starting recovery\n"));
-			ctdb_set_culprit(rec, ctdb->pnn);
-			do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap);
+			DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses. Try again later\n"));
 			return;
 		}
 
@@ -3510,18 +3524,12 @@ static void ctdb_check_recd(struct event_context *ev, struct timed_event *te,
 	struct ctdb_context *ctdb = talloc_get_type(p, struct ctdb_context);
 
 	if (kill(ctdb->recoverd_pid, 0) != 0) {
-		DEBUG(DEBUG_ERR,("Recovery daemon (pid:%d) is no longer running. Shutting down main daemon\n", (int)ctdb->recoverd_pid));
+		DEBUG(DEBUG_ERR,("Recovery daemon (pid:%d) is no longer running. Trying to restart recovery daemon.\n", (int)ctdb->recoverd_pid));
 
-		ctdb_stop_recoverd(ctdb);
-		ctdb_stop_keepalive(ctdb);
-		ctdb_stop_monitoring(ctdb);
-		ctdb_release_all_ips(ctdb);
-		if (ctdb->methods != NULL) {
-			ctdb->methods->shutdown(ctdb);
-		}
-		ctdb_event_script(ctdb, CTDB_EVENT_SHUTDOWN);
+		event_add_timed(ctdb->ev, ctdb, timeval_zero(), 
+				ctdb_restart_recd, ctdb);
 
-		exit(10);	
+		return;
 	}
 
 	event_add_timed(ctdb->ev, ctdb, 
@@ -3584,7 +3592,7 @@ int ctdb_start_recoverd(struct ctdb_context *ctdb)
 
 	srandom(getpid() ^ time(NULL));
 
-	if (switch_from_server_to_client(ctdb) != 0) {
+	if (switch_from_server_to_client(ctdb, "recoverd") != 0) {
 		DEBUG(DEBUG_CRIT, (__location__ "ERROR: failed to switch recovery daemon into client mode. shutting down.\n"));
 		exit(1);
 	}
@@ -3623,3 +3631,13 @@ void ctdb_stop_recoverd(struct ctdb_context *ctdb)
 	DEBUG(DEBUG_NOTICE,("Shutting down recovery daemon\n"));
 	kill(ctdb->recoverd_pid, SIGTERM);
 }
+
+static void ctdb_restart_recd(struct event_context *ev, struct timed_event *te, 
+		       struct timeval t, void *private_data)
+{	/* timed-event callback; queued by ctdb_check_recd() when kill(recoverd_pid, 0) fails. ev, te and t are unused. */
+	struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
+
+	DEBUG(DEBUG_ERR,("Restarting recovery daemon\n"));
+	ctdb_stop_recoverd(ctdb);	/* sends SIGTERM to ctdb->recoverd_pid. NOTE(review): the caller already saw that pid as gone -- confirm pid reuse cannot hit an unrelated process */
+	ctdb_start_recoverd(ctdb);	/* int return value is ignored here. NOTE(review): confirm a failed restart is detected elsewhere */
+}