/*
   ctdb recovery daemon

   Copyright (C) Ronnie Sahlberg 2007

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "lib/events/events.h"
#include "system/filesys.h"
#include "system/time.h"
#include "../include/ctdb.h"
#include "../include/ctdb_private.h"
/*
  a banned node and the recovery daemon that banned it; used as the
  callback state for the unban timer
 */
struct ban_state {
        struct ctdb_recoverd *rec;
        uint32_t banned_node;
};
/*
  private state of recovery daemon
 */
struct ctdb_recoverd {
        struct ctdb_context *ctdb;
        uint32_t last_culprit;
        uint32_t culprit_counter;
        struct timeval first_recover_time;
        struct ban_state **banned_nodes;
        struct timeval priority_time;
};
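/* last_culprit, culprit_counter and first_recover_time together track how
   often a single node has triggered a recovery within the
   recovery_grace_period tunable; do_recovery() uses them to decide when a
   misbehaving node should be banned. */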
#define CONTROL_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_timeout, 0)
#define MONITOR_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_interval, 0)
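/* Both timeouts come from live tunables rather than compile-time
   constants; monitor_cluster() refetches the tunables on every pass, so
   these deadlines can be adjusted at runtime. */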
/*
  unban a node
 */
static void ctdb_unban_node(struct ctdb_recoverd *rec, uint32_t vnn)
{
        struct ctdb_context *ctdb = rec->ctdb;

        if (!ctdb_validate_vnn(ctdb, vnn)) {
                DEBUG(0,("Bad vnn %u in ctdb_unban_node\n", vnn));
                return;
        }

        if (rec->banned_nodes[vnn] == NULL) {
                return;
        }

        ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), vnn, 0, NODE_FLAGS_BANNED);

        talloc_free(rec->banned_nodes[vnn]);
        rec->banned_nodes[vnn] = NULL;
}
/*
  called when a ban has timed out
 */
static void ctdb_ban_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p)
{
        struct ban_state *state = talloc_get_type(p, struct ban_state);
        struct ctdb_recoverd *rec = state->rec;
        uint32_t vnn = state->banned_node;

        DEBUG(0,("Node %u is now unbanned\n", vnn));
        ctdb_unban_node(rec, vnn);
}
/*
  ban a node for a period of time
 */
static void ctdb_ban_node(struct ctdb_recoverd *rec, uint32_t vnn, uint32_t ban_time)
{
        struct ctdb_context *ctdb = rec->ctdb;

        if (!ctdb_validate_vnn(ctdb, vnn)) {
                DEBUG(0,("Bad vnn %u in ctdb_ban_node\n", vnn));
                return;
        }

        if (vnn == ctdb->vnn) {
                DEBUG(0,("self ban - lowering our election priority\n"));
                /* banning ourselves - lower our election priority */
                rec->priority_time = timeval_current();
        }

        ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), vnn, NODE_FLAGS_BANNED, 0);

        rec->banned_nodes[vnn] = talloc(rec, struct ban_state);
        CTDB_NO_MEMORY_FATAL(ctdb, rec->banned_nodes[vnn]);

        rec->banned_nodes[vnn]->rec = rec;
        rec->banned_nodes[vnn]->banned_node = vnn;

        event_add_timed(ctdb->ev, rec->banned_nodes[vnn],
                        timeval_current_ofs(ban_time, 0),
                        ctdb_ban_timeout, rec->banned_nodes[vnn]);
}
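/* The timed event above is allocated as a talloc child of the ban_state,
   so the talloc_free() in ctdb_unban_node() also cancels the pending
   ctdb_ban_timeout() for that node - an early unban cannot fire a stale
   timer later. */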
/*
  change recovery mode on all nodes
 */
static int set_recovery_mode(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t rec_mode)
{
        int j, ret;

        /* start the freeze process immediately on all nodes */
        ctdb_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
                     CTDB_CONTROL_FREEZE, CTDB_CTRL_FLAG_NOREPLY, tdb_null,
                     NULL, NULL, NULL, NULL, NULL);

        /* set recovery mode to active on all nodes */
        for (j=0; j<nodemap->num; j++) {
                /* don't change it for nodes that are unavailable */
                if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
                        continue;
                }

                if (rec_mode == CTDB_RECOVERY_ACTIVE) {
                        ret = ctdb_ctrl_freeze(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn);
                        if (ret != 0) {
                                DEBUG(0, (__location__ " Unable to freeze node %u\n", nodemap->nodes[j].vnn));
                                return -1;
                        }
                }

                ret = ctdb_ctrl_setrecmode(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, rec_mode);
                if (ret != 0) {
                        DEBUG(0, (__location__ " Unable to set recmode on node %u\n", nodemap->nodes[j].vnn));
                        return -1;
                }

                if (rec_mode == CTDB_RECOVERY_NORMAL) {
                        ret = ctdb_ctrl_thaw(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn);
                        if (ret != 0) {
                                DEBUG(0, (__location__ " Unable to thaw node %u\n", nodemap->nodes[j].vnn));
                                return -1;
                        }
                }
        }

        return 0;
}
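/* Ordering note: the broadcast above is sent with NOREPLY so that every
   node starts freezing at roughly the same time; the per-node
   freeze/setrecmode/thaw calls that follow are synchronous and confirm
   that each node actually reached the requested mode. */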
/*
  change recovery master on all nodes
 */
static int set_recovery_master(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t vnn)
{
        int j, ret;

        /* set recovery master to vnn on all nodes */
        for (j=0; j<nodemap->num; j++) {
                /* don't change it for nodes that are unavailable */
                if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
                        continue;
                }

                ret = ctdb_ctrl_setrecmaster(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, vnn);
                if (ret != 0) {
                        DEBUG(0, (__location__ " Unable to set recmaster on node %u\n", nodemap->nodes[j].vnn));
                        return -1;
                }
        }

        return 0;
}
/*
  ensure all other nodes have attached to any databases that we have
 */
static int create_missing_remote_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
                                           uint32_t vnn, struct ctdb_dbid_map *dbmap, TALLOC_CTX *mem_ctx)
{
        int i, j, db, ret;
        struct ctdb_dbid_map *remote_dbmap;

        /* verify that all other nodes have all our databases */
        for (j=0; j<nodemap->num; j++) {
                /* we don't need to check ourselves */
                if (nodemap->nodes[j].vnn == vnn) {
                        continue;
                }
                /* don't check nodes that are unavailable */
                if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
                        continue;
                }

                ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn,
                                         mem_ctx, &remote_dbmap);
                if (ret != 0) {
                        DEBUG(0, (__location__ " Unable to get dbids from node %u\n", nodemap->nodes[j].vnn));
                        return -1;
                }

                /* step through all local databases */
                for (db=0; db<dbmap->num;db++) {
                        const char *name;

                        for (i=0;i<remote_dbmap->num;i++) {
                                if (dbmap->dbids[db] == remote_dbmap->dbids[i]) {
                                        break;
                                }
                        }
                        /* the remote node already has this database */
                        if (i!=remote_dbmap->num) {
                                continue;
                        }
                        /* ok so we need to create this database */
                        ret = ctdb_ctrl_getdbname(ctdb, CONTROL_TIMEOUT(), vnn, dbmap->dbids[db], mem_ctx, &name);
                        if (ret != 0) {
                                DEBUG(0, (__location__ " Unable to get dbname from node %u\n", vnn));
                                return -1;
                        }
                        ret = ctdb_ctrl_createdb(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, mem_ctx, name);
                        if (ret != 0) {
                                DEBUG(0, (__location__ " Unable to create remote db:%s\n", name));
                                return -1;
                        }
                }
        }

        return 0;
}
/*
  ensure we are attached to any databases that anyone else is attached to
 */
static int create_missing_local_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
                                          uint32_t vnn, struct ctdb_dbid_map **dbmap, TALLOC_CTX *mem_ctx)
{
        int i, j, db, ret;
        struct ctdb_dbid_map *remote_dbmap;

        /* verify that we have all databases any other node has */
        for (j=0; j<nodemap->num; j++) {
                /* we don't need to check ourselves */
                if (nodemap->nodes[j].vnn == vnn) {
                        continue;
                }
                /* don't check nodes that are unavailable */
                if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
                        continue;
                }

                ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn,
                                         mem_ctx, &remote_dbmap);
                if (ret != 0) {
                        DEBUG(0, (__location__ " Unable to get dbids from node %u\n", nodemap->nodes[j].vnn));
                        return -1;
                }

                /* step through all databases on the remote node */
                for (db=0; db<remote_dbmap->num;db++) {
                        const char *name;

                        for (i=0;i<(*dbmap)->num;i++) {
                                if (remote_dbmap->dbids[db] == (*dbmap)->dbids[i]) {
                                        break;
                                }
                        }
                        /* we already have this db locally */
                        if (i!=(*dbmap)->num) {
                                continue;
                        }
                        /* ok so we need to create this database locally and
                           then reread our dbmap so it includes the new db
                         */
                        ret = ctdb_ctrl_getdbname(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn,
                                                  remote_dbmap->dbids[db], mem_ctx, &name);
                        if (ret != 0) {
                                DEBUG(0, (__location__ " Unable to get dbname from node %u\n",
                                          nodemap->nodes[j].vnn));
                                return -1;
                        }
                        ret = ctdb_ctrl_createdb(ctdb, CONTROL_TIMEOUT(), vnn, mem_ctx, name);
                        if (ret != 0) {
                                DEBUG(0, (__location__ " Unable to create local db:%s\n", name));
                                return -1;
                        }
                        ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), vnn, mem_ctx, dbmap);
                        if (ret != 0) {
                                DEBUG(0, (__location__ " Unable to reread dbmap on node %u\n", vnn));
                                return -1;
                        }
                }
        }

        return 0;
}
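/* The two functions above are deliberately symmetric: once both have run,
   every active node is attached to the same set of databases, which is a
   precondition for the pull/push phases of recovery below. */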
/*
  pull all the remote database contents into ours
 */
static int pull_all_remote_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
                                     uint32_t vnn, struct ctdb_dbid_map *dbmap, TALLOC_CTX *mem_ctx)
{
        int i, j, ret;

        /* pull all records from all other nodes across onto this node
           (this merges based on rsn)
         */
        for (i=0;i<dbmap->num;i++) {
                for (j=0; j<nodemap->num; j++) {
                        /* we don't need to merge with ourselves */
                        if (nodemap->nodes[j].vnn == vnn) {
                                continue;
                        }
                        /* don't merge from nodes that are unavailable */
                        if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
                                continue;
                        }
                        ret = ctdb_ctrl_copydb(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn,
                                               vnn, dbmap->dbids[i], CTDB_LMASTER_ANY, mem_ctx);
                        if (ret != 0) {
                                DEBUG(0, (__location__ " Unable to copy db from node %u to node %u\n",
                                          nodemap->nodes[j].vnn, vnn));
                                return -1;
                        }
                }
        }

        return 0;
}
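/* Merging on the rsn (record sequence number) means that when several
   nodes hold copies of the same record, the copy with the highest rsn -
   the most recently updated one - is the version that survives on the
   recovery master. */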
/*
  change the dmaster on all databases to point to us
 */
static int update_dmaster_on_all_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
                                           uint32_t vnn, struct ctdb_dbid_map *dbmap, TALLOC_CTX *mem_ctx)
{
        int i, j, ret;

        /* update dmaster to point to this node for all databases/nodes */
        for (i=0;i<dbmap->num;i++) {
                for (j=0; j<nodemap->num; j++) {
                        /* don't repoint nodes that are unavailable */
                        if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
                                continue;
                        }
                        ret = ctdb_ctrl_setdmaster(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn,
                                                   ctdb, dbmap->dbids[i], vnn);
                        if (ret != 0) {
                                DEBUG(0, (__location__ " Unable to set dmaster for node %u db:0x%08x\n",
                                          nodemap->nodes[j].vnn, dbmap->dbids[i]));
                                return -1;
                        }
                }
        }

        return 0;
}
/*
  update flags on all active nodes
 */
static int update_flags_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
{
        int i;

        for (i=0;i<nodemap->num;i++) {
                struct ctdb_node_flag_change c;
                TDB_DATA data;

                c.vnn = nodemap->nodes[i].vnn;
                c.old_flags = nodemap->nodes[i].flags;
                c.new_flags = nodemap->nodes[i].flags;

                data.dptr = (uint8_t *)&c;
                data.dsize = sizeof(c);

                ctdb_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
                                  CTDB_SRVID_NODE_FLAGS_CHANGED, data);
        }

        return 0;
}
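/* old_flags is deliberately set equal to new_flags: this broadcast is not
   reporting a transition, it is republishing the recovery master's view of
   each node's flags so that all nodes converge on the same nodemap. */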
/*
  vacuum one database
 */
static int vacuum_db(struct ctdb_context *ctdb, uint32_t db_id, struct ctdb_node_map *nodemap)
{
        int i, ret;
        uint64_t max_rsn;

        /* find max rsn on our local node for this db */
        ret = ctdb_ctrl_get_max_rsn(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, db_id, &max_rsn);
        if (ret != 0) {
                return -1;
        }

        /* set rsn on non-empty records to max_rsn+1 */
        for (i=0;i<nodemap->num;i++) {
                if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
                        continue;
                }
                ret = ctdb_ctrl_set_rsn_nonempty(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[i].vnn,
                                                 db_id, max_rsn+1);
                if (ret != 0) {
                        DEBUG(0,(__location__ " Failed to set rsn on node %u to %llu\n",
                                 nodemap->nodes[i].vnn, (unsigned long long)max_rsn+1));
                        return -1;
                }
        }

        /* delete records with rsn < max_rsn+1 on all nodes */
        for (i=0;i<nodemap->num;i++) {
                if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
                        continue;
                }
                ret = ctdb_ctrl_delete_low_rsn(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[i].vnn,
                                               db_id, max_rsn+1);
                if (ret != 0) {
                        DEBUG(0,(__location__ " Failed to delete records on node %u with rsn below %llu\n",
                                 nodemap->nodes[i].vnn, (unsigned long long)max_rsn+1));
                        return -1;
                }
        }

        return 0;
}
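/* The two passes above let vacuuming proceed without racing against the
   records that still hold data: every non-empty record is first lifted to
   max_rsn+1, and then everything still below max_rsn+1 - i.e. only the
   empty records - can be deleted safely on all nodes. */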
/*
  vacuum all attached databases
 */
static int vacuum_all_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
                                struct ctdb_dbid_map *dbmap)
{
        int i;

        /* vacuum each of our databases in turn */
        for (i=0;i<dbmap->num;i++) {
                if (vacuum_db(ctdb, dbmap->dbids[i], nodemap) != 0) {
                        return -1;
                }
        }

        return 0;
}
/*
  push out all our database contents to all other nodes
 */
static int push_all_local_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
                                    uint32_t vnn, struct ctdb_dbid_map *dbmap, TALLOC_CTX *mem_ctx)
{
        int i, j, ret;

        /* push all records out to the nodes again */
        for (i=0;i<dbmap->num;i++) {
                for (j=0; j<nodemap->num; j++) {
                        /* we don't need to push to ourselves */
                        if (nodemap->nodes[j].vnn == vnn) {
                                continue;
                        }
                        /* don't push to nodes that are unavailable */
                        if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
                                continue;
                        }
                        ret = ctdb_ctrl_copydb(ctdb, CONTROL_TIMEOUT(), vnn, nodemap->nodes[j].vnn,
                                               dbmap->dbids[i], CTDB_LMASTER_ANY, mem_ctx);
                        if (ret != 0) {
                                DEBUG(0, (__location__ " Unable to copy db from node %u to node %u\n",
                                          vnn, nodemap->nodes[j].vnn));
                                return -1;
                        }
                }
        }

        return 0;
}
/*
  ensure all nodes have the same vnnmap we do
 */
static int update_vnnmap_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
                                      uint32_t vnn, struct ctdb_vnn_map *vnnmap, TALLOC_CTX *mem_ctx)
{
        int j, ret;

        /* push the new vnn map out to all the nodes */
        for (j=0; j<nodemap->num; j++) {
                /* don't push to nodes that are unavailable */
                if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
                        continue;
                }

                ret = ctdb_ctrl_setvnnmap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, mem_ctx, vnnmap);
                if (ret != 0) {
                        DEBUG(0, (__location__ " Unable to set vnnmap for node %u\n", nodemap->nodes[j].vnn));
                        return -1;
                }
        }

        return 0;
}
/*
  handler for when the admin bans a node
 */
static void ban_handler(struct ctdb_context *ctdb, uint64_t srvid,
                        TDB_DATA data, void *private_data)
{
        struct ctdb_recoverd *rec = talloc_get_type(private_data, struct ctdb_recoverd);
        struct ctdb_ban_info *b = (struct ctdb_ban_info *)data.dptr;
        uint32_t recmaster;
        int ret;

        if (data.dsize != sizeof(*b)) {
                DEBUG(0,("Bad data in ban_handler\n"));
                return;
        }

        ret = ctdb_ctrl_getrecmaster(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &recmaster);
        if (ret != 0) {
                DEBUG(0,(__location__ " Failed to find the recmaster\n"));
                return;
        }

        if (recmaster != ctdb->vnn) {
                DEBUG(0,("We are not the recmaster - ignoring ban request\n"));
                return;
        }

        DEBUG(0,("Node %u has been banned for %u seconds by the administrator\n",
                 b->vnn, b->ban_time));
        ctdb_ban_node(rec, b->vnn, b->ban_time);
}
/*
  handler for when the admin unbans a node
 */
static void unban_handler(struct ctdb_context *ctdb, uint64_t srvid,
                          TDB_DATA data, void *private_data)
{
        struct ctdb_recoverd *rec = talloc_get_type(private_data, struct ctdb_recoverd);
        uint32_t vnn;
        uint32_t recmaster;
        int ret;

        if (data.dsize != sizeof(uint32_t)) {
                DEBUG(0,("Bad data in unban_handler\n"));
                return;
        }
        vnn = *(uint32_t *)data.dptr;

        ret = ctdb_ctrl_getrecmaster(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &recmaster);
        if (ret != 0) {
                DEBUG(0,(__location__ " Failed to find the recmaster\n"));
                return;
        }

        if (recmaster != ctdb->vnn) {
                DEBUG(0,("We are not the recmaster - ignoring unban request\n"));
                return;
        }

        DEBUG(0,("Node %u has been unbanned by the administrator\n", vnn));
        ctdb_unban_node(rec, vnn);
}
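/* Every node's recovery daemon receives these ban/unban messages, but only
   the current recovery master acts on them; that way a request is applied
   exactly once, by the node that owns the ban list. */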
/*
  called when ctdb_wait_timeout should finish
 */
static void ctdb_wait_handler(struct event_context *ev, struct timed_event *te,
                              struct timeval yt, void *p)
{
        uint32_t *timed_out = (uint32_t *)p;
        (*timed_out) = 1;
}

/*
  wait for a given number of seconds
 */
static void ctdb_wait_timeout(struct ctdb_context *ctdb, uint32_t secs)
{
        uint32_t timed_out = 0;
        event_add_timed(ctdb->ev, ctdb, timeval_current_ofs(secs, 0), ctdb_wait_handler, &timed_out);
        while (!timed_out) {
                event_loop_once(ctdb->ev);
        }
}
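/* Note that this is not a plain sleep(): pumping the event loop while
   waiting means the election, ban and flag-change handlers keep running
   during the pause. */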
/* Create a new random generation id.
   The generation id can not be the INVALID_GENERATION id
*/
static uint32_t new_generation(void)
{
        uint32_t generation;

        while (1) {
                generation = random();

                if (generation != INVALID_GENERATION) {
                        break;
                }
        }

        return generation;
}
/*
  we are the recmaster, and recovery is needed - start a recovery run
 */
static int do_recovery(struct ctdb_recoverd *rec,
                       TALLOC_CTX *mem_ctx, uint32_t vnn, uint32_t num_active,
                       struct ctdb_node_map *nodemap, struct ctdb_vnn_map *vnnmap,
                       uint32_t culprit)
{
        struct ctdb_context *ctdb = rec->ctdb;
        int i, j, ret;
        uint32_t generation;
        struct ctdb_dbid_map *dbmap;

        if (rec->last_culprit != culprit ||
            timeval_elapsed(&rec->first_recover_time) > ctdb->tunable.recovery_grace_period) {
                /* either a new node is the culprit, or we've decided to forgive them */
                rec->last_culprit = culprit;
                rec->first_recover_time = timeval_current();
                rec->culprit_counter = 0;
        }
        rec->culprit_counter++;

        if (rec->culprit_counter > 2*nodemap->num) {
                DEBUG(0,("Node %u has caused %u recoveries in %.0f seconds - banning it for %u seconds\n",
                         culprit, rec->culprit_counter, timeval_elapsed(&rec->first_recover_time),
                         ctdb->tunable.recovery_ban_period));
                ctdb_ban_node(rec, culprit, ctdb->tunable.recovery_ban_period);
        }
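        /* The threshold of 2*nodemap->num recoveries within the grace
           period means an occasional failure never leads to a ban; only a
           node that repeatedly forces the cluster back into recovery gets
           banned. */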
        if (!ctdb_recovery_lock(ctdb, true)) {
                DEBUG(0,("Unable to get recovery lock - aborting recovery\n"));
                return -1;
        }

        /* set recovery mode to active on all nodes */
        ret = set_recovery_mode(ctdb, nodemap, CTDB_RECOVERY_ACTIVE);
        if (ret != 0) {
                DEBUG(0, (__location__ " Unable to set recovery mode to active on cluster\n"));
                return -1;
        }

        DEBUG(0, (__location__ " Recovery initiated due to problem with node %u\n", culprit));

        /* pick a new generation number */
        generation = new_generation();

        /* change the vnnmap on this node to use the new generation
           number, but not on any other nodes.
           this guarantees that if we abort the recovery prematurely
           for some reason (a node stops responding?)
           then we can just return immediately and we will reenter
           recovery shortly again.
           I.e. we deliberately leave the cluster with an inconsistent
           generation id to allow us to abort recovery at any stage and
           just restart it from scratch.
         */
        vnnmap->generation = generation;
        ret = ctdb_ctrl_setvnnmap(ctdb, CONTROL_TIMEOUT(), vnn, mem_ctx, vnnmap);
        if (ret != 0) {
                DEBUG(0, (__location__ " Unable to set vnnmap for node %u\n", vnn));
                return -1;
        }

        /* get a list of all databases */
        ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), vnn, mem_ctx, &dbmap);
        if (ret != 0) {
                DEBUG(0, (__location__ " Unable to get dbids from node %u\n", vnn));
                return -1;
        }
        /* verify that all other nodes have all our databases */
        ret = create_missing_remote_databases(ctdb, nodemap, vnn, dbmap, mem_ctx);
        if (ret != 0) {
                DEBUG(0, (__location__ " Unable to create missing remote databases\n"));
                return -1;
        }

        /* verify that we have all the databases any other node has */
        ret = create_missing_local_databases(ctdb, nodemap, vnn, &dbmap, mem_ctx);
        if (ret != 0) {
                DEBUG(0, (__location__ " Unable to create missing local databases\n"));
                return -1;
        }

        /* verify again that all other nodes have all our databases, in
           case the previous step attached us to new ones */
        ret = create_missing_remote_databases(ctdb, nodemap, vnn, dbmap, mem_ctx);
        if (ret != 0) {
                DEBUG(0, (__location__ " Unable to create missing remote databases\n"));
                return -1;
        }

        DEBUG(1, (__location__ " Recovery - created remote databases\n"));

        /* pull all remote databases onto the local node */
        ret = pull_all_remote_databases(ctdb, nodemap, vnn, dbmap, mem_ctx);
        if (ret != 0) {
                DEBUG(0, (__location__ " Unable to pull remote databases\n"));
                return -1;
        }

        DEBUG(1, (__location__ " Recovery - pulled remote databases\n"));

        /* push all local databases to the remote nodes */
        ret = push_all_local_databases(ctdb, nodemap, vnn, dbmap, mem_ctx);
        if (ret != 0) {
                DEBUG(0, (__location__ " Unable to push local databases\n"));
                return -1;
        }

        DEBUG(1, (__location__ " Recovery - pushed remote databases\n"));
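        /* The ordering of the two phases above matters: first every record
           is merged onto the recovery master (pull), then the merged
           result is distributed back out (push), leaving all active nodes
           with identical copies of every database. */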
        /* build a new vnn map with all the currently active and
           unbanned nodes */
        generation = new_generation();
        vnnmap = talloc(mem_ctx, struct ctdb_vnn_map);
        CTDB_NO_MEMORY(ctdb, vnnmap);
        vnnmap->generation = generation;
        vnnmap->size = num_active;
        vnnmap->map = talloc_zero_array(vnnmap, uint32_t, vnnmap->size);
        for (i=j=0;i<nodemap->num;i++) {
                if (!(nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE)) {
                        vnnmap->map[j++] = nodemap->nodes[i].vnn;
                }
        }

        /* update to the new vnnmap on all nodes */
        ret = update_vnnmap_on_all_nodes(ctdb, nodemap, vnn, vnnmap, mem_ctx);
        if (ret != 0) {
                DEBUG(0, (__location__ " Unable to update vnnmap on all nodes\n"));
                return -1;
        }

        DEBUG(1, (__location__ " Recovery - updated vnnmap\n"));
        /* update recmaster to point to us for all nodes */
        ret = set_recovery_master(ctdb, nodemap, vnn);
        if (ret != 0) {
                DEBUG(0, (__location__ " Unable to set recovery master\n"));
                return -1;
        }

        DEBUG(1, (__location__ " Recovery - updated recmaster\n"));

        /* repoint all local and remote database records to the local
           node as being dmaster
         */
        ret = update_dmaster_on_all_databases(ctdb, nodemap, vnn, dbmap, mem_ctx);
        if (ret != 0) {
                DEBUG(0, (__location__ " Unable to update dmaster on all databases\n"));
                return -1;
        }

        DEBUG(1, (__location__ " Recovery - updated dmaster on all databases\n"));

        /*
          update all nodes to have the same flags that we have
         */
        ret = update_flags_on_all_nodes(ctdb, nodemap);
        if (ret != 0) {
                DEBUG(0, (__location__ " Unable to update flags on all nodes\n"));
                return -1;
        }

        DEBUG(1, (__location__ " Recovery - updated flags\n"));

        /*
          run a vacuum operation on empty records
         */
        ret = vacuum_all_databases(ctdb, nodemap, dbmap);
        if (ret != 0) {
                DEBUG(0, (__location__ " Unable to vacuum all databases\n"));
                return -1;
        }

        DEBUG(1, (__location__ " Recovery - vacuumed all databases\n"));
        /*
          if enabled, tell nodes to take over their public IPs
         */
        if (ctdb->takeover.enabled) {
                ret = ctdb_takeover_run(ctdb, nodemap);
                if (ret != 0) {
                        DEBUG(0, (__location__ " Unable to setup public takeover addresses\n"));
                        return -1;
                }
                DEBUG(1, (__location__ " Recovery - done takeover\n"));
        }

        /* disable recovery mode */
        ret = set_recovery_mode(ctdb, nodemap, CTDB_RECOVERY_NORMAL);
        if (ret != 0) {
                DEBUG(0, (__location__ " Unable to set recovery mode to normal on cluster\n"));
                return -1;
        }

        /* send a message to all clients telling them that the cluster
           has been reconfigured */
        ctdb_send_message(ctdb, CTDB_BROADCAST_CONNECTED, CTDB_SRVID_RECONFIGURE, tdb_null);

        DEBUG(0, (__location__ " Recovery complete\n"));

        /* We just finished a recovery successfully.
           We now wait for rerecovery_timeout before we allow
           another recovery to take place.
         */
        DEBUG(0, (__location__ " New recoveries suppressed for the rerecovery timeout\n"));
        ctdb_wait_timeout(ctdb, ctdb->tunable.rerecovery_timeout);
        DEBUG(0, (__location__ " Rerecovery timeout elapsed. Recovery reactivated.\n"));

        return 0;
}
854 elections are won by first checking the number of connected nodes, then
855 the priority time, then the vnn
857 struct election_message {
858 uint32_t num_connected;
859 struct timeval priority_time;
/*
  form this node's election data
 */
static void ctdb_election_data(struct ctdb_recoverd *rec, struct election_message *em)
{
        int ret, i;
        struct ctdb_node_map *nodemap;
        struct ctdb_context *ctdb = rec->ctdb;

        ZERO_STRUCTP(em);

        em->vnn = rec->ctdb->vnn;
        em->priority_time = rec->priority_time;

        ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, rec, &nodemap);
        if (ret != 0) {
                return;
        }

        for (i=0;i<nodemap->num;i++) {
                if (!(nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED)) {
                        em->num_connected++;
                }
        }
        talloc_free(nodemap);
}
/*
  see if our election data wins against the given election data
 */
static bool ctdb_election_win(struct ctdb_recoverd *rec, struct election_message *em)
{
        struct election_message myem;
        int cmp;

        ctdb_election_data(rec, &myem);

        /* try to use the most connected node */
        cmp = (int)myem.num_connected - (int)em->num_connected;

        /* then the longest running node */
        if (cmp == 0) {
                cmp = timeval_compare(&em->priority_time, &myem.priority_time);
        }

        /* finally use the vnn as a tie-breaker */
        if (cmp == 0) {
                cmp = (int)myem.vnn - (int)em->vnn;
        }

        return cmp > 0;
}
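/* Example of the comparison order above: a node that can see 4 connected
   nodes beats one that can only see 3, regardless of uptime; between
   equally connected nodes the earlier priority_time wins (the
   longest-running candidate - self-banning resets it to "now"); the vnn
   only decides between otherwise identical candidates. */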
/*
  send out an election request
 */
static int send_election_request(struct ctdb_recoverd *rec, TALLOC_CTX *mem_ctx, uint32_t vnn)
{
        int ret;
        TDB_DATA election_data;
        struct election_message emsg;
        uint64_t srvid;
        struct ctdb_context *ctdb = rec->ctdb;

        srvid = CTDB_SRVID_RECOVERY;

        ctdb_election_data(rec, &emsg);

        election_data.dsize = sizeof(struct election_message);
        election_data.dptr = (unsigned char *)&emsg;

        /* first we assume we will win the election and set
           recoverymaster to be ourself on the current node
         */
        ret = ctdb_ctrl_setrecmaster(ctdb, CONTROL_TIMEOUT(), vnn, vnn);
        if (ret != 0) {
                DEBUG(0, (__location__ " failed to send recmaster election request\n"));
                return -1;
        }

        /* send an election message to all active nodes */
        ctdb_send_message(ctdb, CTDB_BROADCAST_ALL, srvid, election_data);

        return 0;
}
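/* The election is optimistic: a candidate immediately claims the recmaster
   role on its own node and broadcasts its credentials. Nodes with better
   credentials answer with election requests of their own (see
   election_handler() below), and the losers end up pointing their
   recmaster at the eventual winner. */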
/*
  this function will unban all nodes in the cluster
 */
static void unban_all_nodes(struct ctdb_context *ctdb)
{
        int ret, i;
        struct ctdb_node_map *nodemap;
        TALLOC_CTX *tmp_ctx = talloc_new(ctdb);

        ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, tmp_ctx, &nodemap);
        if (ret != 0) {
                DEBUG(0,(__location__ " failed to get nodemap to unban all nodes\n"));
                talloc_free(tmp_ctx);
                return;
        }

        for (i=0;i<nodemap->num;i++) {
                if ( (!(nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED))
                     && (nodemap->nodes[i].flags & NODE_FLAGS_BANNED) ) {
                        ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[i].vnn, 0, NODE_FLAGS_BANNED);
                }
        }

        talloc_free(tmp_ctx);
}
/*
  handler for recovery master elections
 */
static void election_handler(struct ctdb_context *ctdb, uint64_t srvid,
                             TDB_DATA data, void *private_data)
{
        struct ctdb_recoverd *rec = talloc_get_type(private_data, struct ctdb_recoverd);
        int ret;
        struct election_message *em = (struct election_message *)data.dptr;
        TALLOC_CTX *mem_ctx;

        mem_ctx = talloc_new(ctdb);

        /* someone called an election. check their election data
           and if we disagree and we would rather be the elected node,
           send a new election message to all other nodes
         */
        if (ctdb_election_win(rec, em)) {
                ret = send_election_request(rec, mem_ctx, ctdb_get_vnn(ctdb));
                if (ret != 0) {
                        DEBUG(0, (__location__ " failed to initiate recmaster election"));
                }
                talloc_free(mem_ctx);
                /*unban_all_nodes(ctdb);*/
                return;
        }

        /* we lost - release the recmaster lock if we held it */
        if (em->vnn != ctdb->vnn &&
            ctdb->recovery_lock_fd != -1) {
                close(ctdb->recovery_lock_fd);
                ctdb->recovery_lock_fd = -1;
                unban_all_nodes(ctdb);
        }

        /* ok, let that guy become recmaster then */
        ret = ctdb_ctrl_setrecmaster(ctdb, CONTROL_TIMEOUT(), ctdb_get_vnn(ctdb), em->vnn);
        if (ret != 0) {
                DEBUG(0, (__location__ " failed to send recmaster election request"));
                talloc_free(mem_ctx);
                return;
        }

        /* release any bans */
        rec->last_culprit = (uint32_t)-1;
        talloc_free(rec->banned_nodes);
        rec->banned_nodes = talloc_zero_array(rec, struct ban_state *, ctdb->num_nodes);
        CTDB_NO_MEMORY_FATAL(ctdb, rec->banned_nodes);

        talloc_free(mem_ctx);
}
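/* Losing an election deliberately clears all local ban state: banning is
   the recovery master's job, and stale ban timers on a former recmaster
   could otherwise disagree with the new master's view of the cluster. */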
/*
  force the start of the election process
 */
static void force_election(struct ctdb_recoverd *rec, TALLOC_CTX *mem_ctx, uint32_t vnn,
                           struct ctdb_node_map *nodemap)
{
        int ret;
        struct ctdb_context *ctdb = rec->ctdb;

        /* set all nodes to recovery mode to stop all internode traffic */
        ret = set_recovery_mode(ctdb, nodemap, CTDB_RECOVERY_ACTIVE);
        if (ret != 0) {
                DEBUG(0, (__location__ " Unable to set recovery mode to active on cluster\n"));
                return;
        }

        ret = send_election_request(rec, mem_ctx, vnn);
        if (ret != 0) {
                DEBUG(0, (__location__ " failed to initiate recmaster election"));
                return;
        }

        /* wait for a few seconds to collect all responses */
        ctdb_wait_timeout(ctdb, ctdb->tunable.election_timeout);
}
/*
  handler for when a node changes its flags
 */
static void monitor_handler(struct ctdb_context *ctdb, uint64_t srvid,
                            TDB_DATA data, void *private_data)
{
        int ret;
        struct ctdb_node_flag_change *c = (struct ctdb_node_flag_change *)data.dptr;
        struct ctdb_node_map *nodemap=NULL;
        TALLOC_CTX *tmp_ctx;
        uint32_t changed_flags;
        int i;

        if (data.dsize != sizeof(*c)) {
                DEBUG(0,(__location__ " Invalid data in ctdb_node_flag_change\n"));
                return;
        }

        tmp_ctx = talloc_new(ctdb);
        CTDB_NO_MEMORY_VOID(ctdb, tmp_ctx);

        ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, tmp_ctx, &nodemap);
        if (ret != 0) {
                DEBUG(0,(__location__ " Unable to get nodemap from local node\n"));
                talloc_free(tmp_ctx);
                return;
        }

        for (i=0;i<nodemap->num;i++) {
                if (nodemap->nodes[i].vnn == c->vnn) break;
        }

        if (i == nodemap->num) {
                DEBUG(0,(__location__ " Flag change for non-existent node %u\n", c->vnn));
                talloc_free(tmp_ctx);
                return;
        }

        changed_flags = c->old_flags ^ c->new_flags;

        /* Don't let messages from remote nodes change the DISCONNECTED flag.
           This flag is handled locally based on whether the local node
           can communicate with the node or not.
         */
        c->new_flags &= ~NODE_FLAGS_DISCONNECTED;
        if (nodemap->nodes[i].flags&NODE_FLAGS_DISCONNECTED) {
                c->new_flags |= NODE_FLAGS_DISCONNECTED;
        }

        if (nodemap->nodes[i].flags != c->new_flags) {
                DEBUG(0,("Node %u has changed flags - now 0x%x was 0x%x\n", c->vnn, c->new_flags, c->old_flags));
        }

        nodemap->nodes[i].flags = c->new_flags;

        ret = ctdb_ctrl_getrecmaster(ctdb, CONTROL_TIMEOUT(),
                                     CTDB_CURRENT_NODE, &ctdb->recovery_master);
        if (ret == 0) {
                ret = ctdb_ctrl_getrecmode(ctdb, tmp_ctx, CONTROL_TIMEOUT(),
                                           CTDB_CURRENT_NODE, &ctdb->recovery_mode);
        }

        if (ret == 0 &&
            ctdb->recovery_master == ctdb->vnn &&
            ctdb->recovery_mode == CTDB_RECOVERY_NORMAL &&
            ctdb->takeover.enabled) {
                /* Only do the takeover run if the perm disabled or unhealthy
                   flags changed, since these will cause an ip failover but not
                   a recovery.
                   If the node became disconnected or banned this will also
                   lead to an ip address failover, but that is handled
                   during recovery.
                 */
                if (changed_flags & NODE_FLAGS_DISABLED) {
                        ret = ctdb_takeover_run(ctdb, nodemap);
                        if (ret != 0) {
                                DEBUG(0, (__location__ " Unable to setup public takeover addresses\n"));
                        }
                        /* send a message to all clients telling them that the
                           cluster has been reconfigured */
                        ctdb_send_message(ctdb, CTDB_BROADCAST_CONNECTED, CTDB_SRVID_RECONFIGURE, tdb_null);
                }
        }

        talloc_free(tmp_ctx);
}
enum monitor_result { MONITOR_OK, MONITOR_RECOVERY_NEEDED, MONITOR_FAILED };
/* verify that all nodes are in recovery mode normal */
static enum monitor_result verify_recmode(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, TALLOC_CTX *mem_ctx)
{
        struct ctdb_client_control_state **ctrl_states;
        int j, ret;
        uint32_t recmode;

        ctrl_states = talloc_array(mem_ctx, struct ctdb_client_control_state *,
                                   nodemap->num);
        if (ctrl_states == NULL) {
                DEBUG(0,(__location__ " Failed to allocate temporary ctrl state array\n"));
                return MONITOR_FAILED;
        }

        /* loop over all active nodes and send an async getrecmode call to
           each of them */
        for (j=0; j<nodemap->num; j++) {
                if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
                        ctrl_states[j] = NULL;
                        continue;
                }
                ctrl_states[j] = ctdb_ctrl_getrecmode_send(ctdb, mem_ctx,
                                                           CONTROL_TIMEOUT(),
                                                           nodemap->nodes[j].vnn);
        }

        /* wait for the responses to come back and check that all is ok */
        for (j=0; j<nodemap->num; j++) {
                if (ctrl_states[j] == NULL) {
                        continue;
                }
                ret = ctdb_ctrl_getrecmode_recv(ctdb, mem_ctx, ctrl_states[j], &recmode);
                if (ret != 0) {
                        DEBUG(0, ("Unable to get recmode from node %u\n", nodemap->nodes[j].vnn));
                        talloc_free(ctrl_states);
                        return MONITOR_FAILED;
                }
                if (recmode != CTDB_RECOVERY_NORMAL) {
                        DEBUG(0, (__location__ " Node:%u was in recovery mode. Restart recovery process\n", nodemap->nodes[j].vnn));
                        talloc_free(ctrl_states);
                        return MONITOR_RECOVERY_NEEDED;
                }
        }

        talloc_free(ctrl_states);
        return MONITOR_OK;
}
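/* All the getrecmode controls are sent before any reply is collected, so
   this check completes in roughly one cluster-wide round-trip instead of
   one round-trip per node. */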
/*
  the main monitoring loop
 */
static void monitor_cluster(struct ctdb_context *ctdb)
{
        uint32_t vnn, num_active, recmaster;
        TALLOC_CTX *mem_ctx=NULL;
        struct ctdb_node_map *nodemap=NULL;
        struct ctdb_node_map *remote_nodemap=NULL;
        struct ctdb_vnn_map *vnnmap=NULL;
        struct ctdb_vnn_map *remote_vnnmap=NULL;
        int i, j, ret;
        bool need_takeover_run;
        struct ctdb_recoverd *rec;

        rec = talloc_zero(ctdb, struct ctdb_recoverd);
        CTDB_NO_MEMORY_FATAL(ctdb, rec);

        rec->ctdb = ctdb;
        rec->banned_nodes = talloc_zero_array(rec, struct ban_state *, ctdb->num_nodes);
        CTDB_NO_MEMORY_FATAL(ctdb, rec->banned_nodes);

        rec->priority_time = timeval_current();

        /* register a message port for recovery elections */
        ctdb_set_message_handler(ctdb, CTDB_SRVID_RECOVERY, election_handler, rec);

        /* and one for when nodes are disabled/enabled */
        ctdb_set_message_handler(ctdb, CTDB_SRVID_NODE_FLAGS_CHANGED, monitor_handler, rec);

        /* and one for when nodes are banned */
        ctdb_set_message_handler(ctdb, CTDB_SRVID_BAN_NODE, ban_handler, rec);

        /* and one for when nodes are unbanned */
        ctdb_set_message_handler(ctdb, CTDB_SRVID_UNBAN_NODE, unban_handler, rec);
again:
        need_takeover_run = false;

        if (mem_ctx) {
                talloc_free(mem_ctx);
                mem_ctx = NULL;
        }
        mem_ctx = talloc_new(ctdb);
        if (!mem_ctx) {
                DEBUG(0,("Failed to create temporary context\n"));
                exit(-1);
        }

        /* we only check for recovery once every recover_interval seconds */
        ctdb_wait_timeout(ctdb, ctdb->tunable.recover_interval);

        /* get relevant tunables */
        ret = ctdb_ctrl_get_all_tunables(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &ctdb->tunable);
        if (ret != 0) {
                DEBUG(0,("Failed to get tunables - retrying\n"));
                goto again;
        }
        vnn = ctdb_ctrl_getvnn(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE);
        if (vnn == (uint32_t)-1) {
                DEBUG(0,("Failed to get local vnn - retrying\n"));
                goto again;
        }

        /* get the vnnmap */
        ret = ctdb_ctrl_getvnnmap(ctdb, CONTROL_TIMEOUT(), vnn, mem_ctx, &vnnmap);
        if (ret != 0) {
                DEBUG(0, (__location__ " Unable to get vnnmap from node %u\n", vnn));
                goto again;
        }

        /* get number of nodes */
        ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), vnn, mem_ctx, &nodemap);
        if (ret != 0) {
                DEBUG(0, (__location__ " Unable to get nodemap from node %u\n", vnn));
                goto again;
        }

        /* count how many active nodes there are */
        num_active = 0;
        for (i=0; i<nodemap->num; i++) {
                if (rec->banned_nodes[nodemap->nodes[i].vnn] != NULL) {
                        nodemap->nodes[i].flags |= NODE_FLAGS_BANNED;
                } else {
                        nodemap->nodes[i].flags &= ~NODE_FLAGS_BANNED;
                }
                if (!(nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE)) {
                        num_active++;
                }
        }
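        /* rec->banned_nodes, not the flags returned in the nodemap, is
           treated as the authoritative record of bans here: the BANNED
           flag of every node is rewritten to match the recovery daemon's
           own ban list before the active node count is taken. */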
        /* check which node is the recovery master */
        ret = ctdb_ctrl_getrecmaster(ctdb, CONTROL_TIMEOUT(), vnn, &recmaster);
        if (ret != 0) {
                DEBUG(0, (__location__ " Unable to get recmaster from node %u\n", vnn));
                goto again;
        }

        if (recmaster == (uint32_t)-1) {
                DEBUG(0,(__location__ " No recovery master has been set yet - forcing election\n"));
                force_election(rec, mem_ctx, vnn, nodemap);
                goto again;
        }

        /* verify that the recmaster node is still active */
        for (j=0; j<nodemap->num; j++) {
                if (nodemap->nodes[j].vnn==recmaster) {
                        break;
                }
        }

        if (j == nodemap->num) {
                DEBUG(0, ("Recmaster node %u not in list. Force reelection\n", recmaster));
                force_election(rec, mem_ctx, vnn, nodemap);
                goto again;
        }

        if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
                DEBUG(0, ("Recmaster node %u no longer available. Force reelection\n", nodemap->nodes[j].vnn));
                force_election(rec, mem_ctx, vnn, nodemap);
                goto again;
        }
        /* if we are not the recmaster then we do not need to check
           if recovery is needed
         */
        if (vnn != recmaster) {
                goto again;
        }

        /* verify that all active nodes agree that we are the recmaster */
        for (j=0; j<nodemap->num; j++) {
                if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
                        continue;
                }
                if (nodemap->nodes[j].vnn == vnn) {
                        continue;
                }

                ret = ctdb_ctrl_getrecmaster(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, &recmaster);
                if (ret != 0) {
                        DEBUG(0, (__location__ " Unable to get recmaster from node %u\n", nodemap->nodes[j].vnn));
                        goto again;
                }

                if (recmaster != vnn) {
                        DEBUG(0, ("Node %u does not agree we are the recmaster. Force reelection\n",
                                  nodemap->nodes[j].vnn));
                        force_election(rec, mem_ctx, vnn, nodemap);
                        goto again;
                }
        }
        /* verify that all active nodes are in normal mode
           and not in recovery mode
         */
        /* send a getrecmode call out to every node */
        switch (verify_recmode(ctdb, nodemap, mem_ctx)) {
        case MONITOR_RECOVERY_NEEDED:
                /* verify_recmode does not report which node was at fault,
                   so attribute the recovery to ourselves rather than
                   indexing past the end of the nodemap with j */
                do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, vnn);
                goto again;
        case MONITOR_FAILED:
                goto again;
        case MONITOR_OK:
                break;
        }
        /* get the nodemap for all active remote nodes and verify
           they are the same as for this node
         */
        for (j=0; j<nodemap->num; j++) {
                if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
                        continue;
                }
                if (nodemap->nodes[j].vnn == vnn) {
                        continue;
                }

                ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn,
                                           mem_ctx, &remote_nodemap);
                if (ret != 0) {
                        DEBUG(0, (__location__ " Unable to get nodemap from remote node %u\n",
                                  nodemap->nodes[j].vnn));
                        goto again;
                }

                /* if the nodes disagree on how many nodes there are
                   then this is a good reason to try recovery
                 */
                if (remote_nodemap->num != nodemap->num) {
                        DEBUG(0, (__location__ " Remote node:%u has different node count. %u vs %u of the local node\n",
                                  nodemap->nodes[j].vnn, remote_nodemap->num, nodemap->num));
                        do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].vnn);
                        goto again;
                }

                /* if the nodes disagree on which nodes exist and are
                   active, then that is also a good reason to do recovery
                 */
                for (i=0;i<nodemap->num;i++) {
                        if (remote_nodemap->nodes[i].vnn != nodemap->nodes[i].vnn) {
                                DEBUG(0, (__location__ " Remote node:%u has different nodemap vnn for %d (%u vs %u).\n",
                                          nodemap->nodes[j].vnn, i,
                                          remote_nodemap->nodes[i].vnn, nodemap->nodes[i].vnn));
                                do_recovery(rec, mem_ctx, vnn, num_active, nodemap,
                                            vnnmap, nodemap->nodes[j].vnn);
                                goto again;
                        }
                        if ((remote_nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) !=
                            (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE)) {
                                DEBUG(0, (__location__ " Remote node:%u has different nodemap flag for %d (0x%x vs 0x%x)\n",
                                          nodemap->nodes[j].vnn, i,
                                          remote_nodemap->nodes[i].flags, nodemap->nodes[i].flags));
                                do_recovery(rec, mem_ctx, vnn, num_active, nodemap,
                                            vnnmap, nodemap->nodes[j].vnn);
                                goto again;
                        }
                }

                /* update our nodemap flags according to the other
                   server - this gets the NODE_FLAGS_DISABLED
                   flag. Note that the remote node is authoritative
                   for its flags (except CONNECTED, which we know
                   matches in this code) */
                if (nodemap->nodes[j].flags != remote_nodemap->nodes[j].flags) {
                        nodemap->nodes[j].flags = remote_nodemap->nodes[j].flags;
                        need_takeover_run = true;
                }
        }
        /* there had better be the same number of lmasters in the vnn map
           as there are active nodes, or we will have to do a recovery
         */
        if (vnnmap->size != num_active) {
                DEBUG(0, (__location__ " The vnnmap count is different from the number of active nodes. %u vs %u\n",
                          vnnmap->size, num_active));
                do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, ctdb->vnn);
                goto again;
        }

        /* verify that all active nodes in the nodemap also exist in
           the vnnmap
         */
        for (j=0; j<nodemap->num; j++) {
                if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
                        continue;
                }
                if (nodemap->nodes[j].vnn == vnn) {
                        continue;
                }

                for (i=0; i<vnnmap->size; i++) {
                        if (vnnmap->map[i] == nodemap->nodes[j].vnn) {
                                break;
                        }
                }
                if (i == vnnmap->size) {
                        DEBUG(0, (__location__ " Node %u is active in the nodemap but did not exist in the vnnmap\n",
                                  nodemap->nodes[j].vnn));
                        do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].vnn);
                        goto again;
                }
        }
        /* verify that all other nodes have the same vnnmap
           and are from the same generation
         */
        for (j=0; j<nodemap->num; j++) {
                if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
                        continue;
                }
                if (nodemap->nodes[j].vnn == vnn) {
                        continue;
                }

                ret = ctdb_ctrl_getvnnmap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn,
                                          mem_ctx, &remote_vnnmap);
                if (ret != 0) {
                        DEBUG(0, (__location__ " Unable to get vnnmap from remote node %u\n",
                                  nodemap->nodes[j].vnn));
                        goto again;
                }

                /* verify the vnnmap generation is the same */
                if (vnnmap->generation != remote_vnnmap->generation) {
                        DEBUG(0, (__location__ " Remote node %u has different generation of vnnmap. %u vs %u (ours)\n",
                                  nodemap->nodes[j].vnn, remote_vnnmap->generation, vnnmap->generation));
                        do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].vnn);
                        goto again;
                }

                /* verify the vnnmap size is the same */
                if (vnnmap->size != remote_vnnmap->size) {
                        DEBUG(0, (__location__ " Remote node %u has different size of vnnmap. %u vs %u (ours)\n",
                                  nodemap->nodes[j].vnn, remote_vnnmap->size, vnnmap->size));
                        do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].vnn);
                        goto again;
                }

                /* verify the vnnmap is the same */
                for (i=0;i<vnnmap->size;i++) {
                        if (remote_vnnmap->map[i] != vnnmap->map[i]) {
                                DEBUG(0, (__location__ " Remote node %u has different vnnmap.\n",
                                          nodemap->nodes[j].vnn));
                                do_recovery(rec, mem_ctx, vnn, num_active, nodemap,
                                            vnnmap, nodemap->nodes[j].vnn);
                                goto again;
                        }
                }
        }
        /* we might need to change who has what IP assigned */
        if (need_takeover_run && ctdb->takeover.enabled) {
                ret = ctdb_takeover_run(ctdb, nodemap);
                if (ret != 0) {
                        DEBUG(0, (__location__ " Unable to setup public takeover addresses\n"));
                }
        }

        goto again;
}
/*
  event handler for when the main ctdbd dies
 */
static void ctdb_recoverd_parent(struct event_context *ev, struct fd_event *fde,
                                 uint16_t flags, void *private_data)
{
        DEBUG(0,("recovery daemon parent died - exiting\n"));
        _exit(1);
}
/*
  startup the recovery daemon as a child of the main ctdb daemon
 */
int ctdb_start_recoverd(struct ctdb_context *ctdb)
{
        int ret;
        int fd[2];
        pid_t child;

        if (pipe(fd) != 0) {
                return -1;
        }

        child = fork();
        if (child == -1) {
                return -1;
        }

        if (child != 0) {
                /* parent: keep the write end so the child notices when we die */
                close(fd[0]);
                return 0;
        }

        close(fd[1]);

        /* shutdown the transport */
        ctdb->methods->shutdown(ctdb);

        /* get a new event context */
        talloc_free(ctdb->ev);
        ctdb->ev = event_context_init(ctdb);

        event_add_fd(ctdb->ev, ctdb, fd[0], EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
                     ctdb_recoverd_parent, &fd[0]);

        close(ctdb->daemon.sd);
        ctdb->daemon.sd = -1;

        srandom(getpid() ^ time(NULL));

        /* initialise ctdb */
        ret = ctdb_socket_connect(ctdb);
        if (ret != 0) {
                DEBUG(0, (__location__ " Failed to init ctdb\n"));
                exit(1);
        }

        monitor_cluster(ctdb);

        DEBUG(0,("ERROR: ctdb_recoverd finished!?\n"));
        return -1;
}