2 CTDB IP takeover helper
4 Copyright (C) Martin Schwenke 2016
6 Based on ctdb_recovery_helper.c
7 Copyright (C) Amitay Isaacs 2015
10 Copyright (C) Ronnie Sahlberg 2007
11 Copyright (C) Andrew Tridgell 2007
12 Copyright (C) Martin Schwenke 2011
14 This program is free software; you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation; either version 3 of the License, or
17 (at your option) any later version.
19 This program is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, see <http://www.gnu.org/licenses/>.
29 #include "system/network.h"
30 #include "system/filesys.h"
36 #include "lib/util/debug.h"
37 #include "lib/util/strv.h"
38 #include "lib/util/strv_util.h"
39 #include "lib/util/sys_rw.h"
40 #include "lib/util/time.h"
41 #include "lib/util/tevent_unix.h"
43 #include "protocol/protocol.h"
44 #include "protocol/protocol_api.h"
45 #include "client/client.h"
47 #include "common/logging.h"
49 #include "server/ipalloc.h"
51 static int takeover_timeout = 9;
53 #define TIMEOUT() timeval_current_ofs(takeover_timeout, 0)
/*
 * Shared receive helper used by the *_recv wrappers in this file
 * (release_ip_recv, take_ip_recv, ipreallocated_recv, takeover_recv).
 *
 * Asks tevent whether the request finished with a unix error.
 * Presumably the error is handed back through perr and false is
 * returned in that case -- TODO confirm against the full body.
 */
59 static bool generic_recv(struct tevent_req *req, int *perr)
63 if (tevent_req_is_unix_error(req, &err)) {
/*
 * Translate the ip_alloc_algorithm tunable into an enum ipalloc_algorithm
 * value for the IP allocation code.  The visible cases yield
 * IPALLOC_DETERMINISTIC and IPALLOC_NONDETERMINISTIC.
 */
73 static enum ipalloc_algorithm
74 determine_algorithm(const struct ctdb_tunable_list *tunables)
76 switch (tunables->ip_alloc_algorithm) {
78 return IPALLOC_DETERMINISTIC;
80 return IPALLOC_NONDETERMINISTIC;
88 /**********************************************************************/
/*
 * Request state for fetching public IP lists from a set of nodes
 * (see get_public_ips_send / get_public_ips_done).
 */
90 struct get_public_ips_state {
91 struct tevent_context *ev;
92 struct ctdb_client_context *client;
/* Result array; get_public_ips_done stores each node's list at index PNN */
95 struct ctdb_public_ip_list *ips;
98 static void get_public_ips_done(struct tevent_req *subreq);
/*
 * Start an asynchronous GET_PUBLIC_IPS control to several nodes.
 *
 * The control request is built with ctdb_req_control_get_public_ips(),
 * passing available_only through, and sent with
 * ctdb_client_control_multi_send() using the TIMEOUT() deadline.
 * Replies are processed in get_public_ips_done().
 *
 * When request creation or the sub-request allocation fails, the
 * request is posted via tevent_req_post() so the caller still receives
 * a request that will report the failure.
 */
100 static struct tevent_req *get_public_ips_send(
102 struct tevent_context *ev,
103 struct ctdb_client_context *client,
108 struct tevent_req *req, *subreq;
109 struct get_public_ips_state *state;
110 struct ctdb_req_control request;
112 req = tevent_req_create(mem_ctx, &state, struct get_public_ips_state);
114 return tevent_req_post(req, ev);
118 state->count = count;
121 ctdb_req_control_get_public_ips(&request, available_only);
122 subreq = ctdb_client_control_multi_send(mem_ctx, ev, client,
125 TIMEOUT(), &request);
126 if (tevent_req_nomem(subreq, req)) {
127 return tevent_req_post(req, ev);
129 tevent_req_set_callback(subreq, get_public_ips_done, req);
/*
 * Completion callback for the GET_PUBLIC_IPS multi-node control.
 *
 * On failure an error is logged -- naming the failing node when
 * ctdb_client_control_multi_error() can identify one -- and the
 * request fails with ret.
 *
 * On success an array of struct ctdb_public_ip_list is allocated on
 * state, each reply is parsed with ctdb_reply_control_get_public_ips()
 * and the parsed list is copied into state->ips[pnn], i.e. the result
 * is indexed by node number rather than by reply position.  A reply
 * that cannot be parsed fails the request with EIO.
 */
134 static void get_public_ips_done(struct tevent_req *subreq)
136 struct tevent_req *req = tevent_req_callback_data(
137 subreq, struct tevent_req);
138 struct get_public_ips_state *state = tevent_req_data(
139 req, struct get_public_ips_state);
140 struct ctdb_reply_control **reply;
145 status = ctdb_client_control_multi_recv(subreq, &ret, state, &err_list,
152 ret2 = ctdb_client_control_multi_error(state->pnns,
156 D_ERR("control GET_PUBLIC_IPS failed on "
157 "node %u, ret=%d\n", pnn, ret2);
159 D_ERR("control GET_PUBLIC_IPS failed, "
162 tevent_req_error(req, ret);
166 state->ips = talloc_zero_array(state, struct ctdb_public_ip_list,
168 if (tevent_req_nomem(state->ips, req)) {
172 for (i = 0; i < state->count; i++) {
174 struct ctdb_public_ip_list *ips;
176 pnn = state->pnns[i];
177 ret = ctdb_reply_control_get_public_ips(reply[i], state->ips,
180 D_ERR("control GET_PUBLIC_IPS failed on "
182 tevent_req_error(req, EIO);
185 state->ips[pnn] = *ips;
190 tevent_req_done(req);
/*
 * Collect the result of get_public_ips_send().
 *
 * Checks the request for a unix error first.  On success the array of
 * IP lists built by get_public_ips_done() is re-parented onto mem_ctx
 * with talloc_steal() and returned through ips, so the caller owns it.
 */
193 static bool get_public_ips_recv(struct tevent_req *req, int *perr,
195 struct ctdb_public_ip_list **ips)
197 struct get_public_ips_state *state = tevent_req_data(
198 req, struct get_public_ips_state);
201 if (tevent_req_is_unix_error(req, &err)) {
208 *ips = talloc_steal(mem_ctx, state->ips);
213 /**********************************************************************/
/* Aggregate state for the RELEASE_IP stage (one request covering all IPs) */
215 struct release_ip_state {
/* Caller-owned failure counters; release_ip_done increments the entry for a failing PNN */
220 uint32_t *ban_credits;
/* State for the RELEASE_IP control of a single IP address */
223 struct release_ip_one_state {
/* Parent request (struct release_ip_state) that this control reports into */
224 struct tevent_req *req;
230 static void release_ip_done(struct tevent_req *subreq);
/*
 * Start the RELEASE_IP stage.
 *
 * For every entry in all_ips a release_ip_one_state is allocated, the
 * address is rendered to a string for logging, and the subset of pnns
 * whose PNN differs from the IP's assigned node is collected.  One
 * multi-node RELEASE_IP control is then sent per IP with the caller's
 * (cumulative) timeout; each completion is handled by release_ip_done().
 *
 * ban_credits is remembered in the state so that failing nodes can be
 * charged when replies come in.  Allocation failures post the request
 * via tevent_req_post() so the error is delivered asynchronously.
 */
232 static struct tevent_req *release_ip_send(TALLOC_CTX *mem_ctx,
233 struct tevent_context *ev,
234 struct ctdb_client_context *client,
237 struct timeval timeout,
238 struct public_ip_list *all_ips,
239 uint32_t *ban_credits)
241 struct tevent_req *req, *subreq;
242 struct release_ip_state *state;
243 struct ctdb_req_control request;
244 struct public_ip_list *tmp_ip;
246 req = tevent_req_create(mem_ctx, &state, struct release_ip_state);
252 state->num_replies = 0;
253 state->num_fails = 0;
254 state->ban_credits = ban_credits;
256 /* Send a RELEASE_IP to all nodes that should not be hosting
257 * each IP. For each IP, all but one of these will be
258 * redundant. However, the redundant ones are used to tell
259 * nodes which node should be hosting the IP so that commands
260 * like "ctdb ip" can display a particular node's idea of who
261 * is hosting what. */
262 for (tmp_ip = all_ips; tmp_ip != NULL; tmp_ip = tmp_ip->next) {
263 struct release_ip_one_state *substate;
264 struct ctdb_public_ip ip;
267 substate = talloc_zero(state, struct release_ip_one_state);
268 if (tevent_req_nomem(substate, req)) {
269 return tevent_req_post(req, ev);
272 substate->pnns = talloc_zero_array(substate, uint32_t, count);
273 if (tevent_req_nomem(substate->pnns, req)) {
274 return tevent_req_post(req, ev);
280 substate->ip_str = ctdb_sock_addr_to_string(substate,
282 if (tevent_req_nomem(substate->ip_str, req)) {
283 return tevent_req_post(req, ev);
286 for (i = 0; i < count; i++) {
287 uint32_t pnn = pnns[i];
288 /* If pnn is not the node that should be
289 * hosting the IP then add it to the list of
290 * nodes that need to do a release. */
291 if (tmp_ip->pnn != pnn) {
292 substate->pnns[substate->count] = pnn;
297 ip.pnn = tmp_ip->pnn;
298 ip.addr = tmp_ip->addr;
299 ctdb_req_control_release_ip(&request, &ip);
300 subreq = ctdb_client_control_multi_send(state, ev, client,
303 timeout,/* cumulative */
305 if (tevent_req_nomem(subreq, req)) {
306 return tevent_req_post(req, ev);
308 tevent_req_set_callback(subreq, release_ip_done, substate);
/*
 * Completion callback for the RELEASE_IP control of one IP.
 *
 * Success is logged at info level.  On failure the per-node error list
 * is scanned: each failing node is logged, gets a ban credit, and its
 * error is remembered in state->err_any.  If no per-node error is
 * found the failure is logged as an internal error and err_any is set
 * to EIO.
 *
 * The per-IP substate is freed and the reply counter bumped.  Only once
 * all sent controls have been answered does the parent request finish:
 * with err_any if any failure was counted, otherwise successfully.
 */
316 static void release_ip_done(struct tevent_req *subreq)
318 struct release_ip_one_state *substate = tevent_req_callback_data(
319 subreq, struct release_ip_one_state);
320 struct tevent_req *req = substate->req;
321 struct release_ip_state *state = tevent_req_data(
322 req, struct release_ip_state);
325 bool status, found_errors;
327 status = ctdb_client_control_multi_recv(subreq, &ret, state,
332 D_INFO("RELEASE_IP %s succeeded on %d nodes\n",
333 substate->ip_str, substate->count);
337 /* Get some clear error messages out of err_list and count
340 found_errors = false;
341 for (i = 0; i < substate->count; i++) {
342 int err = err_list[i];
344 uint32_t pnn = substate->pnns[i];
346 D_ERR("RELEASE_IP %s failed on node %u, "
347 "ret=%d\n", substate->ip_str, pnn, err);
349 state->ban_credits[pnn]++;
350 state->err_any = err;
354 if (! found_errors) {
355 D_ERR("RELEASE_IP %s internal error, ret=%d\n",
356 substate->ip_str, ret);
357 state->err_any = EIO;
363 talloc_free(substate);
365 state->num_replies++;
367 if (state->num_replies < state->num_sent) {
368 /* Not all replies received, don't go further */
372 if (state->num_fails > 0) {
373 tevent_req_error(req, state->err_any);
377 tevent_req_done(req);
/* Result of release_ip_send(): no payload, just the status from generic_recv() */
380 static bool release_ip_recv(struct tevent_req *req, int *perr)
382 return generic_recv(req, perr);
385 /**********************************************************************/
/* Aggregate state for the TAKEOVER_IP stage (one request covering all IPs) */
387 struct take_ip_state {
/* Caller-owned failure counters; take_ip_done increments the entry for a failing PNN */
392 uint32_t *ban_credits;
/* State for the TAKEOVER_IP control of a single IP address */
395 struct take_ip_one_state {
/* Parent request (struct take_ip_state) that this control reports into */
396 struct tevent_req *req;
401 static void take_ip_done(struct tevent_req *subreq);
/*
 * Start the TAKEOVER_IP stage.
 *
 * Walks all_ips; entries whose pnn is -1 are left unassigned.  For
 * every other entry a take_ip_one_state is allocated (recording target
 * PNN and a printable address) and a single TAKEOVER_IP control is sent
 * to the node that should host the IP, using the caller's (cumulative)
 * timeout.  Completions are handled by take_ip_done().
 *
 * If no control was sent at all, the request is completed immediately
 * and posted so the caller's callback still runs.
 */
403 static struct tevent_req *take_ip_send(TALLOC_CTX *mem_ctx,
404 struct tevent_context *ev,
405 struct ctdb_client_context *client,
406 struct timeval timeout,
407 struct public_ip_list *all_ips,
408 uint32_t *ban_credits)
410 struct tevent_req *req, *subreq;
411 struct take_ip_state *state;
412 struct ctdb_req_control request;
413 struct public_ip_list *tmp_ip;
415 req = tevent_req_create(mem_ctx, &state, struct take_ip_state);
421 state->num_replies = 0;
422 state->num_fails = 0;
423 state->ban_credits = ban_credits;
425 /* For each IP, send a TAKEOVER_IP to the node that should be
426 * hosting it. Many of these will often be redundant (since
427 * the allocation won't have changed) but they can be useful
428 * to recover from inconsistencies. */
429 for (tmp_ip = all_ips; tmp_ip != NULL; tmp_ip = tmp_ip->next) {
430 struct take_ip_one_state *substate;
431 struct ctdb_public_ip ip;
433 if (tmp_ip->pnn == -1) {
434 /* IP will be unassigned */
438 substate = talloc_zero(state, struct take_ip_one_state);
439 if (tevent_req_nomem(substate, req)) {
440 return tevent_req_post(req, ev);
444 substate->pnn = tmp_ip->pnn;
446 substate->ip_str = ctdb_sock_addr_to_string(substate,
448 if (tevent_req_nomem(substate->ip_str, req)) {
449 return tevent_req_post(req, ev);
452 ip.pnn = tmp_ip->pnn;
453 ip.addr = tmp_ip->addr;
454 ctdb_req_control_takeover_ip(&request, &ip);
455 subreq = ctdb_client_control_send(
456 state, ev, client, tmp_ip->pnn,
457 timeout, /* cumulative */
459 if (tevent_req_nomem(subreq, req)) {
460 return tevent_req_post(req, ev);
462 tevent_req_set_callback(subreq, take_ip_done, substate);
467 /* None sent, finished... */
468 if (state->num_sent == 0) {
469 tevent_req_done(req);
470 return tevent_req_post(req, ev);
/*
 * Completion callback for the TAKEOVER_IP control of one IP.
 *
 * Two failure modes are logged separately: the control could not be
 * delivered ("failed to node") or the node's reply reported an error
 * ("failed on node").  Success is logged at info level.  On the
 * failure path the target node receives a ban credit and ret is kept
 * in state->err_any.
 *
 * The per-IP substate is freed and the reply counter bumped.  Only once
 * all sent controls have been answered does the parent request finish:
 * with err_any if any failure was counted, otherwise successfully.
 */
476 static void take_ip_done(struct tevent_req *subreq)
478 struct take_ip_one_state *substate = tevent_req_callback_data(
479 subreq, struct take_ip_one_state);
480 struct tevent_req *req = substate->req;
481 struct ctdb_reply_control *reply;
482 struct take_ip_state *state = tevent_req_data(
483 req, struct take_ip_state);
487 status = ctdb_client_control_recv(subreq, &ret, state, &reply);
491 D_ERR("TAKEOVER_IP %s failed to node %u, ret=%d\n",
492 substate->ip_str, substate->pnn, ret);
496 ret = ctdb_reply_control_takeover_ip(reply);
498 D_ERR("TAKEOVER_IP %s failed on node %u, ret=%d\n",
499 substate->ip_str, substate->pnn, ret);
503 D_INFO("TAKEOVER_IP %s succeeded on node %u\n",
504 substate->ip_str, substate->pnn);
508 state->ban_credits[substate->pnn]++;
510 state->err_any = ret;
513 talloc_free(substate);
515 state->num_replies++;
517 if (state->num_replies < state->num_sent) {
518 /* Not all replies received, don't go further */
522 if (state->num_fails > 0) {
523 tevent_req_error(req, state->err_any);
527 tevent_req_done(req);
/* Result of take_ip_send(): no payload, just the status from generic_recv() */
530 static bool take_ip_recv(struct tevent_req *req, int *perr)
532 return generic_recv(req, perr);
535 /**********************************************************************/
/* Request state for the IPREALLOCATED stage */
537 struct ipreallocated_state {
/* Caller-owned failure counters; ipreallocated_done increments the entry for a failing PNN */
540 uint32_t *ban_credits;
543 static void ipreallocated_done(struct tevent_req *subreq);
/*
 * Start the IPREALLOCATED stage: build the control with
 * ctdb_req_control_ipreallocated() and send it to a set of nodes via
 * ctdb_client_control_multi_send() using the caller's (cumulative)
 * timeout.  The node count and ban_credits array are kept in state for
 * ipreallocated_done(), which handles the replies.
 */
545 static struct tevent_req *ipreallocated_send(TALLOC_CTX *mem_ctx,
546 struct tevent_context *ev,
547 struct ctdb_client_context *client,
550 struct timeval timeout,
551 uint32_t *ban_credits)
553 struct tevent_req *req, *subreq;
554 struct ipreallocated_state *state;
555 struct ctdb_req_control request;
557 req = tevent_req_create(mem_ctx, &state, struct ipreallocated_state);
563 state->count = count;
564 state->ban_credits = ban_credits;
566 ctdb_req_control_ipreallocated(&request);
567 subreq = ctdb_client_control_multi_send(state, ev, client,
569 timeout, /* cumulative */
571 if (tevent_req_nomem(subreq, req)) {
572 return tevent_req_post(req, ev);
574 tevent_req_set_callback(subreq, ipreallocated_done, req);
/*
 * Completion callback for the IPREALLOCATED multi-node control.
 *
 * On success the node count is logged and the request completes.
 * Otherwise the per-node error list is scanned: each failing node is
 * logged and given a ban credit.  If no per-node error is found, an
 * internal error is logged instead.  In both failure cases the request
 * fails with ret.
 */
579 static void ipreallocated_done(struct tevent_req *subreq)
581 struct tevent_req *req = tevent_req_callback_data(
582 subreq, struct tevent_req);
583 struct ipreallocated_state *state = tevent_req_data(
584 req, struct ipreallocated_state);
585 int *err_list = NULL;
587 bool status, found_errors;
589 status = ctdb_client_control_multi_recv(subreq, &ret, state,
594 D_INFO("IPREALLOCATED succeeded on %d nodes\n", state->count);
595 tevent_req_done(req);
599 /* Get some clear error messages out of err_list and count
602 found_errors = false;
603 for (i = 0; i < state->count; i++) {
604 int err = err_list[i];
606 uint32_t pnn = state->pnns[i];
608 D_ERR("IPREALLOCATED failed on node %u, ret=%d\n",
611 state->ban_credits[pnn]++;
616 if (! found_errors) {
617 D_ERR("IPREALLOCATED internal error, ret=%d\n", ret);
620 tevent_req_error(req, ret);
/* Result of ipreallocated_send(): no payload, just the status from generic_recv() */
623 static bool ipreallocated_recv(struct tevent_req *req, int *perr)
625 return generic_recv(req, perr);
628 /**********************************************************************/
631 * Recalculate the allocation of public IPs to nodes and have the
632 * nodes host their allocated addresses.
636 * - Initialise IP allocation state. Pass:
637 * + algorithm to be used;
638 * + various tunables (NoIPTakeover, NoIPFailback, NoIPHostOnAllDisabled)
639 * + list of nodes to force rebalance (internal structure, currently
640 * no way to fetch, only used by LCP2 for nodes that have had new
641 * IP addresses added).
642 * - Set IP flags for IP allocation based on node map
643 * - Retrieve known and available IP addresses (done separately so
644 * values can be faked in unit testing)
645 * - Use ipalloc_set_public_ips() to set known and available IP
646 * addresses for allocation
647 * - If cluster can't host IP addresses then jump to IPREALLOCATED
648 * - Run IP allocation algorithm
649 * - Send RELEASE_IP to all nodes for IPs they should not host
650 * - Send TAKEOVER_IP to all nodes for IPs they should host
651 * - Send IPREALLOCATED to all nodes
/*
 * State for a complete takeover run; it is carried through the chain
 * of takeover_*_done() callbacks.
 */
654 struct takeover_state {
655 struct tevent_context *ev;
656 struct ctdb_client_context *client;
/* Cumulative deadline shared by the RELEASE_IP, TAKEOVER_IP and IPREALLOCATED stages */
657 struct timeval timeout;
/* Connected nodes: targets for RELEASE_IP and IPREALLOCATED */
659 uint32_t *pnns_connected;
/* Active nodes: the ones queried for their public IPs */
661 uint32_t *pnns_active;
/* Passed in by the caller and forwarded to the IP allocation state */
664 uint32_t *force_rebalance_nodes;
665 struct ctdb_tunable_list *tun_list;
666 struct ipalloc_state *ipalloc_state;
667 struct ctdb_public_ip_list *known_ips;
/* Allocation result produced by ipalloc() */
668 struct public_ip_list *all_ips;
/* Failure counters indexed by PNN; takeover_failed() picks the node with the most */
669 uint32_t *ban_credits;
672 static void takeover_tunables_done(struct tevent_req *subreq);
673 static void takeover_nodemap_done(struct tevent_req *subreq);
674 static void takeover_known_ips_done(struct tevent_req *subreq);
675 static void takeover_avail_ips_done(struct tevent_req *subreq);
676 static void takeover_release_ip_done(struct tevent_req *subreq);
677 static void takeover_take_ip_done(struct tevent_req *subreq);
678 static void takeover_ipreallocated(struct tevent_req *req);
679 static void takeover_ipreallocated_done(struct tevent_req *subreq);
680 static void takeover_failed(struct tevent_req *subreq, int ret);
681 static void takeover_failed_done(struct tevent_req *subreq);
/*
 * Entry point of the takeover run.
 *
 * Records the client, the optional force_rebalance_nodes list and the
 * local node's PNN (ctdb_client_pnn) as destination node, then starts
 * the chain by requesting all tunables from that node with the
 * TIMEOUT() deadline.  Processing continues in takeover_tunables_done().
 */
683 static struct tevent_req *takeover_send(TALLOC_CTX *mem_ctx,
684 struct tevent_context *ev,
685 struct ctdb_client_context *client,
686 uint32_t *force_rebalance_nodes)
688 struct tevent_req *req, *subreq;
689 struct takeover_state *state;
690 struct ctdb_req_control request;
692 req = tevent_req_create(mem_ctx, &state, struct takeover_state);
698 state->client = client;
699 state->force_rebalance_nodes = force_rebalance_nodes;
700 state->destnode = ctdb_client_pnn(client);
702 ctdb_req_control_get_all_tunables(&request);
703 subreq = ctdb_client_control_send(state, state->ev, state->client,
704 state->destnode, TIMEOUT(),
706 if (tevent_req_nomem(subreq, req)) {
707 return tevent_req_post(req, ev);
709 tevent_req_set_callback(subreq, takeover_tunables_done, req);
/*
 * GET_ALL_TUNABLES reply handler.
 *
 * A failed control or an unparsable reply fails the takeover request
 * with ret.  Otherwise the file-scope takeover_timeout is replaced by
 * the value from the tunables (so later TIMEOUT() uses reflect the
 * cluster configuration) and the node map is requested from the
 * destination node.  Processing continues in takeover_nodemap_done().
 */
714 static void takeover_tunables_done(struct tevent_req *subreq)
716 struct tevent_req *req = tevent_req_callback_data(
717 subreq, struct tevent_req);
718 struct takeover_state *state = tevent_req_data(
719 req, struct takeover_state);
720 struct ctdb_reply_control *reply;
721 struct ctdb_req_control request;
725 status = ctdb_client_control_recv(subreq, &ret, state, &reply);
728 D_ERR("control GET_ALL_TUNABLES failed, ret=%d\n", ret);
729 tevent_req_error(req, ret);
733 ret = ctdb_reply_control_get_all_tunables(reply, state,
736 D_ERR("control GET_ALL_TUNABLES failed, ret=%d\n", ret);
737 tevent_req_error(req, ret);
743 takeover_timeout = state->tun_list->takeover_timeout;
745 ctdb_req_control_get_nodemap(&request);
746 subreq = ctdb_client_control_send(state, state->ev, state->client,
747 state->destnode, TIMEOUT(),
749 if (tevent_req_nomem(subreq, req)) {
752 tevent_req_set_callback(subreq, takeover_nodemap_done, req);
/*
 * GET_NODEMAP reply handler.
 *
 * After parsing the node map this:
 *  - records the node count and builds the connected and active node
 *    lists;
 *  - sets a default cumulative timeout of 3 * takeover_timeout;
 *  - allocates the zeroed ban_credits array;
 *  - if the disable_ip_failover tunable is set, skips straight to the
 *    IPREALLOCATED stage;
 *  - otherwise creates the IP allocation state from the chosen
 *    algorithm, the no_ip_* tunables and force_rebalance_nodes, applies
 *    the node flags, and starts fetching public IPs from the active
 *    nodes (continues in takeover_known_ips_done()).
 *
 * NOTE(review): a non-positive node count is reported as ENOMEM;
 * presumably the list helpers only fail on allocation -- confirm.
 */
755 static void takeover_nodemap_done(struct tevent_req *subreq)
757 struct tevent_req *req = tevent_req_callback_data(
758 subreq, struct tevent_req);
759 struct takeover_state *state = tevent_req_data(
760 req, struct takeover_state);
761 struct ctdb_reply_control *reply;
764 struct ctdb_node_map *nodemap;
766 status = ctdb_client_control_recv(subreq, &ret, state, &reply);
769 D_ERR("control GET_NODEMAP failed to node %u, ret=%d\n",
770 state->destnode, ret);
771 tevent_req_error(req, ret);
775 ret = ctdb_reply_control_get_nodemap(reply, state, &nodemap);
777 D_ERR("control GET_NODEMAP failed, ret=%d\n", ret);
778 tevent_req_error(req, ret);
782 state->num_nodes = nodemap->num;
784 state->num_connected = list_of_connected_nodes(nodemap,
785 CTDB_UNKNOWN_PNN, state,
786 &state->pnns_connected);
787 if (state->num_connected <= 0) {
788 tevent_req_error(req, ENOMEM);
792 state->num_active = list_of_active_nodes(nodemap,
793 CTDB_UNKNOWN_PNN, state,
794 &state->pnns_active);
795 if (state->num_active <= 0) {
796 tevent_req_error(req, ENOMEM);
800 /* Default timeout for early jump to IPREALLOCATED. See below
801 * for explanation of 3 times...
803 state->timeout = timeval_current_ofs(3 * takeover_timeout, 0);
805 state->ban_credits = talloc_zero_array(state, uint32_t,
807 if (tevent_req_nomem(state->ban_credits, req)) {
811 if (state->tun_list->disable_ip_failover != 0) {
812 /* IP failover is completely disabled so just send out
813 * ipreallocated event.
815 takeover_ipreallocated(req);
819 state->ipalloc_state =
821 state, state->num_nodes,
822 determine_algorithm(state->tun_list),
823 (state->tun_list->no_ip_takeover != 0),
824 (state->tun_list->no_ip_failback != 0),
825 (state->tun_list->no_ip_host_on_all_disabled != 0),
826 state->force_rebalance_nodes);
827 if (tevent_req_nomem(state->ipalloc_state, req)) {
831 ipalloc_set_node_flags(state->ipalloc_state, nodemap);
833 subreq = get_public_ips_send(state, state->ev, state->client,
834 state->pnns_active, state->num_active,
836 if (tevent_req_nomem(subreq, req)) {
840 tevent_req_set_callback(subreq, takeover_known_ips_done, req);
/*
 * Handler for the first public IP query (known IPs).
 *
 * Stores the result in state->known_ips; on failure logs and fails the
 * takeover request with ret.  Then issues a second query to the active
 * nodes for the available IPs, handled by takeover_avail_ips_done().
 */
843 static void takeover_known_ips_done(struct tevent_req *subreq)
845 struct tevent_req *req = tevent_req_callback_data(
846 subreq, struct tevent_req);
847 struct takeover_state *state = tevent_req_data(
848 req, struct takeover_state);
852 status = get_public_ips_recv(subreq, &ret, state, &state->known_ips);
856 D_ERR("Failed to fetch known public IPs\n");
857 tevent_req_error(req, ret);
861 subreq = get_public_ips_send(state, state->ev, state->client,
862 state->pnns_active, state->num_active,
864 if (tevent_req_nomem(subreq, req)) {
868 tevent_req_set_callback(subreq, takeover_avail_ips_done, req);
/*
 * Handler for the second public IP query (available IPs).
 *
 * Feeds the known and available IP lists into the allocation state.
 * If the cluster cannot host any IPs yet, a notice is logged and the
 * run skips to the IPREALLOCATED stage.  Otherwise the allocation
 * algorithm is run, the cumulative timeout is restarted at
 * 3 * takeover_timeout from now, and the RELEASE_IP stage is started
 * against the connected nodes (continues in takeover_release_ip_done()).
 */
871 static void takeover_avail_ips_done(struct tevent_req *subreq)
873 struct tevent_req *req = tevent_req_callback_data(
874 subreq, struct tevent_req);
875 struct takeover_state *state = tevent_req_data(
876 req, struct takeover_state);
879 struct ctdb_public_ip_list *available_ips;
881 status = get_public_ips_recv(subreq, &ret, state, &available_ips);
885 D_ERR("Failed to fetch available public IPs\n");
886 tevent_req_error(req, ret);
890 ipalloc_set_public_ips(state->ipalloc_state,
891 state->known_ips, available_ips);
893 if (! ipalloc_can_host_ips(state->ipalloc_state)) {
894 D_NOTICE("No nodes available to host public IPs yet\n");
895 takeover_ipreallocated(req);
899 /* Do the IP reassignment calculations */
900 state->all_ips = ipalloc(state->ipalloc_state);
901 if (tevent_req_nomem(state->all_ips, req)) {
905 /* Each of the following stages (RELEASE_IP, TAKEOVER_IP,
906 * IPREALLOCATED) notionally has a timeout of TakeoverTimeout
907 * seconds. However, RELEASE_IP can take longer due to TCP
908 * connection killing, so sometimes needs more time.
909 * Therefore, use a cumulative timeout of TakeoverTimeout * 3
910 * seconds across all 3 stages. No explicit expiry checks are
911 * needed before each stage because tevent is smart enough to
912 * fire the timeouts even if they are in the past. Initialise
913 * this here so it explicitly covers the stages we're
914 * interested in but, in particular, not the time taken by the
917 state->timeout = timeval_current_ofs(3 * takeover_timeout, 0);
919 subreq = release_ip_send(state, state->ev, state->client,
920 state->pnns_connected, state->num_connected,
921 state->timeout, state->all_ips,
923 if (tevent_req_nomem(subreq, req)) {
926 tevent_req_set_callback(subreq, takeover_release_ip_done, req);
/*
 * RELEASE_IP stage finished.  A failure is routed through
 * takeover_failed() so banning credits can be assigned; otherwise the
 * TAKEOVER_IP stage is started with the same cumulative timeout and
 * continues in takeover_take_ip_done().
 */
929 static void takeover_release_ip_done(struct tevent_req *subreq)
931 struct tevent_req *req = tevent_req_callback_data(
932 subreq, struct tevent_req);
933 struct takeover_state *state = tevent_req_data(
934 req, struct takeover_state);
938 status = release_ip_recv(subreq, &ret);
942 takeover_failed(req, ret);
946 /* All released, now for takeovers */
948 subreq = take_ip_send(state, state->ev, state->client,
949 state->timeout, state->all_ips,
951 if (tevent_req_nomem(subreq, req)) {
954 tevent_req_set_callback(subreq, takeover_take_ip_done, req);
/*
 * TAKEOVER_IP stage finished.  A failure is routed through
 * takeover_failed(); otherwise the run moves on to the IPREALLOCATED
 * stage.
 */
957 static void takeover_take_ip_done(struct tevent_req *subreq)
959 struct tevent_req *req = tevent_req_callback_data(
960 subreq, struct tevent_req);
964 status = take_ip_recv(subreq, &ret);
968 takeover_failed(req, ret);
972 takeover_ipreallocated(req);
/*
 * Start the final IPREALLOCATED stage against the connected nodes.
 * This is reached both after a normal TAKEOVER_IP stage and from the
 * early-exit paths (failover disabled, no node able to host IPs).
 * Continues in takeover_ipreallocated_done().
 */
975 static void takeover_ipreallocated(struct tevent_req *req)
977 struct takeover_state *state = tevent_req_data(
978 req, struct takeover_state);
979 struct tevent_req *subreq;
981 subreq = ipreallocated_send(state, state->ev, state->client,
982 state->pnns_connected,
983 state->num_connected,
986 if (tevent_req_nomem(subreq, req)) {
989 tevent_req_set_callback(subreq, takeover_ipreallocated_done, req);
/*
 * IPREALLOCATED stage finished.  A failure is routed through
 * takeover_failed(); otherwise the whole takeover request completes
 * successfully.
 */
992 static void takeover_ipreallocated_done(struct tevent_req *subreq)
994 struct tevent_req *req = tevent_req_callback_data(
995 subreq, struct tevent_req);
999 status = ipreallocated_recv(subreq, &ret);
1000 TALLOC_FREE(subreq);
1003 takeover_failed(req, ret);
1007 tevent_req_done(req);
/* Context kept while the banning message sent by takeover_failed() is in flight */
1010 struct takeover_failed_state {
/* The takeover request to fail once the message has been handled */
1011 struct tevent_req *req;
/*
 * Common failure path for the RELEASE_IP, TAKEOVER_IP and IPREALLOCATED
 * stages.
 *
 * If the enable_bans tunable is off, the request simply fails with ret.
 * Otherwise the node with the highest ban credit count is looked up.
 * If any node has credits, a CTDB_SRVID_BANNING message naming that
 * node is sent to the local node; the original ret is stashed in a
 * substate so takeover_failed_done() can report it afterwards.  If the
 * message cannot be created, or no node has credits, the request fails
 * with ret directly.
 *
 * NOTE(review): the prototype earlier in the file is static while this
 * definition omits the keyword; linkage stays internal, but adding
 * static here would be clearer.
 * NOTE(review): max_credits is int while ban_credits holds uint32_t,
 * so the comparison below mixes signedness.
 */
1015 void takeover_failed(struct tevent_req *req, int ret)
1017 struct takeover_state *state = tevent_req_data(
1018 req, struct takeover_state);
1019 struct tevent_req *subreq;
1020 uint32_t max_pnn = CTDB_UNKNOWN_PNN;
1021 int max_credits = 0;
1024 /* Check that bans are enabled */
1025 if (state->tun_list->enable_bans == 0) {
1026 tevent_req_error(req, ret);
1030 for (pnn = 0; pnn < state->num_nodes; pnn++) {
1031 if (state->ban_credits[pnn] > max_credits) {
1033 max_credits = state->ban_credits[pnn];
1037 if (max_credits > 0) {
1038 struct ctdb_req_message message;
1039 struct takeover_failed_state *substate;
1041 D_WARNING("Assigning banning credits to node %u\n", max_pnn);
1043 substate = talloc_zero(state, struct takeover_failed_state);
1044 if (tevent_req_nomem(substate, req)) {
1047 substate->req = req;
1048 substate->ret = ret;
1050 message.srvid = CTDB_SRVID_BANNING;
1051 message.data.pnn = max_pnn;
1053 subreq = ctdb_client_message_send(
1054 state, state->ev, state->client,
1055 ctdb_client_pnn(state->client),
1057 if (subreq == NULL) {
1058 D_ERR("failed to assign banning credits\n");
1059 tevent_req_error(req, ret);
1062 tevent_req_set_callback(subreq, takeover_failed_done, substate);
1064 tevent_req_error(req, ret);
/*
 * Completion of the banning message sent by takeover_failed().
 *
 * A failure to deliver the message is only logged.  Either way the
 * takeover request is failed with the original error that was saved in
 * the substate, not with the result of the message send.
 */
1068 static void takeover_failed_done(struct tevent_req *subreq)
1070 struct takeover_failed_state *substate = tevent_req_callback_data(
1071 subreq, struct takeover_failed_state);
1072 struct tevent_req *req = substate->req;
1076 status = ctdb_client_message_recv(subreq, &ret);
1077 TALLOC_FREE(subreq);
1079 D_ERR("failed to assign banning credits, ret=%d\n", ret);
1082 ret = substate->ret;
1083 talloc_free(substate);
1084 tevent_req_error(req, ret);
/*
 * Fetch the outcome of takeover_send().  The boolean result of
 * generic_recv() is discarded; callers get the outcome through perr.
 */
1087 static void takeover_recv(struct tevent_req *req, int *perr)
1089 generic_recv(req, perr);
/*
 * Parse a comma-separated list of node numbers.
 *
 * The string s is split on "," into a string vector, an array of
 * uint32_t with one slot per element is allocated on mem_ctx, and the
 * elements are visited in order with strv_next().  Allocation failures
 * are logged as "out of memory"; main() treats a NULL result as
 * failure.  How each element is converted to a number is not visible
 * here -- TODO confirm.
 */
1092 static uint32_t *parse_node_list(TALLOC_CTX *mem_ctx, const char* s)
1099 ret = strv_split(mem_ctx, &strv, s, ",");
1101 D_ERR("out of memory\n");
1105 num = strv_count(strv);
1107 nodes = talloc_array(mem_ctx, uint32_t, num);
1108 if (nodes == NULL) {
1109 D_ERR("out of memory\n");
1114 for (i = 0; i < num; i++) {
1115 t = strv_next(strv, t);
/* Print the command-line synopsis: output fd, socket path, optional node list */
1122 static void usage(const char *progname)
1125 "\nUsage: %s <output-fd> <ctdb-socket-path> "
1126 "[<force-rebalance-nodes>]\n",
1131 * Arguments - write fd, socket path, optional force-rebalance node list
/*
 * Helper entry point.
 *
 * Expects 2 or 3 arguments: the file descriptor on which to report the
 * result, the ctdb socket path, and optionally a comma-separated list
 * of nodes to force-rebalance.  Sets up a talloc context, logging, a
 * tevent context and a ctdb client connection, runs the takeover
 * request to completion with tevent_req_poll(), and writes the integer
 * result to the output fd before freeing everything.
 *
 * NOTE(review): the output fd is parsed with atoi(), which cannot
 * report malformed input; strtol() with end-pointer checking would be
 * safer.
 */
1133 int main(int argc, const char *argv[])
1136 const char *sockpath;
1137 TALLOC_CTX *mem_ctx;
1138 struct tevent_context *ev;
1139 struct ctdb_client_context *client;
1141 struct tevent_req *req;
1142 uint32_t *force_rebalance_nodes = NULL;
1144 if (argc < 3 || argc > 4) {
1149 write_fd = atoi(argv[1]);
1152 mem_ctx = talloc_new(NULL);
1153 if (mem_ctx == NULL) {
1154 fprintf(stderr, "talloc_new() failed\n");
1160 force_rebalance_nodes = parse_node_list(mem_ctx, argv[3]);
1161 if (force_rebalance_nodes == NULL) {
1167 logging_init(mem_ctx, NULL, NULL, "ctdb-takeover");
1169 ev = tevent_context_init(mem_ctx);
1171 D_ERR("tevent_context_init() failed\n");
1176 ret = ctdb_client_init(mem_ctx, ev, sockpath, &client);
1178 D_ERR("ctdb_client_init() failed, ret=%d\n", ret);
1182 req = takeover_send(mem_ctx, ev, client, force_rebalance_nodes);
1184 D_ERR("takeover_send() failed\n");
1189 if (! tevent_req_poll(req, ev)) {
1190 D_ERR("tevent_req_poll() failed\n");
1195 takeover_recv(req, &ret);
1198 D_ERR("takeover run failed, ret=%d\n", ret);
1202 sys_write_v(write_fd, &ret, sizeof(ret));
1204 talloc_free(mem_ctx);