ctdb-daemon: Remove implementation of CTDB_CONTROL_KILL_TCP
[vlendec/samba-autobuild/.git] / ctdb / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6    Copyright (C) Martin Schwenke  2011
7
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3 of the License, or
11    (at your option) any later version.
12    
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17    
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, see <http://www.gnu.org/licenses/>.
20 */
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
26
27 #include <talloc.h>
28 #include <tevent.h>
29
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_process.h"
34
35 #include "ctdb_private.h"
36 #include "ctdb_client.h"
37
38 #include "common/rb_tree.h"
39 #include "common/reqid.h"
40 #include "common/system.h"
41 #include "common/common.h"
42 #include "common/logging.h"
43
44 #include "server/ipalloc.h"
45
/*
  Timeout value built from the takeover_timeout tunable, which is
  passed as the first argument of timeval_current_ofs().  The expansion
  refers to a variable named "ctdb", so one must be in scope wherever
  the macro is used.
 */
#define TAKEOVER_TIMEOUT() \
        timeval_current_ofs(ctdb->tunable.takeover_timeout, 0)

/* first argument of timeval_current_ofs() when re-arming the ARP timer */
#define CTDB_ARP_INTERVAL 1
/* number of announcement rounds after which the ARP timer stops */
#define CTDB_ARP_REPEAT 3
50
/*
  A local network interface known to the daemon.  Entries live on the
  doubly linked list rooted at ctdb->ifaces.
 */
struct ctdb_interface {
        /* list linkage used by the DLIST_* macros */
        struct ctdb_interface *prev;
        struct ctdb_interface *next;
        /* interface name (talloc'ed as a child of this structure) */
        const char *name;
        /* link state; set to true when the entry is created */
        bool link_up;
        /* number of public addresses currently assigned to this interface */
        uint32_t references;
};
57
58 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
59 {
60         if (vnn->iface) {
61                 return vnn->iface->name;
62         }
63
64         return "__none__";
65 }
66
67 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
68 {
69         struct ctdb_interface *i;
70
71         if (strlen(iface) > CTDB_IFACE_SIZE) {
72                 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
73                 return -1;
74         }
75
76         /* Verify that we don't have an entry for this ip yet */
77         for (i=ctdb->ifaces;i;i=i->next) {
78                 if (strcmp(i->name, iface) == 0) {
79                         return 0;
80                 }
81         }
82
83         /* create a new structure for this interface */
84         i = talloc_zero(ctdb, struct ctdb_interface);
85         CTDB_NO_MEMORY_FATAL(ctdb, i);
86         i->name = talloc_strdup(i, iface);
87         CTDB_NO_MEMORY(ctdb, i->name);
88
89         i->link_up = true;
90
91         DLIST_ADD(ctdb->ifaces, i);
92
93         return 0;
94 }
95
96 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
97                                         const char *name)
98 {
99         int n;
100
101         for (n = 0; vnn->ifaces[n] != NULL; n++) {
102                 if (strcmp(name, vnn->ifaces[n]) == 0) {
103                         return true;
104                 }
105         }
106
107         return false;
108 }
109
110 /* If any interfaces now have no possible IPs then delete them.  This
111  * implementation is naive (i.e. simple) rather than clever
112  * (i.e. complex).  Given that this is run on delip and that operation
113  * is rare, this doesn't need to be efficient - it needs to be
114  * foolproof.  One alternative is reference counting, where the logic
115  * is distributed and can, therefore, be broken in multiple places.
116  * Another alternative is to build a red-black tree of interfaces that
117  * can have addresses (by walking ctdb->vnn and ctdb->single_ip_vnn
118  * once) and then walking ctdb->ifaces once and deleting those not in
119  * the tree.  Let's go to one of those if the naive implementation
120  * causes problems...  :-)
121  */
122 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
123                                         struct ctdb_vnn *vnn)
124 {
125         struct ctdb_interface *i, *next;
126
127         /* For each interface, check if there's an IP using it. */
128         for (i = ctdb->ifaces; i != NULL; i = next) {
129                 struct ctdb_vnn *tv;
130                 bool found;
131                 next = i->next;
132
133                 /* Only consider interfaces named in the given VNN. */
134                 if (!vnn_has_interface_with_name(vnn, i->name)) {
135                         continue;
136                 }
137
138                 /* Is the "single IP" on this interface? */
139                 if ((ctdb->single_ip_vnn != NULL) &&
140                     (ctdb->single_ip_vnn->ifaces[0] != NULL) &&
141                     (strcmp(i->name, ctdb->single_ip_vnn->ifaces[0]) == 0)) {
142                         /* Found, next interface please... */
143                         continue;
144                 }
145                 /* Search for a vnn with this interface. */
146                 found = false;
147                 for (tv=ctdb->vnn; tv; tv=tv->next) {
148                         if (vnn_has_interface_with_name(tv, i->name)) {
149                                 found = true;
150                                 break;
151                         }
152                 }
153
154                 if (!found) {
155                         /* None of the VNNs are using this interface. */
156                         DLIST_REMOVE(ctdb->ifaces, i);
157                         talloc_free(i);
158                 }
159         }
160 }
161
162
163 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
164                                               const char *iface)
165 {
166         struct ctdb_interface *i;
167
168         for (i=ctdb->ifaces;i;i=i->next) {
169                 if (strcmp(i->name, iface) == 0) {
170                         return i;
171                 }
172         }
173
174         return NULL;
175 }
176
177 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
178                                                   struct ctdb_vnn *vnn)
179 {
180         int i;
181         struct ctdb_interface *cur = NULL;
182         struct ctdb_interface *best = NULL;
183
184         for (i=0; vnn->ifaces[i]; i++) {
185
186                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
187                 if (cur == NULL) {
188                         continue;
189                 }
190
191                 if (!cur->link_up) {
192                         continue;
193                 }
194
195                 if (best == NULL) {
196                         best = cur;
197                         continue;
198                 }
199
200                 if (cur->references < best->references) {
201                         best = cur;
202                         continue;
203                 }
204         }
205
206         return best;
207 }
208
209 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
210                                      struct ctdb_vnn *vnn)
211 {
212         struct ctdb_interface *best = NULL;
213
214         if (vnn->iface) {
215                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
216                                    "still assigned to iface '%s'\n",
217                                    ctdb_addr_to_str(&vnn->public_address),
218                                    ctdb_vnn_iface_string(vnn)));
219                 return 0;
220         }
221
222         best = ctdb_vnn_best_iface(ctdb, vnn);
223         if (best == NULL) {
224                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
225                                   "cannot assign to iface any iface\n",
226                                   ctdb_addr_to_str(&vnn->public_address)));
227                 return -1;
228         }
229
230         vnn->iface = best;
231         best->references++;
232         vnn->pnn = ctdb->pnn;
233
234         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
235                            "now assigned to iface '%s' refs[%d]\n",
236                            ctdb_addr_to_str(&vnn->public_address),
237                            ctdb_vnn_iface_string(vnn),
238                            best->references));
239         return 0;
240 }
241
242 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
243                                     struct ctdb_vnn *vnn)
244 {
245         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
246                            "now unassigned (old iface '%s' refs[%d])\n",
247                            ctdb_addr_to_str(&vnn->public_address),
248                            ctdb_vnn_iface_string(vnn),
249                            vnn->iface?vnn->iface->references:0));
250         if (vnn->iface) {
251                 vnn->iface->references--;
252         }
253         vnn->iface = NULL;
254         if (vnn->pnn == ctdb->pnn) {
255                 vnn->pnn = -1;
256         }
257 }
258
259 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
260                                struct ctdb_vnn *vnn)
261 {
262         int i;
263
264         /* Nodes that are not RUNNING can not host IPs */
265         if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
266                 return false;
267         }
268
269         if (vnn->delete_pending) {
270                 return false;
271         }
272
273         if (vnn->iface && vnn->iface->link_up) {
274                 return true;
275         }
276
277         for (i=0; vnn->ifaces[i]; i++) {
278                 struct ctdb_interface *cur;
279
280                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
281                 if (cur == NULL) {
282                         continue;
283                 }
284
285                 if (cur->link_up) {
286                         return true;
287                 }
288         }
289
290         return false;
291 }
292
293 struct ctdb_takeover_arp {
294         struct ctdb_context *ctdb;
295         uint32_t count;
296         ctdb_sock_addr addr;
297         struct ctdb_tcp_array *tcparray;
298         struct ctdb_vnn *vnn;
299 };
300
301
302 /*
303   lists of tcp endpoints
304  */
305 struct ctdb_tcp_list {
306         struct ctdb_tcp_list *prev, *next;
307         struct ctdb_connection connection;
308 };
309
310 /*
311   list of clients to kill on IP release
312  */
313 struct ctdb_client_ip {
314         struct ctdb_client_ip *prev, *next;
315         struct ctdb_context *ctdb;
316         ctdb_sock_addr addr;
317         uint32_t client_id;
318 };
319
320
321 /*
322   send a gratuitous arp
323  */
324 static void ctdb_control_send_arp(struct tevent_context *ev,
325                                   struct tevent_timer *te,
326                                   struct timeval t, void *private_data)
327 {
328         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
329                                                         struct ctdb_takeover_arp);
330         int i, ret;
331         struct ctdb_tcp_array *tcparray;
332         const char *iface = ctdb_vnn_iface_string(arp->vnn);
333
334         ret = ctdb_sys_send_arp(&arp->addr, iface);
335         if (ret != 0) {
336                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
337                                   iface, strerror(errno)));
338         }
339
340         tcparray = arp->tcparray;
341         if (tcparray) {
342                 for (i=0;i<tcparray->num;i++) {
343                         struct ctdb_connection *tcon;
344
345                         tcon = &tcparray->connections[i];
346                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
347                                 (unsigned)ntohs(tcon->dst.ip.sin_port),
348                                 ctdb_addr_to_str(&tcon->src),
349                                 (unsigned)ntohs(tcon->src.ip.sin_port)));
350                         ret = ctdb_sys_send_tcp(
351                                 &tcon->src,
352                                 &tcon->dst,
353                                 0, 0, 0);
354                         if (ret != 0) {
355                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
356                                         ctdb_addr_to_str(&tcon->src)));
357                         }
358                 }
359         }
360
361         arp->count++;
362
363         if (arp->count == CTDB_ARP_REPEAT) {
364                 talloc_free(arp);
365                 return;
366         }
367
368         tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
369                          timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
370                          ctdb_control_send_arp, arp);
371 }
372
373 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
374                                        struct ctdb_vnn *vnn)
375 {
376         struct ctdb_takeover_arp *arp;
377         struct ctdb_tcp_array *tcparray;
378
379         if (!vnn->takeover_ctx) {
380                 vnn->takeover_ctx = talloc_new(vnn);
381                 if (!vnn->takeover_ctx) {
382                         return -1;
383                 }
384         }
385
386         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
387         if (!arp) {
388                 return -1;
389         }
390
391         arp->ctdb = ctdb;
392         arp->addr = vnn->public_address;
393         arp->vnn  = vnn;
394
395         tcparray = vnn->tcp_array;
396         if (tcparray) {
397                 /* add all of the known tcp connections for this IP to the
398                    list of tcp connections to send tickle acks for */
399                 arp->tcparray = talloc_steal(arp, tcparray);
400
401                 vnn->tcp_array = NULL;
402                 vnn->tcp_update_needed = true;
403         }
404
405         tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
406                          timeval_zero(), ctdb_control_send_arp, arp);
407
408         return 0;
409 }
410
411 struct takeover_callback_state {
412         struct ctdb_req_control_old *c;
413         ctdb_sock_addr *addr;
414         struct ctdb_vnn *vnn;
415 };
416
/*
  State handed to ctdb_do_takeip_callback().
 */
struct ctdb_do_takeip_state {
        /* the control to reply to once the event has finished */
        struct ctdb_req_control_old *c;
        /* the public address being taken over */
        struct ctdb_vnn *vnn;
};
421
422 /*
423   called when takeip event finishes
424  */
425 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
426                                     void *private_data)
427 {
428         struct ctdb_do_takeip_state *state =
429                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
430         int32_t ret;
431         TDB_DATA data;
432
433         if (status != 0) {
434                 struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
435         
436                 if (status == -ETIME) {
437                         ctdb_ban_self(ctdb);
438                 }
439                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
440                                  ctdb_addr_to_str(&state->vnn->public_address),
441                                  ctdb_vnn_iface_string(state->vnn)));
442                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
443
444                 node->flags |= NODE_FLAGS_UNHEALTHY;
445                 talloc_free(state);
446                 return;
447         }
448
449         if (ctdb->do_checkpublicip) {
450
451         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
452         if (ret != 0) {
453                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
454                 talloc_free(state);
455                 return;
456         }
457
458         }
459
460         data.dptr  = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
461         data.dsize = strlen((char *)data.dptr) + 1;
462         DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
463
464         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
465
466
467         /* the control succeeded */
468         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
469         talloc_free(state);
470         return;
471 }
472
473 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
474 {
475         state->vnn->update_in_flight = false;
476         return 0;
477 }
478
479 /*
480   take over an ip address
481  */
482 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
483                               struct ctdb_req_control_old *c,
484                               struct ctdb_vnn *vnn)
485 {
486         int ret;
487         struct ctdb_do_takeip_state *state;
488
489         if (vnn->update_in_flight) {
490                 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
491                                     "update for this IP already in flight\n",
492                                     ctdb_addr_to_str(&vnn->public_address),
493                                     vnn->public_netmask_bits));
494                 return -1;
495         }
496
497         ret = ctdb_vnn_assign_iface(ctdb, vnn);
498         if (ret != 0) {
499                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
500                                  "assign a usable interface\n",
501                                  ctdb_addr_to_str(&vnn->public_address),
502                                  vnn->public_netmask_bits));
503                 return -1;
504         }
505
506         state = talloc(vnn, struct ctdb_do_takeip_state);
507         CTDB_NO_MEMORY(ctdb, state);
508
509         state->c = talloc_steal(ctdb, c);
510         state->vnn   = vnn;
511
512         vnn->update_in_flight = true;
513         talloc_set_destructor(state, ctdb_takeip_destructor);
514
515         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
516                             ctdb_addr_to_str(&vnn->public_address),
517                             vnn->public_netmask_bits,
518                             ctdb_vnn_iface_string(vnn)));
519
520         ret = ctdb_event_script_callback(ctdb,
521                                          state,
522                                          ctdb_do_takeip_callback,
523                                          state,
524                                          CTDB_EVENT_TAKE_IP,
525                                          "%s %s %u",
526                                          ctdb_vnn_iface_string(vnn),
527                                          ctdb_addr_to_str(&vnn->public_address),
528                                          vnn->public_netmask_bits);
529
530         if (ret != 0) {
531                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
532                         ctdb_addr_to_str(&vnn->public_address),
533                         ctdb_vnn_iface_string(vnn)));
534                 talloc_free(state);
535                 return -1;
536         }
537
538         return 0;
539 }
540
/*
  State handed to ctdb_do_updateip_callback().
 */
struct ctdb_do_updateip_state {
        /* the control to reply to once the event has finished */
        struct ctdb_req_control_old *c;
        /* the interface the address is being moved away from */
        struct ctdb_interface *old;
        /* the public address being moved */
        struct ctdb_vnn *vnn;
};
546
547 /*
548   called when updateip event finishes
549  */
550 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
551                                       void *private_data)
552 {
553         struct ctdb_do_updateip_state *state =
554                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
555         int32_t ret;
556
557         if (status != 0) {
558                 if (status == -ETIME) {
559                         ctdb_ban_self(ctdb);
560                 }
561                 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
562                         ctdb_addr_to_str(&state->vnn->public_address),
563                         state->old->name,
564                         ctdb_vnn_iface_string(state->vnn)));
565
566                 /*
567                  * All we can do is reset the old interface
568                  * and let the next run fix it
569                  */
570                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
571                 state->vnn->iface = state->old;
572                 state->vnn->iface->references++;
573
574                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
575                 talloc_free(state);
576                 return;
577         }
578
579         if (ctdb->do_checkpublicip) {
580
581         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
582         if (ret != 0) {
583                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
584                 talloc_free(state);
585                 return;
586         }
587
588         }
589
590         /* the control succeeded */
591         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
592         talloc_free(state);
593         return;
594 }
595
596 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
597 {
598         state->vnn->update_in_flight = false;
599         return 0;
600 }
601
602 /*
603   update (move) an ip address
604  */
605 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
606                                 struct ctdb_req_control_old *c,
607                                 struct ctdb_vnn *vnn)
608 {
609         int ret;
610         struct ctdb_do_updateip_state *state;
611         struct ctdb_interface *old = vnn->iface;
612         const char *new_name;
613
614         if (vnn->update_in_flight) {
615                 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
616                                     "update for this IP already in flight\n",
617                                     ctdb_addr_to_str(&vnn->public_address),
618                                     vnn->public_netmask_bits));
619                 return -1;
620         }
621
622         ctdb_vnn_unassign_iface(ctdb, vnn);
623         ret = ctdb_vnn_assign_iface(ctdb, vnn);
624         if (ret != 0) {
625                 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
626                                  "assin a usable interface (old iface '%s')\n",
627                                  ctdb_addr_to_str(&vnn->public_address),
628                                  vnn->public_netmask_bits,
629                                  old->name));
630                 return -1;
631         }
632
633         new_name = ctdb_vnn_iface_string(vnn);
634         if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
635                 /* A benign update from one interface onto itself.
636                  * no need to run the eventscripts in this case, just return
637                  * success.
638                  */
639                 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
640                 return 0;
641         }
642
643         state = talloc(vnn, struct ctdb_do_updateip_state);
644         CTDB_NO_MEMORY(ctdb, state);
645
646         state->c = talloc_steal(ctdb, c);
647         state->old = old;
648         state->vnn = vnn;
649
650         vnn->update_in_flight = true;
651         talloc_set_destructor(state, ctdb_updateip_destructor);
652
653         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
654                             "interface %s to %s\n",
655                             ctdb_addr_to_str(&vnn->public_address),
656                             vnn->public_netmask_bits,
657                             old->name,
658                             new_name));
659
660         ret = ctdb_event_script_callback(ctdb,
661                                          state,
662                                          ctdb_do_updateip_callback,
663                                          state,
664                                          CTDB_EVENT_UPDATE_IP,
665                                          "%s %s %s %u",
666                                          state->old->name,
667                                          new_name,
668                                          ctdb_addr_to_str(&vnn->public_address),
669                                          vnn->public_netmask_bits);
670         if (ret != 0) {
671                 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
672                                  ctdb_addr_to_str(&vnn->public_address),
673                                  old->name, new_name));
674                 talloc_free(state);
675                 return -1;
676         }
677
678         return 0;
679 }
680
681 /*
682   Find the vnn of the node that has a public ip address
683   returns -1 if the address is not known as a public address
684  */
685 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
686 {
687         struct ctdb_vnn *vnn;
688
689         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
690                 if (ctdb_same_ip(&vnn->public_address, addr)) {
691                         return vnn;
692                 }
693         }
694
695         return NULL;
696 }
697
698 /*
699   take over an ip address
700  */
701 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
702                                  struct ctdb_req_control_old *c,
703                                  TDB_DATA indata,
704                                  bool *async_reply)
705 {
706         int ret;
707         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
708         struct ctdb_vnn *vnn;
709         bool have_ip = false;
710         bool do_updateip = false;
711         bool do_takeip = false;
712         struct ctdb_interface *best_iface = NULL;
713
714         if (pip->pnn != ctdb->pnn) {
715                 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
716                                  "with pnn %d, but we're node %d\n",
717                                  ctdb_addr_to_str(&pip->addr),
718                                  pip->pnn, ctdb->pnn));
719                 return -1;
720         }
721
722         /* update out vnn list */
723         vnn = find_public_ip_vnn(ctdb, &pip->addr);
724         if (vnn == NULL) {
725                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
726                         ctdb_addr_to_str(&pip->addr)));
727                 return 0;
728         }
729
730         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
731                 have_ip = ctdb_sys_have_ip(&pip->addr);
732         }
733         best_iface = ctdb_vnn_best_iface(ctdb, vnn);
734         if (best_iface == NULL) {
735                 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
736                                  "a usable interface (old %s, have_ip %d)\n",
737                                  ctdb_addr_to_str(&vnn->public_address),
738                                  vnn->public_netmask_bits,
739                                  ctdb_vnn_iface_string(vnn),
740                                  have_ip));
741                 return -1;
742         }
743
744         if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
745                 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
746                 have_ip = false;
747         }
748
749
750         if (vnn->iface == NULL && have_ip) {
751                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
752                                   "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
753                                  ctdb_addr_to_str(&vnn->public_address)));
754                 return 0;
755         }
756
757         if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
758                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
759                                   "and we have it on iface[%s], but it was assigned to node %d"
760                                   "and we are node %d, banning ourself\n",
761                                  ctdb_addr_to_str(&vnn->public_address),
762                                  ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
763                 ctdb_ban_self(ctdb);
764                 return -1;
765         }
766
767         if (vnn->pnn == -1 && have_ip) {
768                 vnn->pnn = ctdb->pnn;
769                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
770                                   "and we already have it on iface[%s], update local daemon\n",
771                                  ctdb_addr_to_str(&vnn->public_address),
772                                   ctdb_vnn_iface_string(vnn)));
773                 return 0;
774         }
775
776         if (vnn->iface) {
777                 if (vnn->iface != best_iface) {
778                         if (!vnn->iface->link_up) {
779                                 do_updateip = true;
780                         } else if (vnn->iface->references > (best_iface->references + 1)) {
781                                 /* only move when the rebalance gains something */
782                                         do_updateip = true;
783                         }
784                 }
785         }
786
787         if (!have_ip) {
788                 if (do_updateip) {
789                         ctdb_vnn_unassign_iface(ctdb, vnn);
790                         do_updateip = false;
791                 }
792                 do_takeip = true;
793         }
794
795         if (do_takeip) {
796                 ret = ctdb_do_takeip(ctdb, c, vnn);
797                 if (ret != 0) {
798                         return -1;
799                 }
800         } else if (do_updateip) {
801                 ret = ctdb_do_updateip(ctdb, c, vnn);
802                 if (ret != 0) {
803                         return -1;
804                 }
805         } else {
806                 /*
807                  * The interface is up and the kernel known the ip
808                  * => do nothing
809                  */
810                 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
811                         ctdb_addr_to_str(&pip->addr),
812                         vnn->public_netmask_bits,
813                         ctdb_vnn_iface_string(vnn)));
814                 return 0;
815         }
816
817         /* tell ctdb_control.c that we will be replying asynchronously */
818         *async_reply = true;
819
820         return 0;
821 }
822
823 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
824 {
825         DLIST_REMOVE(ctdb->vnn, vnn);
826         ctdb_vnn_unassign_iface(ctdb, vnn);
827         ctdb_remove_orphaned_ifaces(ctdb, vnn);
828         talloc_free(vnn);
829 }
830
831 /*
832   called when releaseip event finishes
833  */
834 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
835                                 void *private_data)
836 {
837         struct takeover_callback_state *state = 
838                 talloc_get_type(private_data, struct takeover_callback_state);
839         TDB_DATA data;
840
841         if (status == -ETIME) {
842                 ctdb_ban_self(ctdb);
843         }
844
845         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
846                 if  (ctdb_sys_have_ip(state->addr)) {
847                         DEBUG(DEBUG_ERR,
848                               ("IP %s still hosted during release IP callback, failing\n",
849                                ctdb_addr_to_str(state->addr)));
850                         ctdb_request_control_reply(ctdb, state->c,
851                                                    NULL, -1, NULL);
852                         talloc_free(state);
853                         return;
854                 }
855         }
856
857         /* send a message to all clients of this node telling them
858            that the cluster has been reconfigured and they should
859            release any sockets on this IP */
860         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
861         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
862         data.dsize = strlen((char *)data.dptr)+1;
863
864         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
865
866         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
867
868         ctdb_vnn_unassign_iface(ctdb, state->vnn);
869
870         /* Process the IP if it has been marked for deletion */
871         if (state->vnn->delete_pending) {
872                 do_delete_ip(ctdb, state->vnn);
873                 state->vnn = NULL;
874         }
875
876         /* the control succeeded */
877         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
878         talloc_free(state);
879 }
880
881 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
882 {
883         if (state->vnn != NULL) {
884                 state->vnn->update_in_flight = false;
885         }
886         return 0;
887 }
888
889 /*
890   release an ip address
891  */
892 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
893                                 struct ctdb_req_control_old *c,
894                                 TDB_DATA indata, 
895                                 bool *async_reply)
896 {
897         int ret;
898         struct takeover_callback_state *state;
899         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
900         struct ctdb_vnn *vnn;
901         char *iface;
902
903         /* update our vnn list */
904         vnn = find_public_ip_vnn(ctdb, &pip->addr);
905         if (vnn == NULL) {
906                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
907                         ctdb_addr_to_str(&pip->addr)));
908                 return 0;
909         }
910         vnn->pnn = pip->pnn;
911
912         /* stop any previous arps */
913         talloc_free(vnn->takeover_ctx);
914         vnn->takeover_ctx = NULL;
915
916         /* Some ctdb tool commands (e.g. moveip) send
917          * lazy multicast to drop an IP from any node that isn't the
918          * intended new node.  The following causes makes ctdbd ignore
919          * a release for any address it doesn't host.
920          */
921         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
922                 if (!ctdb_sys_have_ip(&pip->addr)) {
923                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
924                                 ctdb_addr_to_str(&pip->addr),
925                                 vnn->public_netmask_bits,
926                                 ctdb_vnn_iface_string(vnn)));
927                         ctdb_vnn_unassign_iface(ctdb, vnn);
928                         return 0;
929                 }
930         } else {
931                 if (vnn->iface == NULL) {
932                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
933                                            ctdb_addr_to_str(&pip->addr),
934                                            vnn->public_netmask_bits));
935                         return 0;
936                 }
937         }
938
939         /* There is a potential race between take_ip and us because we
940          * update the VNN via a callback that run when the
941          * eventscripts have been run.  Avoid the race by allowing one
942          * update to be in flight at a time.
943          */
944         if (vnn->update_in_flight) {
945                 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
946                                     "update for this IP already in flight\n",
947                                     ctdb_addr_to_str(&vnn->public_address),
948                                     vnn->public_netmask_bits));
949                 return -1;
950         }
951
952         iface = strdup(ctdb_vnn_iface_string(vnn));
953
954         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
955                 ctdb_addr_to_str(&pip->addr),
956                 vnn->public_netmask_bits,
957                 iface,
958                 pip->pnn));
959
960         state = talloc(ctdb, struct takeover_callback_state);
961         if (state == NULL) {
962                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
963                                __FILE__, __LINE__);
964                 free(iface);
965                 return -1;
966         }
967
968         state->c = talloc_steal(state, c);
969         state->addr = talloc(state, ctdb_sock_addr);       
970         if (state->addr == NULL) {
971                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
972                                __FILE__, __LINE__);
973                 free(iface);
974                 talloc_free(state);
975                 return -1;
976         }
977         *state->addr = pip->addr;
978         state->vnn   = vnn;
979
980         vnn->update_in_flight = true;
981         talloc_set_destructor(state, ctdb_releaseip_destructor);
982
983         ret = ctdb_event_script_callback(ctdb, 
984                                          state, release_ip_callback, state,
985                                          CTDB_EVENT_RELEASE_IP,
986                                          "%s %s %u",
987                                          iface,
988                                          ctdb_addr_to_str(&pip->addr),
989                                          vnn->public_netmask_bits);
990         free(iface);
991         if (ret != 0) {
992                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
993                         ctdb_addr_to_str(&pip->addr),
994                         ctdb_vnn_iface_string(vnn)));
995                 talloc_free(state);
996                 return -1;
997         }
998
999         /* tell the control that we will be reply asynchronously */
1000         *async_reply = true;
1001         return 0;
1002 }
1003
1004 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1005                                    ctdb_sock_addr *addr,
1006                                    unsigned mask, const char *ifaces,
1007                                    bool check_address)
1008 {
1009         struct ctdb_vnn      *vnn;
1010         uint32_t num = 0;
1011         char *tmp;
1012         const char *iface;
1013         int i;
1014         int ret;
1015
1016         tmp = strdup(ifaces);
1017         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1018                 if (!ctdb_sys_check_iface_exists(iface)) {
1019                         DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1020                         free(tmp);
1021                         return -1;
1022                 }
1023         }
1024         free(tmp);
1025
1026         /* Verify that we don't have an entry for this ip yet */
1027         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1028                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1029                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
1030                                 ctdb_addr_to_str(addr)));
1031                         return -1;
1032                 }               
1033         }
1034
1035         /* create a new vnn structure for this ip address */
1036         vnn = talloc_zero(ctdb, struct ctdb_vnn);
1037         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
1038         vnn->ifaces = talloc_array(vnn, const char *, num + 2);
1039         tmp = talloc_strdup(vnn, ifaces);
1040         CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1041         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1042                 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1043                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1044                 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1045                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
1046                 num++;
1047         }
1048         talloc_free(tmp);
1049         vnn->ifaces[num] = NULL;
1050         vnn->public_address      = *addr;
1051         vnn->public_netmask_bits = mask;
1052         vnn->pnn                 = -1;
1053         if (check_address) {
1054                 if (ctdb_sys_have_ip(addr)) {
1055                         DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
1056                         vnn->pnn = ctdb->pnn;
1057                 }
1058         }
1059
1060         for (i=0; vnn->ifaces[i]; i++) {
1061                 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1062                 if (ret != 0) {
1063                         DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1064                                            "for public_address[%s]\n",
1065                                            vnn->ifaces[i], ctdb_addr_to_str(addr)));
1066                         talloc_free(vnn);
1067                         return -1;
1068                 }
1069         }
1070
1071         DLIST_ADD(ctdb->vnn, vnn);
1072
1073         return 0;
1074 }
1075
1076 /*
1077   setup the public address lists from a file
1078 */
1079 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1080 {
1081         char **lines;
1082         int nlines;
1083         int i;
1084
1085         lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1086         if (lines == NULL) {
1087                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1088                 return -1;
1089         }
1090         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1091                 nlines--;
1092         }
1093
1094         for (i=0;i<nlines;i++) {
1095                 unsigned mask;
1096                 ctdb_sock_addr addr;
1097                 const char *addrstr;
1098                 const char *ifaces;
1099                 char *tok, *line;
1100
1101                 line = lines[i];
1102                 while ((*line == ' ') || (*line == '\t')) {
1103                         line++;
1104                 }
1105                 if (*line == '#') {
1106                         continue;
1107                 }
1108                 if (strcmp(line, "") == 0) {
1109                         continue;
1110                 }
1111                 tok = strtok(line, " \t");
1112                 addrstr = tok;
1113                 tok = strtok(NULL, " \t");
1114                 if (tok == NULL) {
1115                         if (NULL == ctdb->default_public_interface) {
1116                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1117                                          i+1));
1118                                 talloc_free(lines);
1119                                 return -1;
1120                         }
1121                         ifaces = ctdb->default_public_interface;
1122                 } else {
1123                         ifaces = tok;
1124                 }
1125
1126                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1127                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1128                         talloc_free(lines);
1129                         return -1;
1130                 }
1131                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1132                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1133                         talloc_free(lines);
1134                         return -1;
1135                 }
1136         }
1137
1138
1139         talloc_free(lines);
1140         return 0;
1141 }
1142
1143 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
1144                               const char *iface,
1145                               const char *ip)
1146 {
1147         struct ctdb_vnn *svnn;
1148         struct ctdb_interface *cur = NULL;
1149         bool ok;
1150         int ret;
1151
1152         svnn = talloc_zero(ctdb, struct ctdb_vnn);
1153         CTDB_NO_MEMORY(ctdb, svnn);
1154
1155         svnn->ifaces = talloc_array(svnn, const char *, 2);
1156         CTDB_NO_MEMORY(ctdb, svnn->ifaces);
1157         svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
1158         CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
1159         svnn->ifaces[1] = NULL;
1160
1161         ok = parse_ip(ip, iface, 0, &svnn->public_address);
1162         if (!ok) {
1163                 talloc_free(svnn);
1164                 return -1;
1165         }
1166
1167         ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1168         if (ret != 0) {
1169                 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1170                                    "for single_ip[%s]\n",
1171                                    svnn->ifaces[0],
1172                                    ctdb_addr_to_str(&svnn->public_address)));
1173                 talloc_free(svnn);
1174                 return -1;
1175         }
1176
1177         /* assume the single public ip interface is initially "good" */
1178         cur = ctdb_find_iface(ctdb, iface);
1179         if (cur == NULL) {
1180                 DEBUG(DEBUG_CRIT,("Can not find public interface %s used by --single-public-ip", iface));
1181                 return -1;
1182         }
1183         cur->link_up = true;
1184
1185         ret = ctdb_vnn_assign_iface(ctdb, svnn);
1186         if (ret != 0) {
1187                 talloc_free(svnn);
1188                 return -1;
1189         }
1190
1191         ctdb->single_ip_vnn = svnn;
1192         return 0;
1193 }
1194
1195 static void *add_ip_callback(void *parm, void *data)
1196 {
1197         struct public_ip_list *this_ip = parm;
1198         struct public_ip_list *prev_ip = data;
1199
1200         if (prev_ip == NULL) {
1201                 return parm;
1202         }
1203         if (this_ip->pnn == -1) {
1204                 this_ip->pnn = prev_ip->pnn;
1205         }
1206
1207         return parm;
1208 }
1209
1210 static int getips_count_callback(void *param, void *data)
1211 {
1212         struct public_ip_list **ip_list = (struct public_ip_list **)param;
1213         struct public_ip_list *new_ip = (struct public_ip_list *)data;
1214
1215         new_ip->next = *ip_list;
1216         *ip_list     = new_ip;
1217         return 0;
1218 }
1219
1220 static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
1221                                        struct ctdb_public_ip_list *ips,
1222                                        uint32_t pnn);
1223
1224 static int ctdb_reload_remote_public_ips(struct ctdb_context *ctdb,
1225                                          struct ipalloc_state *ipalloc_state,
1226                                          struct ctdb_node_map_old *nodemap)
1227 {
1228         int j;
1229         int ret;
1230         struct ctdb_public_ip_list_old *ip_list;
1231
1232         if (ipalloc_state->num != nodemap->num) {
1233                 DEBUG(DEBUG_ERR,
1234                       (__location__
1235                        " ipalloc_state->num (%d) != nodemap->num (%d) invalid param\n",
1236                        ipalloc_state->num, nodemap->num));
1237                 return -1;
1238         }
1239
1240         for (j=0; j<nodemap->num; j++) {
1241                 if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
1242                         continue;
1243                 }
1244
1245                 /* Retrieve the list of known public IPs from the node */
1246                 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1247                                         TAKEOVER_TIMEOUT(),
1248                                         j,
1249                                         ipalloc_state->known_public_ips,
1250                                         0,
1251                                         &ip_list);
1252                 if (ret != 0) {
1253                         DEBUG(DEBUG_ERR,
1254                               ("Failed to read known public IPs from node: %u\n",
1255                                j));
1256                         return -1;
1257                 }
1258                 ipalloc_state->known_public_ips[j].num = ip_list->num;
1259                 /* This could be copied and freed.  However, ip_list
1260                  * is allocated off ipalloc_state->known_public_ips,
1261                  * so this is a safe hack.  This will go away in a
1262                  * while anyway... */
1263                 ipalloc_state->known_public_ips[j].ip = &ip_list->ips[0];
1264
1265                 if (ctdb->do_checkpublicip) {
1266                         verify_remote_ip_allocation(
1267                                 ctdb,
1268                                 &ipalloc_state->known_public_ips[j],
1269                                 j);
1270                 }
1271
1272                 /* Retrieve the list of available public IPs from the node */
1273                 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1274                                         TAKEOVER_TIMEOUT(),
1275                                         j,
1276                                         ipalloc_state->available_public_ips,
1277                                         CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE,
1278                                         &ip_list);
1279                 if (ret != 0) {
1280                         DEBUG(DEBUG_ERR,
1281                               ("Failed to read available public IPs from node: %u\n",
1282                                j));
1283                         return -1;
1284                 }
1285                 ipalloc_state->available_public_ips[j].num = ip_list->num;
1286                 /* This could be copied and freed.  However, ip_list
1287                  * is allocated off ipalloc_state->available_public_ips,
1288                  * so this is a safe hack.  This will go away in a
1289                  * while anyway... */
1290                 ipalloc_state->available_public_ips[j].ip = &ip_list->ips[0];
1291         }
1292
1293         return 0;
1294 }
1295
1296 static struct public_ip_list *
1297 create_merged_ip_list(struct ctdb_context *ctdb, struct ipalloc_state *ipalloc_state)
1298 {
1299         int i, j;
1300         struct public_ip_list *ip_list;
1301         struct ctdb_public_ip_list *public_ips;
1302
1303         TALLOC_FREE(ctdb->ip_tree);
1304         ctdb->ip_tree = trbt_create(ctdb, 0);
1305
1306         for (i=0; i < ctdb->num_nodes; i++) {
1307
1308                 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1309                         continue;
1310                 }
1311
1312                 /* there were no public ips for this node */
1313                 if (ipalloc_state->known_public_ips == NULL) {
1314                         continue;
1315                 }
1316
1317                 public_ips = &ipalloc_state->known_public_ips[i];
1318
1319                 for (j=0; j < public_ips->num; j++) {
1320                         struct public_ip_list *tmp_ip;
1321
1322                         tmp_ip = talloc_zero(ctdb->ip_tree, struct public_ip_list);
1323                         CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1324                         /* Do not use information about IP addresses hosted
1325                          * on other nodes, it may not be accurate */
1326                         if (public_ips->ip[j].pnn == ctdb->nodes[i]->pnn) {
1327                                 tmp_ip->pnn = public_ips->ip[j].pnn;
1328                         } else {
1329                                 tmp_ip->pnn = -1;
1330                         }
1331                         tmp_ip->addr = public_ips->ip[j].addr;
1332                         tmp_ip->next = NULL;
1333
1334                         trbt_insertarray32_callback(ctdb->ip_tree,
1335                                 IP_KEYLEN, ip_key(&public_ips->ip[j].addr),
1336                                 add_ip_callback,
1337                                 tmp_ip);
1338                 }
1339         }
1340
1341         ip_list = NULL;
1342         trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1343
1344         return ip_list;
1345 }
1346
1347 static bool all_nodes_are_disabled(struct ctdb_node_map_old *nodemap)
1348 {
1349         int i;
1350
1351         for (i=0;i<nodemap->num;i++) {
1352                 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1353                         /* Found one completely healthy node */
1354                         return false;
1355                 }
1356         }
1357
1358         return true;
1359 }
1360
/*
 * State shared between get_tunable_from_nodes() and its per-node
 * reply/failure callbacks.
 */
struct get_tunable_callback_data {
        /* Name of the tunable being fetched; used in log messages */
        const char *tunable;
        /* talloc array indexed by pnn that receives each node's value */
        uint32_t *out;
        /* Set by a callback when the whole fetch must be treated as failed */
        bool fatal;
};
1366
1367 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
1368                                  int32_t res, TDB_DATA outdata,
1369                                  void *callback)
1370 {
1371         struct get_tunable_callback_data *cd =
1372                 (struct get_tunable_callback_data *)callback;
1373         int size;
1374
1375         if (res != 0) {
1376                 /* Already handled in fail callback */
1377                 return;
1378         }
1379
1380         if (outdata.dsize != sizeof(uint32_t)) {
1381                 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
1382                                  cd->tunable, pnn, (int)sizeof(uint32_t),
1383                                  (int)outdata.dsize));
1384                 cd->fatal = true;
1385                 return;
1386         }
1387
1388         size = talloc_array_length(cd->out);
1389         if (pnn >= size) {
1390                 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
1391                                  cd->tunable, pnn, size));
1392                 return;
1393         }
1394
1395                 
1396         cd->out[pnn] = *(uint32_t *)outdata.dptr;
1397 }
1398
1399 static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1400                                        int32_t res, TDB_DATA outdata,
1401                                        void *callback)
1402 {
1403         struct get_tunable_callback_data *cd =
1404                 (struct get_tunable_callback_data *)callback;
1405
1406         switch (res) {
1407         case -ETIME:
1408                 DEBUG(DEBUG_ERR,
1409                       ("Timed out getting tunable \"%s\" from node %d\n",
1410                        cd->tunable, pnn));
1411                 cd->fatal = true;
1412                 break;
1413         case -EINVAL:
1414         case -1:
1415                 DEBUG(DEBUG_WARNING,
1416                       ("Tunable \"%s\" not implemented on node %d\n",
1417                        cd->tunable, pnn));
1418                 break;
1419         default:
1420                 DEBUG(DEBUG_ERR,
1421                       ("Unexpected error getting tunable \"%s\" from node %d\n",
1422                        cd->tunable, pnn));
1423                 cd->fatal = true;
1424         }
1425 }
1426
1427 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
1428                                         TALLOC_CTX *tmp_ctx,
1429                                         struct ctdb_node_map_old *nodemap,
1430                                         const char *tunable,
1431                                         uint32_t default_value)
1432 {
1433         TDB_DATA data;
1434         struct ctdb_control_get_tunable *t;
1435         uint32_t *nodes;
1436         uint32_t *tvals;
1437         struct get_tunable_callback_data callback_data;
1438         int i;
1439
1440         tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1441         CTDB_NO_MEMORY_NULL(ctdb, tvals);
1442         for (i=0; i<nodemap->num; i++) {
1443                 tvals[i] = default_value;
1444         }
1445                 
1446         callback_data.out = tvals;
1447         callback_data.tunable = tunable;
1448         callback_data.fatal = false;
1449
1450         data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
1451         data.dptr  = talloc_size(tmp_ctx, data.dsize);
1452         t = (struct ctdb_control_get_tunable *)data.dptr;
1453         t->length = strlen(tunable)+1;
1454         memcpy(t->name, tunable, t->length);
1455         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1456         if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
1457                                       nodes, 0, TAKEOVER_TIMEOUT(),
1458                                       false, data,
1459                                       get_tunable_callback,
1460                                       get_tunable_fail_callback,
1461                                       &callback_data) != 0) {
1462                 if (callback_data.fatal) {
1463                         talloc_free(tvals);
1464                         tvals = NULL;
1465                 }
1466         }
1467         talloc_free(nodes);
1468         talloc_free(data.dptr);
1469
1470         return tvals;
1471 }
1472
1473 /* Set internal flags for IP allocation:
1474  *   Clear ip flags
1475  *   Set NOIPTAKOVER ip flags from per-node NoIPTakeover tunable
1476  *   Set NOIPHOST ip flag for each INACTIVE node
1477  *   if all nodes are disabled:
1478  *     Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
1479  *   else
1480  *     Set NOIPHOST ip flags for disabled nodes
1481  */
1482 static void set_ipflags_internal(struct ipalloc_state *ipalloc_state,
1483                                  struct ctdb_node_map_old *nodemap,
1484                                  uint32_t *tval_noiptakeover,
1485                                  uint32_t *tval_noiphostonalldisabled)
1486 {
1487         int i;
1488
1489         for (i=0;i<nodemap->num;i++) {
1490                 /* Can not take IPs on node with NoIPTakeover set */
1491                 if (tval_noiptakeover[i] != 0) {
1492                         ipalloc_state->noiptakeover[i] = true;
1493                 }
1494
1495                 /* Can not host IPs on INACTIVE node */
1496                 if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
1497                         ipalloc_state->noiphost[i] = true;
1498                 }
1499         }
1500
1501         if (all_nodes_are_disabled(nodemap)) {
1502                 /* If all nodes are disabled, can not host IPs on node
1503                  * with NoIPHostOnAllDisabled set
1504                  */
1505                 for (i=0;i<nodemap->num;i++) {
1506                         if (tval_noiphostonalldisabled[i] != 0) {
1507                                 ipalloc_state->noiphost[i] = true;
1508                         }
1509                 }
1510         } else {
1511                 /* If some nodes are not disabled, then can not host
1512                  * IPs on DISABLED node
1513                  */
1514                 for (i=0;i<nodemap->num;i++) {
1515                         if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
1516                                 ipalloc_state->noiphost[i] = true;
1517                         }
1518                 }
1519         }
1520 }
1521
1522 static bool set_ipflags(struct ctdb_context *ctdb,
1523                         struct ipalloc_state *ipalloc_state,
1524                         struct ctdb_node_map_old *nodemap)
1525 {
1526         uint32_t *tval_noiptakeover;
1527         uint32_t *tval_noiphostonalldisabled;
1528
1529         tval_noiptakeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1530                                                    "NoIPTakeover", 0);
1531         if (tval_noiptakeover == NULL) {
1532                 return false;
1533         }
1534
1535         tval_noiphostonalldisabled =
1536                 get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1537                                        "NoIPHostOnAllDisabled", 0);
1538         if (tval_noiphostonalldisabled == NULL) {
1539                 /* Caller frees tmp_ctx */
1540                 return false;
1541         }
1542
1543         set_ipflags_internal(ipalloc_state, nodemap,
1544                              tval_noiptakeover,
1545                              tval_noiphostonalldisabled);
1546
1547         talloc_free(tval_noiptakeover);
1548         talloc_free(tval_noiphostonalldisabled);
1549
1550         return true;
1551 }
1552
1553 static struct ipalloc_state * ipalloc_state_init(struct ctdb_context *ctdb,
1554                                                  TALLOC_CTX *mem_ctx)
1555 {
1556         struct ipalloc_state *ipalloc_state =
1557                 talloc_zero(mem_ctx, struct ipalloc_state);
1558         if (ipalloc_state == NULL) {
1559                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1560                 return NULL;
1561         }
1562
1563         ipalloc_state->num = ctdb->num_nodes;
1564
1565         ipalloc_state->known_public_ips =
1566                 talloc_zero_array(ipalloc_state,
1567                                   struct ctdb_public_ip_list,
1568                                   ipalloc_state->num);
1569         if (ipalloc_state->known_public_ips == NULL) {
1570                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1571                 goto fail;
1572         }
1573
1574         ipalloc_state->available_public_ips =
1575                 talloc_zero_array(ipalloc_state,
1576                                   struct ctdb_public_ip_list,
1577                                   ipalloc_state->num);
1578         if (ipalloc_state->available_public_ips == NULL) {
1579                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1580                 goto fail;
1581         }
1582         ipalloc_state->noiptakeover =
1583                 talloc_zero_array(ipalloc_state,
1584                                   bool,
1585                                   ipalloc_state->num);
1586         if (ipalloc_state->noiptakeover == NULL) {
1587                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1588                 goto fail;
1589         }
1590         ipalloc_state->noiphost =
1591                 talloc_zero_array(ipalloc_state,
1592                                   bool,
1593                                   ipalloc_state->num);
1594         if (ipalloc_state->noiphost == NULL) {
1595                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1596                 goto fail;
1597         }
1598
1599         if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1600                 ipalloc_state->algorithm = IPALLOC_LCP2;
1601         } else if (1 == ctdb->tunable.deterministic_public_ips) {
1602                 ipalloc_state->algorithm = IPALLOC_DETERMINISTIC;
1603         } else {
1604                 ipalloc_state->algorithm = IPALLOC_NONDETERMINISTIC;
1605         }
1606
1607         ipalloc_state->no_ip_failback = ctdb->tunable.no_ip_failback;
1608
1609         return ipalloc_state;
1610 fail:
1611         talloc_free(ipalloc_state);
1612         return NULL;
1613 }
1614
1615 struct iprealloc_callback_data {
1616         bool *retry_nodes;
1617         int retry_count;
1618         client_async_callback fail_callback;
1619         void *fail_callback_data;
1620         struct ctdb_node_map_old *nodemap;
1621 };
1622
1623 static void iprealloc_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1624                                         int32_t res, TDB_DATA outdata,
1625                                         void *callback)
1626 {
1627         int numnodes;
1628         struct iprealloc_callback_data *cd =
1629                 (struct iprealloc_callback_data *)callback;
1630
1631         numnodes = talloc_array_length(cd->retry_nodes);
1632         if (pnn > numnodes) {
1633                 DEBUG(DEBUG_ERR,
1634                       ("ipreallocated failure from node %d, "
1635                        "but only %d nodes in nodemap\n",
1636                        pnn, numnodes));
1637                 return;
1638         }
1639
1640         /* Can't run the "ipreallocated" event on a INACTIVE node */
1641         if (cd->nodemap->nodes[pnn].flags & NODE_FLAGS_INACTIVE) {
1642                 DEBUG(DEBUG_WARNING,
1643                       ("ipreallocated failed on inactive node %d, ignoring\n",
1644                        pnn));
1645                 return;
1646         }
1647
1648         switch (res) {
1649         case -ETIME:
1650                 /* If the control timed out then that's a real error,
1651                  * so call the real fail callback
1652                  */
1653                 if (cd->fail_callback) {
1654                         cd->fail_callback(ctdb, pnn, res, outdata,
1655                                           cd->fail_callback_data);
1656                 } else {
1657                         DEBUG(DEBUG_WARNING,
1658                               ("iprealloc timed out but no callback registered\n"));
1659                 }
1660                 break;
1661         default:
1662                 /* If not a timeout then either the ipreallocated
1663                  * eventscript (or some setup) failed.  This might
1664                  * have failed because the IPREALLOCATED control isn't
1665                  * implemented - right now there is no way of knowing
1666                  * because the error codes are all folded down to -1.
1667                  * Consider retrying using EVENTSCRIPT control...
1668                  */
1669                 DEBUG(DEBUG_WARNING,
1670                       ("ipreallocated failure from node %d, flagging retry\n",
1671                        pnn));
1672                 cd->retry_nodes[pnn] = true;
1673                 cd->retry_count++;
1674         }
1675 }
1676
1677 struct takeover_callback_data {
1678         bool *node_failed;
1679         client_async_callback fail_callback;
1680         void *fail_callback_data;
1681         struct ctdb_node_map_old *nodemap;
1682 };
1683
1684 static void takeover_run_fail_callback(struct ctdb_context *ctdb,
1685                                        uint32_t node_pnn, int32_t res,
1686                                        TDB_DATA outdata, void *callback_data)
1687 {
1688         struct takeover_callback_data *cd =
1689                 talloc_get_type_abort(callback_data,
1690                                       struct takeover_callback_data);
1691         int i;
1692
1693         for (i = 0; i < cd->nodemap->num; i++) {
1694                 if (node_pnn == cd->nodemap->nodes[i].pnn) {
1695                         break;
1696                 }
1697         }
1698
1699         if (i == cd->nodemap->num) {
1700                 DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
1701                 return;
1702         }
1703
1704         if (!cd->node_failed[i]) {
1705                 cd->node_failed[i] = true;
1706                 cd->fail_callback(ctdb, node_pnn, res, outdata,
1707                                   cd->fail_callback_data);
1708         }
1709 }
1710
1711 /*
1712  * Recalculate the allocation of public IPs to nodes and have the
1713  * nodes host their allocated addresses.
1714  *
1715  * - Allocate memory for IP allocation state, including per node
1716  *   arrays
1717  * - Populate IP allocation algorithm in IP allocation state
1718  * - Populate local value of tunable NoIPFailback in IP allocation
1719      state - this is really a cluster-wide configuration variable and
1720      only the value form the master node is used
1721  * - Retrieve tunables NoIPTakeover and NoIPHostOnAllDisabled from all
1722  *   connected nodes - this is done separately so tunable values can
1723  *   be faked in unit testing
1724  * - Populate NoIPTakover tunable in IP allocation state
1725  * - Populate NoIPHost in IP allocation state, derived from node flags
1726  *   and NoIPHostOnAllDisabled tunable
1727  * - Retrieve and populate known and available IP lists in IP
1728  *   allocation state
1729  * - If no available IP addresses then early exit
1730  * - Build list of (known IPs, currently assigned node)
1731  * - Populate list of nodes to force rebalance - internal structure,
1732  *   currently no way to fetch, only used by LCP2 for nodes that have
1733  *   had new IP addresses added
1734  * - Run IP allocation algorithm
1735  * - Send RELEASE_IP to all nodes for IPs they should not host
1736  * - Send TAKE_IP to all nodes for IPs they should host
1737  * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
1738  */
1739 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
1740                       uint32_t *force_rebalance_nodes,
1741                       client_async_callback fail_callback, void *callback_data)
1742 {
1743         int i, j, ret;
1744         struct ctdb_public_ip ip;
1745         uint32_t *nodes;
1746         struct public_ip_list *all_ips, *tmp_ip;
1747         TDB_DATA data;
1748         struct timeval timeout;
1749         struct client_async_data *async_data;
1750         struct ctdb_client_control_state *state;
1751         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1752         struct ipalloc_state *ipalloc_state;
1753         struct takeover_callback_data *takeover_data;
1754         struct iprealloc_callback_data iprealloc_data;
1755         bool *retry_data;
1756         bool can_host_ips;
1757
1758         /*
1759          * ip failover is completely disabled, just send out the 
1760          * ipreallocated event.
1761          */
1762         if (ctdb->tunable.disable_ip_failover != 0) {
1763                 goto ipreallocated;
1764         }
1765
1766         ipalloc_state = ipalloc_state_init(ctdb, tmp_ctx);
1767         if (ipalloc_state == NULL) {
1768                 talloc_free(tmp_ctx);
1769                 return -1;
1770         }
1771
1772         if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
1773                 DEBUG(DEBUG_ERR,("Failed to set IP flags - aborting takeover run\n"));
1774                 talloc_free(tmp_ctx);
1775                 return -1;
1776         }
1777
1778         /* Fetch known/available public IPs from each active node */
1779         ret = ctdb_reload_remote_public_ips(ctdb, ipalloc_state, nodemap);
1780         if (ret != 0) {
1781                 talloc_free(tmp_ctx);
1782                 return -1;
1783         }
1784
1785         /* Short-circuit IP allocation if no node has available IPs */
1786         can_host_ips = false;
1787         for (i=0; i < ipalloc_state->num; i++) {
1788                 if (ipalloc_state->available_public_ips[i].num != 0) {
1789                         can_host_ips = true;
1790                 }
1791         }
1792         if (!can_host_ips) {
1793                 DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
1794                 return 0;
1795         }
1796
1797         /* since nodes only know about those public addresses that
1798            can be served by that particular node, no single node has
1799            a full list of all public addresses that exist in the cluster.
1800            Walk over all node structures and create a merged list of
1801            all public addresses that exist in the cluster.
1802
1803            keep the tree of ips around as ctdb->ip_tree
1804         */
1805         all_ips = create_merged_ip_list(ctdb, ipalloc_state);
1806         ipalloc_state->all_ips = all_ips;
1807
1808         ipalloc_state->force_rebalance_nodes = force_rebalance_nodes;
1809
1810         /* Do the IP reassignment calculations */
1811         ipalloc(ipalloc_state);
1812
1813         /* Now tell all nodes to release any public IPs should not
1814          * host.  This will be a NOOP on nodes that don't currently
1815          * hold the given IP.
1816          */
1817         takeover_data = talloc_zero(tmp_ctx, struct takeover_callback_data);
1818         CTDB_NO_MEMORY_FATAL(ctdb, takeover_data);
1819
1820         takeover_data->node_failed = talloc_zero_array(tmp_ctx,
1821                                                        bool, nodemap->num);
1822         CTDB_NO_MEMORY_FATAL(ctdb, takeover_data->node_failed);
1823         takeover_data->fail_callback = fail_callback;
1824         takeover_data->fail_callback_data = callback_data;
1825         takeover_data->nodemap = nodemap;
1826
1827         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1828         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1829
1830         async_data->fail_callback = takeover_run_fail_callback;
1831         async_data->callback_data = takeover_data;
1832
1833         ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
1834
1835         /* Send a RELEASE_IP to all nodes that should not be hosting
1836          * each IP.  For each IP, all but one of these will be
1837          * redundant.  However, the redundant ones are used to tell
1838          * nodes which node should be hosting the IP so that commands
1839          * like "ctdb ip" can display a particular nodes idea of who
1840          * is hosting what. */
1841         for (i=0;i<nodemap->num;i++) {
1842                 /* don't talk to unconnected nodes, but do talk to banned nodes */
1843                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1844                         continue;
1845                 }
1846
1847                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1848                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1849                                 /* This node should be serving this
1850                                    vnn so don't tell it to release the ip
1851                                 */
1852                                 continue;
1853                         }
1854                         ip.pnn  = tmp_ip->pnn;
1855                         ip.addr = tmp_ip->addr;
1856
1857                         timeout = TAKEOVER_TIMEOUT();
1858                         data.dsize = sizeof(ip);
1859                         data.dptr  = (uint8_t *)&ip;
1860                         state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1861                                                   0, CTDB_CONTROL_RELEASE_IP, 0,
1862                                                   data, async_data,
1863                                                   &timeout, NULL);
1864                         if (state == NULL) {
1865                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1866                                 talloc_free(tmp_ctx);
1867                                 return -1;
1868                         }
1869
1870                         ctdb_client_async_add(async_data, state);
1871                 }
1872         }
1873         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1874                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1875                 talloc_free(tmp_ctx);
1876                 return -1;
1877         }
1878         talloc_free(async_data);
1879
1880
1881         /* For each IP, send a TAKOVER_IP to the node that should be
1882          * hosting it.  Many of these will often be redundant (since
1883          * the allocation won't have changed) but they can be useful
1884          * to recover from inconsistencies. */
1885         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1886         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1887
1888         async_data->fail_callback = fail_callback;
1889         async_data->callback_data = callback_data;
1890
1891         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1892                 if (tmp_ip->pnn == -1) {
1893                         /* this IP won't be taken over */
1894                         continue;
1895                 }
1896
1897                 ip.pnn  = tmp_ip->pnn;
1898                 ip.addr = tmp_ip->addr;
1899
1900                 timeout = TAKEOVER_TIMEOUT();
1901                 data.dsize = sizeof(ip);
1902                 data.dptr  = (uint8_t *)&ip;
1903                 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1904                                           0, CTDB_CONTROL_TAKEOVER_IP, 0,
1905                                           data, async_data, &timeout, NULL);
1906                 if (state == NULL) {
1907                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1908                         talloc_free(tmp_ctx);
1909                         return -1;
1910                 }
1911
1912                 ctdb_client_async_add(async_data, state);
1913         }
1914         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1915                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1916                 talloc_free(tmp_ctx);
1917                 return -1;
1918         }
1919
1920 ipreallocated:
1921         /*
1922          * Tell all nodes to run eventscripts to process the
1923          * "ipreallocated" event.  This can do a lot of things,
1924          * including restarting services to reconfigure them if public
1925          * IPs have moved.  Once upon a time this event only used to
1926          * update natgw.
1927          */
1928         retry_data = talloc_zero_array(tmp_ctx, bool, nodemap->num);
1929         CTDB_NO_MEMORY_FATAL(ctdb, retry_data);
1930         iprealloc_data.retry_nodes = retry_data;
1931         iprealloc_data.retry_count = 0;
1932         iprealloc_data.fail_callback = fail_callback;
1933         iprealloc_data.fail_callback_data = callback_data;
1934         iprealloc_data.nodemap = nodemap;
1935
1936         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1937         ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
1938                                         nodes, 0, TAKEOVER_TIMEOUT(),
1939                                         false, tdb_null,
1940                                         NULL, iprealloc_fail_callback,
1941                                         &iprealloc_data);
1942         if (ret != 0) {
1943                 /* If the control failed then we should retry to any
1944                  * nodes flagged by iprealloc_fail_callback using the
1945                  * EVENTSCRIPT control.  This is a best-effort at
1946                  * backward compatiblity when running a mixed cluster
1947                  * where some nodes have not yet been upgraded to
1948                  * support the IPREALLOCATED control.
1949                  */
1950                 DEBUG(DEBUG_WARNING,
1951                       ("Retry ipreallocated to some nodes using eventscript control\n"));
1952
1953                 nodes = talloc_array(tmp_ctx, uint32_t,
1954                                      iprealloc_data.retry_count);
1955                 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
1956
1957                 j = 0;
1958                 for (i=0; i<nodemap->num; i++) {
1959                         if (iprealloc_data.retry_nodes[i]) {
1960                                 nodes[j] = i;
1961                                 j++;
1962                         }
1963                 }
1964
1965                 data.dptr  = discard_const("ipreallocated");
1966                 data.dsize = strlen((char *)data.dptr) + 1; 
1967                 ret = ctdb_client_async_control(ctdb,
1968                                                 CTDB_CONTROL_RUN_EVENTSCRIPTS,
1969                                                 nodes, 0, TAKEOVER_TIMEOUT(),
1970                                                 false, data,
1971                                                 NULL, fail_callback,
1972                                                 callback_data);
1973                 if (ret != 0) {
1974                         DEBUG(DEBUG_ERR, (__location__ " failed to send control to run eventscripts with \"ipreallocated\"\n"));
1975                 }
1976         }
1977
1978         talloc_free(tmp_ctx);
1979         return ret;
1980 }
1981
1982
1983 /*
1984   destroy a ctdb_client_ip structure
1985  */
1986 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1987 {
1988         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1989                 ctdb_addr_to_str(&ip->addr),
1990                 ntohs(ip->addr.ip.sin_port),
1991                 ip->client_id));
1992
1993         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1994         return 0;
1995 }
1996
1997 /*
1998   called by a client to inform us of a TCP connection that it is managing
1999   that should tickled with an ACK when IP takeover is done
2000  */
2001 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
2002                                 TDB_DATA indata)
2003 {
2004         struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
2005         struct ctdb_connection *tcp_sock = NULL;
2006         struct ctdb_tcp_list *tcp;
2007         struct ctdb_connection t;
2008         int ret;
2009         TDB_DATA data;
2010         struct ctdb_client_ip *ip;
2011         struct ctdb_vnn *vnn;
2012         ctdb_sock_addr addr;
2013
2014         /* If we don't have public IPs, tickles are useless */
2015         if (ctdb->vnn == NULL) {
2016                 return 0;
2017         }
2018
2019         tcp_sock = (struct ctdb_connection *)indata.dptr;
2020
2021         addr = tcp_sock->src;
2022         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
2023         addr = tcp_sock->dst;
2024         ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
2025
2026         ZERO_STRUCT(addr);
2027         memcpy(&addr, &tcp_sock->dst, sizeof(addr));
2028         vnn = find_public_ip_vnn(ctdb, &addr);
2029         if (vnn == NULL) {
2030                 switch (addr.sa.sa_family) {
2031                 case AF_INET:
2032                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
2033                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
2034                                         ctdb_addr_to_str(&addr)));
2035                         }
2036                         break;
2037                 case AF_INET6:
2038                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
2039                                 ctdb_addr_to_str(&addr)));
2040                         break;
2041                 default:
2042                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
2043                 }
2044
2045                 return 0;
2046         }
2047
2048         if (vnn->pnn != ctdb->pnn) {
2049                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
2050                         ctdb_addr_to_str(&addr),
2051                         client_id, client->pid));
2052                 /* failing this call will tell smbd to die */
2053                 return -1;
2054         }
2055
2056         ip = talloc(client, struct ctdb_client_ip);
2057         CTDB_NO_MEMORY(ctdb, ip);
2058
2059         ip->ctdb      = ctdb;
2060         ip->addr      = addr;
2061         ip->client_id = client_id;
2062         talloc_set_destructor(ip, ctdb_client_ip_destructor);
2063         DLIST_ADD(ctdb->client_ip_list, ip);
2064
2065         tcp = talloc(client, struct ctdb_tcp_list);
2066         CTDB_NO_MEMORY(ctdb, tcp);
2067
2068         tcp->connection.src = tcp_sock->src;
2069         tcp->connection.dst = tcp_sock->dst;
2070
2071         DLIST_ADD(client->tcp_list, tcp);
2072
2073         t.src = tcp_sock->src;
2074         t.dst = tcp_sock->dst;
2075
2076         data.dptr = (uint8_t *)&t;
2077         data.dsize = sizeof(t);
2078
2079         switch (addr.sa.sa_family) {
2080         case AF_INET:
2081                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2082                         (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
2083                         ctdb_addr_to_str(&tcp_sock->src),
2084                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
2085                 break;
2086         case AF_INET6:
2087                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2088                         (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
2089                         ctdb_addr_to_str(&tcp_sock->src),
2090                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
2091                 break;
2092         default:
2093                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
2094         }
2095
2096
2097         /* tell all nodes about this tcp connection */
2098         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
2099                                        CTDB_CONTROL_TCP_ADD,
2100                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2101         if (ret != 0) {
2102                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
2103                 return -1;
2104         }
2105
2106         return 0;
2107 }
2108
2109 /*
2110   find a tcp address on a list
2111  */
2112 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
2113                                            struct ctdb_connection *tcp)
2114 {
2115         int i;
2116
2117         if (array == NULL) {
2118                 return NULL;
2119         }
2120
2121         for (i=0;i<array->num;i++) {
2122                 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
2123                     ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
2124                         return &array->connections[i];
2125                 }
2126         }
2127         return NULL;
2128 }
2129
2130
2131
2132 /*
2133   called by a daemon to inform us of a TCP connection that one of its
2134   clients managing that should tickled with an ACK when IP takeover is
2135   done
2136  */
2137 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
2138 {
2139         struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
2140         struct ctdb_tcp_array *tcparray;
2141         struct ctdb_connection tcp;
2142         struct ctdb_vnn *vnn;
2143
2144         /* If we don't have public IPs, tickles are useless */
2145         if (ctdb->vnn == NULL) {
2146                 return 0;
2147         }
2148
2149         vnn = find_public_ip_vnn(ctdb, &p->dst);
2150         if (vnn == NULL) {
2151                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
2152                         ctdb_addr_to_str(&p->dst)));
2153
2154                 return -1;
2155         }
2156
2157
2158         tcparray = vnn->tcp_array;
2159
2160         /* If this is the first tickle */
2161         if (tcparray == NULL) {
2162                 tcparray = talloc(vnn, struct ctdb_tcp_array);
2163                 CTDB_NO_MEMORY(ctdb, tcparray);
2164                 vnn->tcp_array = tcparray;
2165
2166                 tcparray->num = 0;
2167                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
2168                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2169
2170                 tcparray->connections[tcparray->num].src = p->src;
2171                 tcparray->connections[tcparray->num].dst = p->dst;
2172                 tcparray->num++;
2173
2174                 if (tcp_update_needed) {
2175                         vnn->tcp_update_needed = true;
2176                 }
2177                 return 0;
2178         }
2179
2180
2181         /* Do we already have this tickle ?*/
2182         tcp.src = p->src;
2183         tcp.dst = p->dst;
2184         if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
2185                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
2186                         ctdb_addr_to_str(&tcp.dst),
2187                         ntohs(tcp.dst.ip.sin_port),
2188                         vnn->pnn));
2189                 return 0;
2190         }
2191
2192         /* A new tickle, we must add it to the array */
2193         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
2194                                         struct ctdb_connection,
2195                                         tcparray->num+1);
2196         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2197
2198         tcparray->connections[tcparray->num].src = p->src;
2199         tcparray->connections[tcparray->num].dst = p->dst;
2200         tcparray->num++;
2201
2202         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
2203                 ctdb_addr_to_str(&tcp.dst),
2204                 ntohs(tcp.dst.ip.sin_port),
2205                 vnn->pnn));
2206
2207         if (tcp_update_needed) {
2208                 vnn->tcp_update_needed = true;
2209         }
2210
2211         return 0;
2212 }
2213
2214
2215 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
2216 {
2217         struct ctdb_connection *tcpp;
2218
2219         if (vnn == NULL) {
2220                 return;
2221         }
2222
2223         /* if the array is empty we cant remove it
2224            and we don't need to do anything
2225          */
2226         if (vnn->tcp_array == NULL) {
2227                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
2228                         ctdb_addr_to_str(&conn->dst),
2229                         ntohs(conn->dst.ip.sin_port)));
2230                 return;
2231         }
2232
2233
2234         /* See if we know this connection
2235            if we don't know this connection  then we dont need to do anything
2236          */
2237         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2238         if (tcpp == NULL) {
2239                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2240                         ctdb_addr_to_str(&conn->dst),
2241                         ntohs(conn->dst.ip.sin_port)));
2242                 return;
2243         }
2244
2245
2246         /* We need to remove this entry from the array.
2247            Instead of allocating a new array and copying data to it
2248            we cheat and just copy the last entry in the existing array
2249            to the entry that is to be removed and just shring the 
2250            ->num field
2251          */
2252         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2253         vnn->tcp_array->num--;
2254
2255         /* If we deleted the last entry we also need to remove the entire array
2256          */
2257         if (vnn->tcp_array->num == 0) {
2258                 talloc_free(vnn->tcp_array);
2259                 vnn->tcp_array = NULL;
2260         }               
2261
2262         vnn->tcp_update_needed = true;
2263
2264         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2265                 ctdb_addr_to_str(&conn->src),
2266                 ntohs(conn->src.ip.sin_port)));
2267 }
2268
2269
2270 /*
2271   called by a daemon to inform us of a TCP connection that one of its
2272   clients used are no longer needed in the tickle database
2273  */
2274 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2275 {
2276         struct ctdb_vnn *vnn;
2277         struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
2278
2279         /* If we don't have public IPs, tickles are useless */
2280         if (ctdb->vnn == NULL) {
2281                 return 0;
2282         }
2283
2284         vnn = find_public_ip_vnn(ctdb, &conn->dst);
2285         if (vnn == NULL) {
2286                 DEBUG(DEBUG_ERR,
2287                       (__location__ " unable to find public address %s\n",
2288                        ctdb_addr_to_str(&conn->dst)));
2289                 return 0;
2290         }
2291
2292         ctdb_remove_connection(vnn, conn);
2293
2294         return 0;
2295 }
2296
2297
2298 /*
2299   Called when another daemon starts - causes all tickles for all
2300   public addresses we are serving to be sent to the new node on the
2301   next check.  This actually causes the next scheduled call to
2302   tdb_update_tcp_tickles() to update all nodes.  This is simple and
2303   doesn't require careful error handling.
2304  */
2305 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
2306 {
2307         struct ctdb_vnn *vnn;
2308
2309         DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
2310                            (unsigned long) pnn));
2311
2312         for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2313                 vnn->tcp_update_needed = true;
2314         }
2315
2316         return 0;
2317 }
2318
2319
2320 /*
2321   called when a client structure goes away - hook to remove
2322   elements from the tcp_list in all daemons
2323  */
2324 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2325 {
2326         while (client->tcp_list) {
2327                 struct ctdb_vnn *vnn;
2328                 struct ctdb_tcp_list *tcp = client->tcp_list;
2329                 struct ctdb_connection *conn = &tcp->connection;
2330
2331                 DLIST_REMOVE(client->tcp_list, tcp);
2332
2333                 vnn = find_public_ip_vnn(client->ctdb,
2334                                          &conn->dst);
2335                 if (vnn == NULL) {
2336                         DEBUG(DEBUG_ERR,
2337                               (__location__ " unable to find public address %s\n",
2338                                ctdb_addr_to_str(&conn->dst)));
2339                         continue;
2340                 }
2341
2342                 /* If the IP address is hosted on this node then
2343                  * remove the connection. */
2344                 if (vnn->pnn == client->ctdb->pnn) {
2345                         ctdb_remove_connection(vnn, conn);
2346                 }
2347
2348                 /* Otherwise this function has been called because the
2349                  * server IP address has been released to another node
2350                  * and the client has exited.  This means that we
2351                  * should not delete the connection information.  The
2352                  * takeover node processes connections too. */
2353         }
2354 }
2355
2356
2357 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2358 {
2359         struct ctdb_vnn *vnn;
2360         int count = 0;
2361         TDB_DATA data;
2362
2363         if (ctdb->tunable.disable_ip_failover == 1) {
2364                 return;
2365         }
2366
2367         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2368                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2369                         ctdb_vnn_unassign_iface(ctdb, vnn);
2370                         continue;
2371                 }
2372                 if (!vnn->iface) {
2373                         continue;
2374                 }
2375
2376                 /* Don't allow multiple releases at once.  Some code,
2377                  * particularly ctdb_tickle_sentenced_connections() is
2378                  * not re-entrant */
2379                 if (vnn->update_in_flight) {
2380                         DEBUG(DEBUG_WARNING,
2381                               (__location__
2382                                " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
2383                                     ctdb_addr_to_str(&vnn->public_address),
2384                                     vnn->public_netmask_bits,
2385                                     ctdb_vnn_iface_string(vnn)));
2386                         continue;
2387                 }
2388                 vnn->update_in_flight = true;
2389
2390                 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
2391                                     ctdb_addr_to_str(&vnn->public_address),
2392                                     vnn->public_netmask_bits,
2393                                     ctdb_vnn_iface_string(vnn)));
2394
2395                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2396                                   ctdb_vnn_iface_string(vnn),
2397                                   ctdb_addr_to_str(&vnn->public_address),
2398                                   vnn->public_netmask_bits);
2399
2400                 data.dptr = (uint8_t *)talloc_strdup(
2401                                 vnn, ctdb_addr_to_str(&vnn->public_address));
2402                 if (data.dptr != NULL) {
2403                         data.dsize = strlen((char *)data.dptr) + 1;
2404                         ctdb_daemon_send_message(ctdb, ctdb->pnn,
2405                                                  CTDB_SRVID_RELEASE_IP, data);
2406                         talloc_free(data.dptr);
2407                 }
2408
2409                 ctdb_vnn_unassign_iface(ctdb, vnn);
2410                 vnn->update_in_flight = false;
2411                 count++;
2412         }
2413
2414         DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
2415 }
2416
2417
2418 /*
2419   get list of public IPs
2420  */
2421 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
2422                                     struct ctdb_req_control_old *c, TDB_DATA *outdata)
2423 {
2424         int i, num, len;
2425         struct ctdb_public_ip_list_old *ips;
2426         struct ctdb_vnn *vnn;
2427         bool only_available = false;
2428
2429         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2430                 only_available = true;
2431         }
2432
2433         /* count how many public ip structures we have */
2434         num = 0;
2435         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2436                 num++;
2437         }
2438
2439         len = offsetof(struct ctdb_public_ip_list_old, ips) +
2440                 num*sizeof(struct ctdb_public_ip);
2441         ips = talloc_zero_size(outdata, len);
2442         CTDB_NO_MEMORY(ctdb, ips);
2443
2444         i = 0;
2445         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2446                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2447                         continue;
2448                 }
2449                 ips->ips[i].pnn  = vnn->pnn;
2450                 ips->ips[i].addr = vnn->public_address;
2451                 i++;
2452         }
2453         ips->num = i;
2454         len = offsetof(struct ctdb_public_ip_list_old, ips) +
2455                 i*sizeof(struct ctdb_public_ip);
2456
2457         outdata->dsize = len;
2458         outdata->dptr  = (uint8_t *)ips;
2459
2460         return 0;
2461 }
2462
2463
2464 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2465                                         struct ctdb_req_control_old *c,
2466                                         TDB_DATA indata,
2467                                         TDB_DATA *outdata)
2468 {
2469         int i, num, len;
2470         ctdb_sock_addr *addr;
2471         struct ctdb_public_ip_info_old *info;
2472         struct ctdb_vnn *vnn;
2473
2474         addr = (ctdb_sock_addr *)indata.dptr;
2475
2476         vnn = find_public_ip_vnn(ctdb, addr);
2477         if (vnn == NULL) {
2478                 /* if it is not a public ip   it could be our 'single ip' */
2479                 if (ctdb->single_ip_vnn) {
2480                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
2481                                 vnn = ctdb->single_ip_vnn;
2482                         }
2483                 }
2484         }
2485         if (vnn == NULL) {
2486                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2487                                  "'%s'not a public address\n",
2488                                  ctdb_addr_to_str(addr)));
2489                 return -1;
2490         }
2491
2492         /* count how many public ip structures we have */
2493         num = 0;
2494         for (;vnn->ifaces[num];) {
2495                 num++;
2496         }
2497
2498         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2499                 num*sizeof(struct ctdb_iface);
2500         info = talloc_zero_size(outdata, len);
2501         CTDB_NO_MEMORY(ctdb, info);
2502
2503         info->ip.addr = vnn->public_address;
2504         info->ip.pnn = vnn->pnn;
2505         info->active_idx = 0xFFFFFFFF;
2506
2507         for (i=0; vnn->ifaces[i]; i++) {
2508                 struct ctdb_interface *cur;
2509
2510                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2511                 if (cur == NULL) {
2512                         DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2513                                            vnn->ifaces[i]));
2514                         return -1;
2515                 }
2516                 if (vnn->iface == cur) {
2517                         info->active_idx = i;
2518                 }
2519                 strncpy(info->ifaces[i].name, cur->name,
2520                         sizeof(info->ifaces[i].name));
2521                 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
2522                 info->ifaces[i].link_state = cur->link_up;
2523                 info->ifaces[i].references = cur->references;
2524         }
2525         info->num = i;
2526         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2527                 i*sizeof(struct ctdb_iface);
2528
2529         outdata->dsize = len;
2530         outdata->dptr  = (uint8_t *)info;
2531
2532         return 0;
2533 }
2534
2535 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2536                                 struct ctdb_req_control_old *c,
2537                                 TDB_DATA *outdata)
2538 {
2539         int i, num, len;
2540         struct ctdb_iface_list_old *ifaces;
2541         struct ctdb_interface *cur;
2542
2543         /* count how many public ip structures we have */
2544         num = 0;
2545         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2546                 num++;
2547         }
2548
2549         len = offsetof(struct ctdb_iface_list_old, ifaces) +
2550                 num*sizeof(struct ctdb_iface);
2551         ifaces = talloc_zero_size(outdata, len);
2552         CTDB_NO_MEMORY(ctdb, ifaces);
2553
2554         i = 0;
2555         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2556                 strncpy(ifaces->ifaces[i].name, cur->name,
2557                         sizeof(ifaces->ifaces[i].name));
2558                 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
2559                 ifaces->ifaces[i].link_state = cur->link_up;
2560                 ifaces->ifaces[i].references = cur->references;
2561                 i++;
2562         }
2563         ifaces->num = i;
2564         len = offsetof(struct ctdb_iface_list_old, ifaces) +
2565                 i*sizeof(struct ctdb_iface);
2566
2567         outdata->dsize = len;
2568         outdata->dptr  = (uint8_t *)ifaces;
2569
2570         return 0;
2571 }
2572
2573 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2574                                     struct ctdb_req_control_old *c,
2575                                     TDB_DATA indata)
2576 {
2577         struct ctdb_iface *info;
2578         struct ctdb_interface *iface;
2579         bool link_up = false;
2580
2581         info = (struct ctdb_iface *)indata.dptr;
2582
2583         if (info->name[CTDB_IFACE_SIZE] != '\0') {
2584                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2585                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2586                                   len, len, info->name));
2587                 return -1;
2588         }
2589
2590         switch (info->link_state) {
2591         case 0:
2592                 link_up = false;
2593                 break;
2594         case 1:
2595                 link_up = true;
2596                 break;
2597         default:
2598                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2599                                   (unsigned int)info->link_state));
2600                 return -1;
2601         }
2602
2603         if (info->references != 0) {
2604                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2605                                   (unsigned int)info->references));
2606                 return -1;
2607         }
2608
2609         iface = ctdb_find_iface(ctdb, info->name);
2610         if (iface == NULL) {
2611                 return -1;
2612         }
2613
2614         if (link_up == iface->link_up) {
2615                 return 0;
2616         }
2617
2618         DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2619               ("iface[%s] has changed it's link status %s => %s\n",
2620                iface->name,
2621                iface->link_up?"up":"down",
2622                link_up?"up":"down"));
2623
2624         iface->link_up = link_up;
2625         return 0;
2626 }
2627
2628
2629 /*
2630   called by a daemon to inform us of the entire list of TCP tickles for
2631   a particular public address.
2632   this control should only be sent by the node that is currently serving
2633   that public address.
2634  */
2635 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2636 {
2637         struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
2638         struct ctdb_tcp_array *tcparray;
2639         struct ctdb_vnn *vnn;
2640
2641         /* We must at least have tickles.num or else we cant verify the size
2642            of the received data blob
2643          */
2644         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2645                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2646                 return -1;
2647         }
2648
2649         /* verify that the size of data matches what we expect */
2650         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2651                          + sizeof(struct ctdb_connection) * list->num) {
2652                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2653                 return -1;
2654         }
2655
2656         DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2657                            ctdb_addr_to_str(&list->addr)));
2658
2659         vnn = find_public_ip_vnn(ctdb, &list->addr);
2660         if (vnn == NULL) {
2661                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2662                         ctdb_addr_to_str(&list->addr)));
2663
2664                 return 1;
2665         }
2666
2667         if (vnn->pnn == ctdb->pnn) {
2668                 DEBUG(DEBUG_INFO,
2669                       ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2670                        ctdb_addr_to_str(&list->addr)));
2671                 return 0;
2672         }
2673
2674         /* remove any old ticklelist we might have */
2675         talloc_free(vnn->tcp_array);
2676         vnn->tcp_array = NULL;
2677
2678         tcparray = talloc(vnn, struct ctdb_tcp_array);
2679         CTDB_NO_MEMORY(ctdb, tcparray);
2680
2681         tcparray->num = list->num;
2682
2683         tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
2684         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2685
2686         memcpy(tcparray->connections, &list->connections[0],
2687                sizeof(struct ctdb_connection)*tcparray->num);
2688
2689         /* We now have a new fresh tickle list array for this vnn */
2690         vnn->tcp_array = tcparray;
2691
2692         return 0;
2693 }
2694
2695 /*
2696   called to return the full list of tickles for the puclic address associated 
2697   with the provided vnn
2698  */
2699 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2700 {
2701         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2702         struct ctdb_tickle_list_old *list;
2703         struct ctdb_tcp_array *tcparray;
2704         int num;
2705         struct ctdb_vnn *vnn;
2706
2707         vnn = find_public_ip_vnn(ctdb, addr);
2708         if (vnn == NULL) {
2709                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n", 
2710                         ctdb_addr_to_str(addr)));
2711
2712                 return 1;
2713         }
2714
2715         tcparray = vnn->tcp_array;
2716         if (tcparray) {
2717                 num = tcparray->num;
2718         } else {
2719                 num = 0;
2720         }
2721
2722         outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
2723                         + sizeof(struct ctdb_connection) * num;
2724
2725         outdata->dptr  = talloc_size(outdata, outdata->dsize);
2726         CTDB_NO_MEMORY(ctdb, outdata->dptr);
2727         list = (struct ctdb_tickle_list_old *)outdata->dptr;
2728
2729         list->addr = *addr;
2730         list->num = num;
2731         if (num) {
2732                 memcpy(&list->connections[0], tcparray->connections,
2733                         sizeof(struct ctdb_connection) * num);
2734         }
2735
2736         return 0;
2737 }
2738
2739
2740 /*
2741   set the list of all tcp tickles for a public address
2742  */
2743 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2744                                             ctdb_sock_addr *addr,
2745                                             struct ctdb_tcp_array *tcparray)
2746 {
2747         int ret, num;
2748         TDB_DATA data;
2749         struct ctdb_tickle_list_old *list;
2750
2751         if (tcparray) {
2752                 num = tcparray->num;
2753         } else {
2754                 num = 0;
2755         }
2756
2757         data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2758                         sizeof(struct ctdb_connection) * num;
2759         data.dptr = talloc_size(ctdb, data.dsize);
2760         CTDB_NO_MEMORY(ctdb, data.dptr);
2761
2762         list = (struct ctdb_tickle_list_old *)data.dptr;
2763         list->addr = *addr;
2764         list->num = num;
2765         if (tcparray) {
2766                 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2767         }
2768
2769         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2770                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2771                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2772         if (ret != 0) {
2773                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2774                 return -1;
2775         }
2776
2777         talloc_free(data.dptr);
2778
2779         return ret;
2780 }
2781
2782
2783 /*
2784   perform tickle updates if required
2785  */
2786 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2787                                     struct tevent_timer *te,
2788                                     struct timeval t, void *private_data)
2789 {
2790         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2791         int ret;
2792         struct ctdb_vnn *vnn;
2793
2794         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2795                 /* we only send out updates for public addresses that 
2796                    we have taken over
2797                  */
2798                 if (ctdb->pnn != vnn->pnn) {
2799                         continue;
2800                 }
2801                 /* We only send out the updates if we need to */
2802                 if (!vnn->tcp_update_needed) {
2803                         continue;
2804                 }
2805                 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2806                                                        &vnn->public_address,
2807                                                        vnn->tcp_array);
2808                 if (ret != 0) {
2809                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2810                                 ctdb_addr_to_str(&vnn->public_address)));
2811                 } else {
2812                         DEBUG(DEBUG_INFO,
2813                               ("Sent tickle update for public address %s\n",
2814                                ctdb_addr_to_str(&vnn->public_address)));
2815                         vnn->tcp_update_needed = false;
2816                 }
2817         }
2818
2819         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2820                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2821                          ctdb_update_tcp_tickles, ctdb);
2822 }
2823
2824 /*
2825   start periodic update of tcp tickles
2826  */
2827 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2828 {
2829         ctdb->tickle_update_context = talloc_new(ctdb);
2830
2831         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2832                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2833                          ctdb_update_tcp_tickles, ctdb);
2834 }
2835
2836
2837
2838
/* State for one run of repeated gratuitous ARPs sent by
 * send_gratious_arp() */
struct control_gratious_arp {
        struct ctdb_context *ctdb;      /* used to reach the event context when re-arming the timer */
        ctdb_sock_addr addr;            /* address passed to ctdb_sys_send_arp() */
        const char *iface;              /* interface name passed to ctdb_sys_send_arp() */
        int count;                      /* send attempts so far; the state is freed at CTDB_ARP_REPEAT */
};
2845
2846 /*
2847   send a control_gratuitous arp
2848  */
2849 static void send_gratious_arp(struct tevent_context *ev,
2850                               struct tevent_timer *te,
2851                               struct timeval t, void *private_data)
2852 {
2853         int ret;
2854         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2855                                                         struct control_gratious_arp);
2856
2857         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2858         if (ret != 0) {
2859                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2860                                  arp->iface, strerror(errno)));
2861         }
2862
2863
2864         arp->count++;
2865         if (arp->count == CTDB_ARP_REPEAT) {
2866                 talloc_free(arp);
2867                 return;
2868         }
2869
2870         tevent_add_timer(arp->ctdb->ev, arp,
2871                          timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2872                          send_gratious_arp, arp);
2873 }
2874
2875
2876 /*
2877   send a gratious arp 
2878  */
2879 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2880 {
2881         struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2882         struct control_gratious_arp *arp;
2883
2884         /* verify the size of indata */
2885         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2886                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2887                                  (unsigned)indata.dsize, 
2888                                  (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2889                 return -1;
2890         }
2891         if (indata.dsize != 
2892                 ( offsetof(struct ctdb_addr_info_old, iface)
2893                 + gratious_arp->len ) ){
2894
2895                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2896                         "but should be %u bytes\n", 
2897                          (unsigned)indata.dsize, 
2898                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2899                 return -1;
2900         }
2901
2902
2903         arp = talloc(ctdb, struct control_gratious_arp);
2904         CTDB_NO_MEMORY(ctdb, arp);
2905
2906         arp->ctdb  = ctdb;
2907         arp->addr   = gratious_arp->addr;
2908         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2909         CTDB_NO_MEMORY(ctdb, arp->iface);
2910         arp->count = 0;
2911
2912         tevent_add_timer(arp->ctdb->ev, arp,
2913                          timeval_zero(), send_gratious_arp, arp);
2914
2915         return 0;
2916 }
2917
2918 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2919 {
2920         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2921         int ret;
2922
2923         /* verify the size of indata */
2924         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2925                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2926                 return -1;
2927         }
2928         if (indata.dsize != 
2929                 ( offsetof(struct ctdb_addr_info_old, iface)
2930                 + pub->len ) ){
2931
2932                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2933                         "but should be %u bytes\n", 
2934                          (unsigned)indata.dsize, 
2935                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2936                 return -1;
2937         }
2938
2939         DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2940
2941         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2942
2943         if (ret != 0) {
2944                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2945                 return -1;
2946         }
2947
2948         return 0;
2949 }
2950
/* State handed to delete_ip_callback() */
struct delete_ip_callback_state {
        struct ctdb_req_control_old *c; /* control request that delete_ip_callback() replies to */
};
2954
2955 /*
2956   called when releaseip event finishes for del_public_address
2957  */
2958 static void delete_ip_callback(struct ctdb_context *ctdb,
2959                                int32_t status, TDB_DATA data,
2960                                const char *errormsg,
2961                                void *private_data)
2962 {
2963         struct delete_ip_callback_state *state =
2964                 talloc_get_type(private_data, struct delete_ip_callback_state);
2965
2966         /* If release failed then fail. */
2967         ctdb_request_control_reply(ctdb, state->c, NULL, status, errormsg);
2968         talloc_free(private_data);
2969 }
2970
2971 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb,
2972                                         struct ctdb_req_control_old *c,
2973                                         TDB_DATA indata, bool *async_reply)
2974 {
2975         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2976         struct ctdb_vnn *vnn;
2977
2978         /* verify the size of indata */
2979         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2980                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2981                 return -1;
2982         }
2983         if (indata.dsize != 
2984                 ( offsetof(struct ctdb_addr_info_old, iface)
2985                 + pub->len ) ){
2986
2987                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2988                         "but should be %u bytes\n", 
2989                          (unsigned)indata.dsize, 
2990                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2991                 return -1;
2992         }
2993
2994         DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2995
2996         /* walk over all public addresses until we find a match */
2997         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2998                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2999                         if (vnn->pnn == ctdb->pnn) {
3000                                 struct delete_ip_callback_state *state;
3001                                 struct ctdb_public_ip *ip;
3002                                 TDB_DATA data;
3003                                 int ret;
3004
3005                                 vnn->delete_pending = true;
3006
3007                                 state = talloc(ctdb,
3008                                                struct delete_ip_callback_state);
3009                                 CTDB_NO_MEMORY(ctdb, state);
3010                                 state->c = c;
3011
3012                                 ip = talloc(state, struct ctdb_public_ip);
3013                                 if (ip == NULL) {
3014                                         DEBUG(DEBUG_ERR,
3015                                               (__location__ " Out of memory\n"));
3016                                         talloc_free(state);
3017                                         return -1;
3018                                 }
3019                                 ip->pnn = -1;
3020                                 ip->addr = pub->addr;
3021
3022                                 data.dsize = sizeof(struct ctdb_public_ip);
3023                                 data.dptr = (unsigned char *)ip;
3024
3025                                 ret = ctdb_daemon_send_control(ctdb,
3026                                                                ctdb_get_pnn(ctdb),
3027                                                                0,
3028                                                                CTDB_CONTROL_RELEASE_IP,
3029                                                                0, 0,
3030                                                                data,
3031                                                                delete_ip_callback,
3032                                                                state);
3033                                 if (ret == -1) {
3034                                         DEBUG(DEBUG_ERR,
3035                                               (__location__ "Unable to send "
3036                                                "CTDB_CONTROL_RELEASE_IP\n"));
3037                                         talloc_free(state);
3038                                         return -1;
3039                                 }
3040
3041                                 state->c = talloc_steal(state, c);
3042                                 *async_reply = true;
3043                         } else {
3044                                 /* This IP is not hosted on the
3045                                  * current node so just delete it
3046                                  * now. */
3047                                 do_delete_ip(ctdb, vnn);
3048                         }
3049
3050                         return 0;
3051                 }
3052         }
3053
3054         DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
3055                          ctdb_addr_to_str(&pub->addr)));
3056         return -1;
3057 }
3058
3059
/* State handed to ctdb_ipreallocated_callback() */
struct ipreallocated_callback_state {
        struct ctdb_req_control_old *c; /* control request that ctdb_ipreallocated_callback() replies to */
};
3063
3064 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
3065                                         int status, void *p)
3066 {
3067         struct ipreallocated_callback_state *state =
3068                 talloc_get_type(p, struct ipreallocated_callback_state);
3069
3070         if (status != 0) {
3071                 DEBUG(DEBUG_ERR,
3072                       (" \"ipreallocated\" event script failed (status %d)\n",
3073                        status));
3074                 if (status == -ETIME) {
3075                         ctdb_ban_self(ctdb);
3076                 }
3077         }
3078
3079         ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
3080         talloc_free(state);
3081 }
3082
3083 /* A control to run the ipreallocated event */
3084 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
3085                                    struct ctdb_req_control_old *c,
3086                                    bool *async_reply)
3087 {
3088         int ret;
3089         struct ipreallocated_callback_state *state;
3090
3091         state = talloc(ctdb, struct ipreallocated_callback_state);
3092         CTDB_NO_MEMORY(ctdb, state);
3093
3094         DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
3095
3096         ret = ctdb_event_script_callback(ctdb, state,
3097                                          ctdb_ipreallocated_callback, state,
3098                                          CTDB_EVENT_IPREALLOCATED,
3099                                          "%s", "");
3100
3101         if (ret != 0) {
3102                 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
3103                 talloc_free(state);
3104                 return -1;
3105         }
3106
3107         /* tell the control that we will be reply asynchronously */
3108         state->c    = talloc_steal(state, c);
3109         *async_reply = true;
3110
3111         return 0;
3112 }
3113
3114
3115 /* This function is called from the recovery daemon to verify that a remote
3116    node has the expected ip allocation.
3117    This is verified against ctdb->ip_tree
3118 */
3119 static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
3120                                        struct ctdb_public_ip_list *ips,
3121                                        uint32_t pnn)
3122 {
3123         struct public_ip_list *tmp_ip;
3124         int i;
3125
3126         if (ctdb->ip_tree == NULL) {
3127                 /* don't know the expected allocation yet, assume remote node
3128                    is correct. */
3129                 return 0;
3130         }
3131
3132         if (ips == NULL) {
3133                 return 0;
3134         }
3135
3136         for (i=0; i<ips->num; i++) {
3137                 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ip[i].addr));
3138                 if (tmp_ip == NULL) {
3139                         DEBUG(DEBUG_ERR,("Node %u has new or unknown public IP %s\n", pnn, ctdb_addr_to_str(&ips->ip[i].addr)));
3140                         return -1;
3141                 }
3142
3143                 if (tmp_ip->pnn == -1 || ips->ip[i].pnn == -1) {
3144                         continue;
3145                 }
3146
3147                 if (tmp_ip->pnn != ips->ip[i].pnn) {
3148                         DEBUG(DEBUG_ERR,
3149                               ("Inconsistent IP allocation - node %u thinks %s is held by node %u while it is assigned to node %u\n",
3150                                pnn,
3151                                ctdb_addr_to_str(&ips->ip[i].addr),
3152                                ips->ip[i].pnn, tmp_ip->pnn));
3153                         return -1;
3154                 }
3155         }
3156
3157         return 0;
3158 }
3159
3160 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
3161 {
3162         struct public_ip_list *tmp_ip;
3163
3164         /* IP tree is never built if DisableIPFailover is set */
3165         if (ctdb->tunable.disable_ip_failover != 0) {
3166                 return 0;
3167         }
3168
3169         if (ctdb->ip_tree == NULL) {
3170                 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
3171                 return -1;
3172         }
3173
3174         tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
3175         if (tmp_ip == NULL) {
3176                 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
3177                 return -1;
3178         }
3179
3180         DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
3181         tmp_ip->pnn = ip->pnn;
3182
3183         return 0;
3184 }
3185
3186 void clear_ip_assignment_tree(struct ctdb_context *ctdb)
3187 {
3188         TALLOC_FREE(ctdb->ip_tree);
3189 }
3190
/*
 * State of one in-flight "reload public IPs" request: the daemon forks a
 * child to do the work and keeps this handle until the child reports
 * back or the request times out.
 */
struct ctdb_reloadips_handle {
        /* Daemon context this request belongs to */
        struct ctdb_context *ctdb;

        /* Control to answer when the handle is destroyed, NULL if none */
        struct ctdb_req_control_old *c;

        /* Status sent in the reply; starts at -1, set from the child's result */
        int status;

        /* Pipe to the child: the parent reads fd[0], the child writes fd[1] */
        int fd[2];

        /* PID of the forked child, killed when the handle is destroyed */
        pid_t child;

        /* Read event on fd[0] that delivers the child's result byte */
        struct tevent_fd *fde;
};
3199
3200 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
3201 {
3202         if (h == h->ctdb->reload_ips) {
3203                 h->ctdb->reload_ips = NULL;
3204         }
3205         if (h->c != NULL) {
3206                 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
3207                 h->c = NULL;
3208         }
3209         ctdb_kill(h->ctdb, h->child, SIGKILL);
3210         return 0;
3211 }
3212
3213 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
3214                                          struct tevent_timer *te,
3215                                          struct timeval t, void *private_data)
3216 {
3217         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3218
3219         talloc_free(h);
3220 }
3221
3222 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
3223                                          struct tevent_fd *fde,
3224                                          uint16_t flags, void *private_data)
3225 {
3226         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3227
3228         char res;
3229         int ret;
3230
3231         ret = sys_read(h->fd[0], &res, 1);
3232         if (ret < 1 || res != 0) {
3233                 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
3234                 res = 1;
3235         }
3236         h->status = res;
3237
3238         talloc_free(h);
3239 }
3240
3241 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
3242 {
3243         TALLOC_CTX *mem_ctx = talloc_new(NULL);
3244         struct ctdb_public_ip_list_old *ips;
3245         struct ctdb_vnn *vnn;
3246         struct client_async_data *async_data;
3247         struct timeval timeout;
3248         TDB_DATA data;
3249         struct ctdb_client_control_state *state;
3250         bool first_add;
3251         int i, ret;
3252
3253         CTDB_NO_MEMORY(ctdb, mem_ctx);
3254
3255         /* Read IPs from local node */
3256         ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
3257                                        CTDB_CURRENT_NODE, mem_ctx, &ips);
3258         if (ret != 0) {
3259                 DEBUG(DEBUG_ERR,
3260                       ("Unable to fetch public IPs from local node\n"));
3261                 talloc_free(mem_ctx);
3262                 return -1;
3263         }
3264
3265         /* Read IPs file - this is safe since this is a child process */
3266         ctdb->vnn = NULL;
3267         if (ctdb_set_public_addresses(ctdb, false) != 0) {
3268                 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
3269                 talloc_free(mem_ctx);
3270                 return -1;
3271         }
3272
3273         async_data = talloc_zero(mem_ctx, struct client_async_data);
3274         CTDB_NO_MEMORY(ctdb, async_data);
3275
3276         /* Compare IPs between node and file for IPs to be deleted */
3277         for (i = 0; i < ips->num; i++) {
3278                 /* */
3279                 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3280                         if (ctdb_same_ip(&vnn->public_address,
3281                                          &ips->ips[i].addr)) {
3282                                 /* IP is still in file */
3283                                 break;
3284                         }
3285                 }
3286
3287                 if (vnn == NULL) {
3288                         /* Delete IP ips->ips[i] */
3289                         struct ctdb_addr_info_old *pub;
3290
3291                         DEBUG(DEBUG_NOTICE,
3292                               ("IP %s no longer configured, deleting it\n",
3293                                ctdb_addr_to_str(&ips->ips[i].addr)));
3294
3295                         pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
3296                         CTDB_NO_MEMORY(ctdb, pub);
3297
3298                         pub->addr  = ips->ips[i].addr;
3299                         pub->mask  = 0;
3300                         pub->len   = 0;
3301
3302                         timeout = TAKEOVER_TIMEOUT();
3303
3304                         data.dsize = offsetof(struct ctdb_addr_info_old,
3305                                               iface) + pub->len;
3306                         data.dptr = (uint8_t *)pub;
3307
3308                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3309                                                   CTDB_CONTROL_DEL_PUBLIC_IP,
3310                                                   0, data, async_data,
3311                                                   &timeout, NULL);
3312                         if (state == NULL) {
3313                                 DEBUG(DEBUG_ERR,
3314                                       (__location__
3315                                        " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
3316                                 goto failed;
3317                         }
3318
3319                         ctdb_client_async_add(async_data, state);
3320                 }
3321         }
3322
3323         /* Compare IPs between node and file for IPs to be added */
3324         first_add = true;
3325         for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3326                 for (i = 0; i < ips->num; i++) {
3327                         if (ctdb_same_ip(&vnn->public_address,
3328                                          &ips->ips[i].addr)) {
3329                                 /* IP already on node */
3330                                 break;
3331                         }
3332                 }
3333                 if (i == ips->num) {
3334                         /* Add IP ips->ips[i] */
3335                         struct ctdb_addr_info_old *pub;
3336                         const char *ifaces = NULL;
3337                         uint32_t len;
3338                         int iface = 0;
3339
3340                         DEBUG(DEBUG_NOTICE,
3341                               ("New IP %s configured, adding it\n",
3342                                ctdb_addr_to_str(&vnn->public_address)));
3343                         if (first_add) {
3344                                 uint32_t pnn = ctdb_get_pnn(ctdb);
3345
3346                                 data.dsize = sizeof(pnn);
3347                                 data.dptr  = (uint8_t *)&pnn;
3348
3349                                 ret = ctdb_client_send_message(
3350                                         ctdb,
3351                                         CTDB_BROADCAST_CONNECTED,
3352                                         CTDB_SRVID_REBALANCE_NODE,
3353                                         data);
3354                                 if (ret != 0) {
3355                                         DEBUG(DEBUG_WARNING,
3356                                               ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
3357                                 }
3358
3359                                 first_add = false;
3360                         }
3361
3362                         ifaces = vnn->ifaces[0];
3363                         iface = 1;
3364                         while (vnn->ifaces[iface] != NULL) {
3365                                 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
3366                                                          vnn->ifaces[iface]);
3367                                 iface++;
3368                         }
3369
3370                         len   = strlen(ifaces) + 1;
3371                         pub = talloc_zero_size(mem_ctx,
3372                                                offsetof(struct ctdb_addr_info_old, iface) + len);
3373                         CTDB_NO_MEMORY(ctdb, pub);
3374
3375                         pub->addr  = vnn->public_address;
3376                         pub->mask  = vnn->public_netmask_bits;
3377                         pub->len   = len;
3378                         memcpy(&pub->iface[0], ifaces, pub->len);
3379
3380                         timeout = TAKEOVER_TIMEOUT();
3381
3382                         data.dsize = offsetof(struct ctdb_addr_info_old,
3383                                               iface) + pub->len;
3384                         data.dptr = (uint8_t *)pub;
3385
3386                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3387                                                   CTDB_CONTROL_ADD_PUBLIC_IP,
3388                                                   0, data, async_data,
3389                                                   &timeout, NULL);
3390                         if (state == NULL) {
3391                                 DEBUG(DEBUG_ERR,
3392                                       (__location__
3393                                        " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
3394                                 goto failed;
3395                         }
3396
3397                         ctdb_client_async_add(async_data, state);
3398                 }
3399         }
3400
3401         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3402                 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
3403                 goto failed;
3404         }
3405
3406         talloc_free(mem_ctx);
3407         return 0;
3408
3409 failed:
3410         talloc_free(mem_ctx);
3411         return -1;
3412 }
3413
3414 /* This control is sent to force the node to re-read the public addresses file
3415    and drop any addresses we should nnot longer host, and add new addresses
3416    that we are now able to host
3417 */
3418 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
3419 {
3420         struct ctdb_reloadips_handle *h;
3421         pid_t parent = getpid();
3422
3423         if (ctdb->reload_ips != NULL) {
3424                 talloc_free(ctdb->reload_ips);
3425                 ctdb->reload_ips = NULL;
3426         }
3427
3428         h = talloc(ctdb, struct ctdb_reloadips_handle);
3429         CTDB_NO_MEMORY(ctdb, h);
3430         h->ctdb     = ctdb;
3431         h->c        = NULL;
3432         h->status   = -1;
3433         
3434         if (pipe(h->fd) == -1) {
3435                 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
3436                 talloc_free(h);
3437                 return -1;
3438         }
3439
3440         h->child = ctdb_fork(ctdb);
3441         if (h->child == (pid_t)-1) {
3442                 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
3443                 close(h->fd[0]);
3444                 close(h->fd[1]);
3445                 talloc_free(h);
3446                 return -1;
3447         }
3448
3449         /* child process */
3450         if (h->child == 0) {
3451                 signed char res = 0;
3452
3453                 close(h->fd[0]);
3454                 debug_extra = talloc_asprintf(NULL, "reloadips:");
3455
3456                 prctl_set_comment("ctdb_reloadips");
3457                 if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
3458                         DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
3459                         res = -1;
3460                 } else {
3461                         res = ctdb_reloadips_child(ctdb);
3462                         if (res != 0) {
3463                                 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
3464                         }
3465                 }
3466
3467                 sys_write(h->fd[1], &res, 1);
3468                 ctdb_wait_for_process_to_exit(parent);
3469                 _exit(0);
3470         }
3471
3472         h->c             = talloc_steal(h, c);
3473
3474         close(h->fd[1]);
3475         set_close_on_exec(h->fd[0]);
3476
3477         talloc_set_destructor(h, ctdb_reloadips_destructor);
3478
3479
3480         h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
3481                                ctdb_reloadips_child_handler, (void *)h);
3482         tevent_fd_set_auto_close(h->fde);
3483
3484         tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
3485                          ctdb_reloadips_timeout_event, h);
3486
3487         /* we reply later */
3488         *async_reply = true;
3489         return 0;
3490 }