ctdb-daemon: Move port filtering to server side when getting tickles
[samba.git] / ctdb / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6    Copyright (C) Martin Schwenke  2011
7
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3 of the License, or
11    (at your option) any later version.
12    
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17    
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, see <http://www.gnu.org/licenses/>.
20 */
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
26
27 #include <talloc.h>
28 #include <tevent.h>
29
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_process.h"
34
35 #include "ctdb_private.h"
36 #include "ctdb_client.h"
37
38 #include "common/rb_tree.h"
39 #include "common/reqid.h"
40 #include "common/system.h"
41 #include "common/common.h"
42 #include "common/logging.h"
43
44 #include "server/ipalloc.h"
45
/*
 * Expands to timeval_current_ofs() applied to the takeover_timeout
 * tunable.  Relies on a variable named "ctdb" being in scope at the
 * point of use.
 */
#define TAKEOVER_TIMEOUT() \
	timeval_current_ofs(ctdb->tunable.takeover_timeout, 0)

/* Seconds offset used when re-arming the gratuitous ARP timer */
#define CTDB_ARP_INTERVAL 1
/* Number of announcement rounds after which the ARP timer stops */
#define CTDB_ARP_REPEAT 3
50
/*
 * A local network interface that public addresses can be assigned to.
 * Entries are chained through prev/next.
 */
struct ctdb_interface {
	struct ctdb_interface *prev;
	struct ctdb_interface *next;
	const char *name;	/* interface name, matched with strcmp() */
	bool link_up;		/* false excludes the interface from selection */
	uint32_t references;	/* number of addresses assigned to it */
};
57
58 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
59 {
60         if (vnn->iface) {
61                 return vnn->iface->name;
62         }
63
64         return "__none__";
65 }
66
67 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
68 {
69         struct ctdb_interface *i;
70
71         if (strlen(iface) > CTDB_IFACE_SIZE) {
72                 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
73                 return -1;
74         }
75
76         /* Verify that we don't have an entry for this ip yet */
77         for (i=ctdb->ifaces;i;i=i->next) {
78                 if (strcmp(i->name, iface) == 0) {
79                         return 0;
80                 }
81         }
82
83         /* create a new structure for this interface */
84         i = talloc_zero(ctdb, struct ctdb_interface);
85         CTDB_NO_MEMORY_FATAL(ctdb, i);
86         i->name = talloc_strdup(i, iface);
87         CTDB_NO_MEMORY(ctdb, i->name);
88
89         i->link_up = true;
90
91         DLIST_ADD(ctdb->ifaces, i);
92
93         return 0;
94 }
95
96 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
97                                         const char *name)
98 {
99         int n;
100
101         for (n = 0; vnn->ifaces[n] != NULL; n++) {
102                 if (strcmp(name, vnn->ifaces[n]) == 0) {
103                         return true;
104                 }
105         }
106
107         return false;
108 }
109
110 /* If any interfaces now have no possible IPs then delete them.  This
111  * implementation is naive (i.e. simple) rather than clever
112  * (i.e. complex).  Given that this is run on delip and that operation
113  * is rare, this doesn't need to be efficient - it needs to be
114  * foolproof.  One alternative is reference counting, where the logic
115  * is distributed and can, therefore, be broken in multiple places.
116  * Another alternative is to build a red-black tree of interfaces that
117  * can have addresses (by walking ctdb->vnn once) and then walking
118  * ctdb->ifaces once and deleting those not in the tree.  Let's go to
119  * one of those if the naive implementation causes problems...  :-)
120  */
121 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
122                                         struct ctdb_vnn *vnn)
123 {
124         struct ctdb_interface *i, *next;
125
126         /* For each interface, check if there's an IP using it. */
127         for (i = ctdb->ifaces; i != NULL; i = next) {
128                 struct ctdb_vnn *tv;
129                 bool found;
130                 next = i->next;
131
132                 /* Only consider interfaces named in the given VNN. */
133                 if (!vnn_has_interface_with_name(vnn, i->name)) {
134                         continue;
135                 }
136
137                 /* Search for a vnn with this interface. */
138                 found = false;
139                 for (tv=ctdb->vnn; tv; tv=tv->next) {
140                         if (vnn_has_interface_with_name(tv, i->name)) {
141                                 found = true;
142                                 break;
143                         }
144                 }
145
146                 if (!found) {
147                         /* None of the VNNs are using this interface. */
148                         DLIST_REMOVE(ctdb->ifaces, i);
149                         talloc_free(i);
150                 }
151         }
152 }
153
154
155 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
156                                               const char *iface)
157 {
158         struct ctdb_interface *i;
159
160         for (i=ctdb->ifaces;i;i=i->next) {
161                 if (strcmp(i->name, iface) == 0) {
162                         return i;
163                 }
164         }
165
166         return NULL;
167 }
168
169 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
170                                                   struct ctdb_vnn *vnn)
171 {
172         int i;
173         struct ctdb_interface *cur = NULL;
174         struct ctdb_interface *best = NULL;
175
176         for (i=0; vnn->ifaces[i]; i++) {
177
178                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
179                 if (cur == NULL) {
180                         continue;
181                 }
182
183                 if (!cur->link_up) {
184                         continue;
185                 }
186
187                 if (best == NULL) {
188                         best = cur;
189                         continue;
190                 }
191
192                 if (cur->references < best->references) {
193                         best = cur;
194                         continue;
195                 }
196         }
197
198         return best;
199 }
200
201 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
202                                      struct ctdb_vnn *vnn)
203 {
204         struct ctdb_interface *best = NULL;
205
206         if (vnn->iface) {
207                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
208                                    "still assigned to iface '%s'\n",
209                                    ctdb_addr_to_str(&vnn->public_address),
210                                    ctdb_vnn_iface_string(vnn)));
211                 return 0;
212         }
213
214         best = ctdb_vnn_best_iface(ctdb, vnn);
215         if (best == NULL) {
216                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
217                                   "cannot assign to iface any iface\n",
218                                   ctdb_addr_to_str(&vnn->public_address)));
219                 return -1;
220         }
221
222         vnn->iface = best;
223         best->references++;
224         vnn->pnn = ctdb->pnn;
225
226         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
227                            "now assigned to iface '%s' refs[%d]\n",
228                            ctdb_addr_to_str(&vnn->public_address),
229                            ctdb_vnn_iface_string(vnn),
230                            best->references));
231         return 0;
232 }
233
234 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
235                                     struct ctdb_vnn *vnn)
236 {
237         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
238                            "now unassigned (old iface '%s' refs[%d])\n",
239                            ctdb_addr_to_str(&vnn->public_address),
240                            ctdb_vnn_iface_string(vnn),
241                            vnn->iface?vnn->iface->references:0));
242         if (vnn->iface) {
243                 vnn->iface->references--;
244         }
245         vnn->iface = NULL;
246         if (vnn->pnn == ctdb->pnn) {
247                 vnn->pnn = -1;
248         }
249 }
250
251 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
252                                struct ctdb_vnn *vnn)
253 {
254         int i;
255
256         /* Nodes that are not RUNNING can not host IPs */
257         if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
258                 return false;
259         }
260
261         if (vnn->delete_pending) {
262                 return false;
263         }
264
265         if (vnn->iface && vnn->iface->link_up) {
266                 return true;
267         }
268
269         for (i=0; vnn->ifaces[i]; i++) {
270                 struct ctdb_interface *cur;
271
272                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
273                 if (cur == NULL) {
274                         continue;
275                 }
276
277                 if (cur->link_up) {
278                         return true;
279                 }
280         }
281
282         return false;
283 }
284
285 struct ctdb_takeover_arp {
286         struct ctdb_context *ctdb;
287         uint32_t count;
288         ctdb_sock_addr addr;
289         struct ctdb_tcp_array *tcparray;
290         struct ctdb_vnn *vnn;
291 };
292
293
294 /*
295   lists of tcp endpoints
296  */
297 struct ctdb_tcp_list {
298         struct ctdb_tcp_list *prev, *next;
299         struct ctdb_connection connection;
300 };
301
302 /*
303   list of clients to kill on IP release
304  */
305 struct ctdb_client_ip {
306         struct ctdb_client_ip *prev, *next;
307         struct ctdb_context *ctdb;
308         ctdb_sock_addr addr;
309         uint32_t client_id;
310 };
311
312
313 /*
314   send a gratuitous arp
315  */
316 static void ctdb_control_send_arp(struct tevent_context *ev,
317                                   struct tevent_timer *te,
318                                   struct timeval t, void *private_data)
319 {
320         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
321                                                         struct ctdb_takeover_arp);
322         int i, ret;
323         struct ctdb_tcp_array *tcparray;
324         const char *iface = ctdb_vnn_iface_string(arp->vnn);
325
326         ret = ctdb_sys_send_arp(&arp->addr, iface);
327         if (ret != 0) {
328                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
329                                   iface, strerror(errno)));
330         }
331
332         tcparray = arp->tcparray;
333         if (tcparray) {
334                 for (i=0;i<tcparray->num;i++) {
335                         struct ctdb_connection *tcon;
336
337                         tcon = &tcparray->connections[i];
338                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
339                                 (unsigned)ntohs(tcon->dst.ip.sin_port),
340                                 ctdb_addr_to_str(&tcon->src),
341                                 (unsigned)ntohs(tcon->src.ip.sin_port)));
342                         ret = ctdb_sys_send_tcp(
343                                 &tcon->src,
344                                 &tcon->dst,
345                                 0, 0, 0);
346                         if (ret != 0) {
347                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
348                                         ctdb_addr_to_str(&tcon->src)));
349                         }
350                 }
351         }
352
353         arp->count++;
354
355         if (arp->count == CTDB_ARP_REPEAT) {
356                 talloc_free(arp);
357                 return;
358         }
359
360         tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
361                          timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
362                          ctdb_control_send_arp, arp);
363 }
364
365 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
366                                        struct ctdb_vnn *vnn)
367 {
368         struct ctdb_takeover_arp *arp;
369         struct ctdb_tcp_array *tcparray;
370
371         if (!vnn->takeover_ctx) {
372                 vnn->takeover_ctx = talloc_new(vnn);
373                 if (!vnn->takeover_ctx) {
374                         return -1;
375                 }
376         }
377
378         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
379         if (!arp) {
380                 return -1;
381         }
382
383         arp->ctdb = ctdb;
384         arp->addr = vnn->public_address;
385         arp->vnn  = vnn;
386
387         tcparray = vnn->tcp_array;
388         if (tcparray) {
389                 /* add all of the known tcp connections for this IP to the
390                    list of tcp connections to send tickle acks for */
391                 arp->tcparray = talloc_steal(arp, tcparray);
392
393                 vnn->tcp_array = NULL;
394                 vnn->tcp_update_needed = true;
395         }
396
397         tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
398                          timeval_zero(), ctdb_control_send_arp, arp);
399
400         return 0;
401 }
402
403 struct takeover_callback_state {
404         struct ctdb_req_control_old *c;
405         ctdb_sock_addr *addr;
406         struct ctdb_vnn *vnn;
407 };
408
/*
  state handed to the take-ip completion callback
 */
struct ctdb_do_takeip_state {
	struct ctdb_req_control_old *c;	/* control request to reply to */
	struct ctdb_vnn *vnn;		/* address being taken over */
};
413
414 /*
415   called when takeip event finishes
416  */
417 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
418                                     void *private_data)
419 {
420         struct ctdb_do_takeip_state *state =
421                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
422         int32_t ret;
423         TDB_DATA data;
424
425         if (status != 0) {
426                 struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
427         
428                 if (status == -ETIME) {
429                         ctdb_ban_self(ctdb);
430                 }
431                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
432                                  ctdb_addr_to_str(&state->vnn->public_address),
433                                  ctdb_vnn_iface_string(state->vnn)));
434                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
435
436                 node->flags |= NODE_FLAGS_UNHEALTHY;
437                 talloc_free(state);
438                 return;
439         }
440
441         if (ctdb->do_checkpublicip) {
442
443         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
444         if (ret != 0) {
445                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
446                 talloc_free(state);
447                 return;
448         }
449
450         }
451
452         data.dptr  = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
453         data.dsize = strlen((char *)data.dptr) + 1;
454         DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
455
456         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
457
458
459         /* the control succeeded */
460         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
461         talloc_free(state);
462         return;
463 }
464
465 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
466 {
467         state->vnn->update_in_flight = false;
468         return 0;
469 }
470
471 /*
472   take over an ip address
473  */
474 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
475                               struct ctdb_req_control_old *c,
476                               struct ctdb_vnn *vnn)
477 {
478         int ret;
479         struct ctdb_do_takeip_state *state;
480
481         if (vnn->update_in_flight) {
482                 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
483                                     "update for this IP already in flight\n",
484                                     ctdb_addr_to_str(&vnn->public_address),
485                                     vnn->public_netmask_bits));
486                 return -1;
487         }
488
489         ret = ctdb_vnn_assign_iface(ctdb, vnn);
490         if (ret != 0) {
491                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
492                                  "assign a usable interface\n",
493                                  ctdb_addr_to_str(&vnn->public_address),
494                                  vnn->public_netmask_bits));
495                 return -1;
496         }
497
498         state = talloc(vnn, struct ctdb_do_takeip_state);
499         CTDB_NO_MEMORY(ctdb, state);
500
501         state->c = talloc_steal(ctdb, c);
502         state->vnn   = vnn;
503
504         vnn->update_in_flight = true;
505         talloc_set_destructor(state, ctdb_takeip_destructor);
506
507         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
508                             ctdb_addr_to_str(&vnn->public_address),
509                             vnn->public_netmask_bits,
510                             ctdb_vnn_iface_string(vnn)));
511
512         ret = ctdb_event_script_callback(ctdb,
513                                          state,
514                                          ctdb_do_takeip_callback,
515                                          state,
516                                          CTDB_EVENT_TAKE_IP,
517                                          "%s %s %u",
518                                          ctdb_vnn_iface_string(vnn),
519                                          ctdb_addr_to_str(&vnn->public_address),
520                                          vnn->public_netmask_bits);
521
522         if (ret != 0) {
523                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
524                         ctdb_addr_to_str(&vnn->public_address),
525                         ctdb_vnn_iface_string(vnn)));
526                 talloc_free(state);
527                 return -1;
528         }
529
530         return 0;
531 }
532
/*
  state handed to the update-ip completion callback
 */
struct ctdb_do_updateip_state {
	struct ctdb_req_control_old *c;	/* control request to reply to */
	struct ctdb_interface *old;	/* interface the address is moving away from */
	struct ctdb_vnn *vnn;		/* address being moved */
};
538
539 /*
540   called when updateip event finishes
541  */
542 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
543                                       void *private_data)
544 {
545         struct ctdb_do_updateip_state *state =
546                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
547         int32_t ret;
548
549         if (status != 0) {
550                 if (status == -ETIME) {
551                         ctdb_ban_self(ctdb);
552                 }
553                 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
554                         ctdb_addr_to_str(&state->vnn->public_address),
555                         state->old->name,
556                         ctdb_vnn_iface_string(state->vnn)));
557
558                 /*
559                  * All we can do is reset the old interface
560                  * and let the next run fix it
561                  */
562                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
563                 state->vnn->iface = state->old;
564                 state->vnn->iface->references++;
565
566                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
567                 talloc_free(state);
568                 return;
569         }
570
571         if (ctdb->do_checkpublicip) {
572
573         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
574         if (ret != 0) {
575                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
576                 talloc_free(state);
577                 return;
578         }
579
580         }
581
582         /* the control succeeded */
583         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
584         talloc_free(state);
585         return;
586 }
587
588 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
589 {
590         state->vnn->update_in_flight = false;
591         return 0;
592 }
593
594 /*
595   update (move) an ip address
596  */
597 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
598                                 struct ctdb_req_control_old *c,
599                                 struct ctdb_vnn *vnn)
600 {
601         int ret;
602         struct ctdb_do_updateip_state *state;
603         struct ctdb_interface *old = vnn->iface;
604         const char *new_name;
605
606         if (vnn->update_in_flight) {
607                 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
608                                     "update for this IP already in flight\n",
609                                     ctdb_addr_to_str(&vnn->public_address),
610                                     vnn->public_netmask_bits));
611                 return -1;
612         }
613
614         ctdb_vnn_unassign_iface(ctdb, vnn);
615         ret = ctdb_vnn_assign_iface(ctdb, vnn);
616         if (ret != 0) {
617                 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
618                                  "assin a usable interface (old iface '%s')\n",
619                                  ctdb_addr_to_str(&vnn->public_address),
620                                  vnn->public_netmask_bits,
621                                  old->name));
622                 return -1;
623         }
624
625         new_name = ctdb_vnn_iface_string(vnn);
626         if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
627                 /* A benign update from one interface onto itself.
628                  * no need to run the eventscripts in this case, just return
629                  * success.
630                  */
631                 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
632                 return 0;
633         }
634
635         state = talloc(vnn, struct ctdb_do_updateip_state);
636         CTDB_NO_MEMORY(ctdb, state);
637
638         state->c = talloc_steal(ctdb, c);
639         state->old = old;
640         state->vnn = vnn;
641
642         vnn->update_in_flight = true;
643         talloc_set_destructor(state, ctdb_updateip_destructor);
644
645         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
646                             "interface %s to %s\n",
647                             ctdb_addr_to_str(&vnn->public_address),
648                             vnn->public_netmask_bits,
649                             old->name,
650                             new_name));
651
652         ret = ctdb_event_script_callback(ctdb,
653                                          state,
654                                          ctdb_do_updateip_callback,
655                                          state,
656                                          CTDB_EVENT_UPDATE_IP,
657                                          "%s %s %s %u",
658                                          state->old->name,
659                                          new_name,
660                                          ctdb_addr_to_str(&vnn->public_address),
661                                          vnn->public_netmask_bits);
662         if (ret != 0) {
663                 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
664                                  ctdb_addr_to_str(&vnn->public_address),
665                                  old->name, new_name));
666                 talloc_free(state);
667                 return -1;
668         }
669
670         return 0;
671 }
672
673 /*
674   Find the vnn of the node that has a public ip address
675   returns -1 if the address is not known as a public address
676  */
677 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
678 {
679         struct ctdb_vnn *vnn;
680
681         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
682                 if (ctdb_same_ip(&vnn->public_address, addr)) {
683                         return vnn;
684                 }
685         }
686
687         return NULL;
688 }
689
690 /*
691   take over an ip address
692  */
693 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
694                                  struct ctdb_req_control_old *c,
695                                  TDB_DATA indata,
696                                  bool *async_reply)
697 {
698         int ret;
699         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
700         struct ctdb_vnn *vnn;
701         bool have_ip = false;
702         bool do_updateip = false;
703         bool do_takeip = false;
704         struct ctdb_interface *best_iface = NULL;
705
706         if (pip->pnn != ctdb->pnn) {
707                 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
708                                  "with pnn %d, but we're node %d\n",
709                                  ctdb_addr_to_str(&pip->addr),
710                                  pip->pnn, ctdb->pnn));
711                 return -1;
712         }
713
714         /* update out vnn list */
715         vnn = find_public_ip_vnn(ctdb, &pip->addr);
716         if (vnn == NULL) {
717                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
718                         ctdb_addr_to_str(&pip->addr)));
719                 return 0;
720         }
721
722         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
723                 have_ip = ctdb_sys_have_ip(&pip->addr);
724         }
725         best_iface = ctdb_vnn_best_iface(ctdb, vnn);
726         if (best_iface == NULL) {
727                 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
728                                  "a usable interface (old %s, have_ip %d)\n",
729                                  ctdb_addr_to_str(&vnn->public_address),
730                                  vnn->public_netmask_bits,
731                                  ctdb_vnn_iface_string(vnn),
732                                  have_ip));
733                 return -1;
734         }
735
736         if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
737                 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
738                 have_ip = false;
739         }
740
741
742         if (vnn->iface == NULL && have_ip) {
743                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
744                                   "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
745                                  ctdb_addr_to_str(&vnn->public_address)));
746                 return 0;
747         }
748
749         if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
750                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
751                                   "and we have it on iface[%s], but it was assigned to node %d"
752                                   "and we are node %d, banning ourself\n",
753                                  ctdb_addr_to_str(&vnn->public_address),
754                                  ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
755                 ctdb_ban_self(ctdb);
756                 return -1;
757         }
758
759         if (vnn->pnn == -1 && have_ip) {
760                 vnn->pnn = ctdb->pnn;
761                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
762                                   "and we already have it on iface[%s], update local daemon\n",
763                                  ctdb_addr_to_str(&vnn->public_address),
764                                   ctdb_vnn_iface_string(vnn)));
765                 return 0;
766         }
767
768         if (vnn->iface) {
769                 if (vnn->iface != best_iface) {
770                         if (!vnn->iface->link_up) {
771                                 do_updateip = true;
772                         } else if (vnn->iface->references > (best_iface->references + 1)) {
773                                 /* only move when the rebalance gains something */
774                                         do_updateip = true;
775                         }
776                 }
777         }
778
779         if (!have_ip) {
780                 if (do_updateip) {
781                         ctdb_vnn_unassign_iface(ctdb, vnn);
782                         do_updateip = false;
783                 }
784                 do_takeip = true;
785         }
786
787         if (do_takeip) {
788                 ret = ctdb_do_takeip(ctdb, c, vnn);
789                 if (ret != 0) {
790                         return -1;
791                 }
792         } else if (do_updateip) {
793                 ret = ctdb_do_updateip(ctdb, c, vnn);
794                 if (ret != 0) {
795                         return -1;
796                 }
797         } else {
798                 /*
799                  * The interface is up and the kernel known the ip
800                  * => do nothing
801                  */
802                 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
803                         ctdb_addr_to_str(&pip->addr),
804                         vnn->public_netmask_bits,
805                         ctdb_vnn_iface_string(vnn)));
806                 return 0;
807         }
808
809         /* tell ctdb_control.c that we will be replying asynchronously */
810         *async_reply = true;
811
812         return 0;
813 }
814
815 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
816 {
817         DLIST_REMOVE(ctdb->vnn, vnn);
818         ctdb_vnn_unassign_iface(ctdb, vnn);
819         ctdb_remove_orphaned_ifaces(ctdb, vnn);
820         talloc_free(vnn);
821 }
822
823 /*
824   called when releaseip event finishes
825  */
826 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
827                                 void *private_data)
828 {
829         struct takeover_callback_state *state = 
830                 talloc_get_type(private_data, struct takeover_callback_state);
831         TDB_DATA data;
832
833         if (status == -ETIME) {
834                 ctdb_ban_self(ctdb);
835         }
836
837         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
838                 if  (ctdb_sys_have_ip(state->addr)) {
839                         DEBUG(DEBUG_ERR,
840                               ("IP %s still hosted during release IP callback, failing\n",
841                                ctdb_addr_to_str(state->addr)));
842                         ctdb_request_control_reply(ctdb, state->c,
843                                                    NULL, -1, NULL);
844                         talloc_free(state);
845                         return;
846                 }
847         }
848
849         /* send a message to all clients of this node telling them
850            that the cluster has been reconfigured and they should
851            release any sockets on this IP */
852         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
853         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
854         data.dsize = strlen((char *)data.dptr)+1;
855
856         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
857
858         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
859
860         ctdb_vnn_unassign_iface(ctdb, state->vnn);
861
862         /* Process the IP if it has been marked for deletion */
863         if (state->vnn->delete_pending) {
864                 do_delete_ip(ctdb, state->vnn);
865                 state->vnn = NULL;
866         }
867
868         /* the control succeeded */
869         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
870         talloc_free(state);
871 }
872
873 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
874 {
875         if (state->vnn != NULL) {
876                 state->vnn->update_in_flight = false;
877         }
878         return 0;
879 }
880
881 /*
882   release an ip address
883  */
884 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
885                                 struct ctdb_req_control_old *c,
886                                 TDB_DATA indata, 
887                                 bool *async_reply)
888 {
889         int ret;
890         struct takeover_callback_state *state;
891         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
892         struct ctdb_vnn *vnn;
893         char *iface;
894
895         /* update our vnn list */
896         vnn = find_public_ip_vnn(ctdb, &pip->addr);
897         if (vnn == NULL) {
898                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
899                         ctdb_addr_to_str(&pip->addr)));
900                 return 0;
901         }
902         vnn->pnn = pip->pnn;
903
904         /* stop any previous arps */
905         talloc_free(vnn->takeover_ctx);
906         vnn->takeover_ctx = NULL;
907
908         /* Some ctdb tool commands (e.g. moveip) send
909          * lazy multicast to drop an IP from any node that isn't the
910          * intended new node.  The following causes makes ctdbd ignore
911          * a release for any address it doesn't host.
912          */
913         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
914                 if (!ctdb_sys_have_ip(&pip->addr)) {
915                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
916                                 ctdb_addr_to_str(&pip->addr),
917                                 vnn->public_netmask_bits,
918                                 ctdb_vnn_iface_string(vnn)));
919                         ctdb_vnn_unassign_iface(ctdb, vnn);
920                         return 0;
921                 }
922         } else {
923                 if (vnn->iface == NULL) {
924                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
925                                            ctdb_addr_to_str(&pip->addr),
926                                            vnn->public_netmask_bits));
927                         return 0;
928                 }
929         }
930
931         /* There is a potential race between take_ip and us because we
932          * update the VNN via a callback that run when the
933          * eventscripts have been run.  Avoid the race by allowing one
934          * update to be in flight at a time.
935          */
936         if (vnn->update_in_flight) {
937                 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
938                                     "update for this IP already in flight\n",
939                                     ctdb_addr_to_str(&vnn->public_address),
940                                     vnn->public_netmask_bits));
941                 return -1;
942         }
943
944         iface = strdup(ctdb_vnn_iface_string(vnn));
945
946         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
947                 ctdb_addr_to_str(&pip->addr),
948                 vnn->public_netmask_bits,
949                 iface,
950                 pip->pnn));
951
952         state = talloc(ctdb, struct takeover_callback_state);
953         if (state == NULL) {
954                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
955                                __FILE__, __LINE__);
956                 free(iface);
957                 return -1;
958         }
959
960         state->c = talloc_steal(state, c);
961         state->addr = talloc(state, ctdb_sock_addr);       
962         if (state->addr == NULL) {
963                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
964                                __FILE__, __LINE__);
965                 free(iface);
966                 talloc_free(state);
967                 return -1;
968         }
969         *state->addr = pip->addr;
970         state->vnn   = vnn;
971
972         vnn->update_in_flight = true;
973         talloc_set_destructor(state, ctdb_releaseip_destructor);
974
975         ret = ctdb_event_script_callback(ctdb, 
976                                          state, release_ip_callback, state,
977                                          CTDB_EVENT_RELEASE_IP,
978                                          "%s %s %u",
979                                          iface,
980                                          ctdb_addr_to_str(&pip->addr),
981                                          vnn->public_netmask_bits);
982         free(iface);
983         if (ret != 0) {
984                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
985                         ctdb_addr_to_str(&pip->addr),
986                         ctdb_vnn_iface_string(vnn)));
987                 talloc_free(state);
988                 return -1;
989         }
990
991         /* tell the control that we will be reply asynchronously */
992         *async_reply = true;
993         return 0;
994 }
995
996 static int ctdb_add_public_address(struct ctdb_context *ctdb,
997                                    ctdb_sock_addr *addr,
998                                    unsigned mask, const char *ifaces,
999                                    bool check_address)
1000 {
1001         struct ctdb_vnn      *vnn;
1002         uint32_t num = 0;
1003         char *tmp;
1004         const char *iface;
1005         int i;
1006         int ret;
1007
1008         tmp = strdup(ifaces);
1009         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1010                 if (!ctdb_sys_check_iface_exists(iface)) {
1011                         DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1012                         free(tmp);
1013                         return -1;
1014                 }
1015         }
1016         free(tmp);
1017
1018         /* Verify that we don't have an entry for this ip yet */
1019         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1020                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1021                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
1022                                 ctdb_addr_to_str(addr)));
1023                         return -1;
1024                 }               
1025         }
1026
1027         /* create a new vnn structure for this ip address */
1028         vnn = talloc_zero(ctdb, struct ctdb_vnn);
1029         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
1030         vnn->ifaces = talloc_array(vnn, const char *, num + 2);
1031         tmp = talloc_strdup(vnn, ifaces);
1032         CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1033         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1034                 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1035                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1036                 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1037                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
1038                 num++;
1039         }
1040         talloc_free(tmp);
1041         vnn->ifaces[num] = NULL;
1042         vnn->public_address      = *addr;
1043         vnn->public_netmask_bits = mask;
1044         vnn->pnn                 = -1;
1045         if (check_address) {
1046                 if (ctdb_sys_have_ip(addr)) {
1047                         DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
1048                         vnn->pnn = ctdb->pnn;
1049                 }
1050         }
1051
1052         for (i=0; vnn->ifaces[i]; i++) {
1053                 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1054                 if (ret != 0) {
1055                         DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1056                                            "for public_address[%s]\n",
1057                                            vnn->ifaces[i], ctdb_addr_to_str(addr)));
1058                         talloc_free(vnn);
1059                         return -1;
1060                 }
1061         }
1062
1063         DLIST_ADD(ctdb->vnn, vnn);
1064
1065         return 0;
1066 }
1067
1068 /*
1069   setup the public address lists from a file
1070 */
1071 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1072 {
1073         char **lines;
1074         int nlines;
1075         int i;
1076
1077         lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1078         if (lines == NULL) {
1079                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1080                 return -1;
1081         }
1082         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1083                 nlines--;
1084         }
1085
1086         for (i=0;i<nlines;i++) {
1087                 unsigned mask;
1088                 ctdb_sock_addr addr;
1089                 const char *addrstr;
1090                 const char *ifaces;
1091                 char *tok, *line;
1092
1093                 line = lines[i];
1094                 while ((*line == ' ') || (*line == '\t')) {
1095                         line++;
1096                 }
1097                 if (*line == '#') {
1098                         continue;
1099                 }
1100                 if (strcmp(line, "") == 0) {
1101                         continue;
1102                 }
1103                 tok = strtok(line, " \t");
1104                 addrstr = tok;
1105                 tok = strtok(NULL, " \t");
1106                 if (tok == NULL) {
1107                         if (NULL == ctdb->default_public_interface) {
1108                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1109                                          i+1));
1110                                 talloc_free(lines);
1111                                 return -1;
1112                         }
1113                         ifaces = ctdb->default_public_interface;
1114                 } else {
1115                         ifaces = tok;
1116                 }
1117
1118                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1119                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1120                         talloc_free(lines);
1121                         return -1;
1122                 }
1123                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1124                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1125                         talloc_free(lines);
1126                         return -1;
1127                 }
1128         }
1129
1130
1131         talloc_free(lines);
1132         return 0;
1133 }
1134
1135 static void *add_ip_callback(void *parm, void *data)
1136 {
1137         struct public_ip_list *this_ip = parm;
1138         struct public_ip_list *prev_ip = data;
1139
1140         if (prev_ip == NULL) {
1141                 return parm;
1142         }
1143         if (this_ip->pnn == -1) {
1144                 this_ip->pnn = prev_ip->pnn;
1145         }
1146
1147         return parm;
1148 }
1149
1150 static int getips_count_callback(void *param, void *data)
1151 {
1152         struct public_ip_list **ip_list = (struct public_ip_list **)param;
1153         struct public_ip_list *new_ip = (struct public_ip_list *)data;
1154
1155         new_ip->next = *ip_list;
1156         *ip_list     = new_ip;
1157         return 0;
1158 }
1159
1160 static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
1161                                        struct ctdb_public_ip_list *ips,
1162                                        uint32_t pnn);
1163
1164 static int ctdb_reload_remote_public_ips(struct ctdb_context *ctdb,
1165                                          struct ipalloc_state *ipalloc_state,
1166                                          struct ctdb_node_map_old *nodemap)
1167 {
1168         int j;
1169         int ret;
1170         struct ctdb_public_ip_list_old *ip_list;
1171
1172         if (ipalloc_state->num != nodemap->num) {
1173                 DEBUG(DEBUG_ERR,
1174                       (__location__
1175                        " ipalloc_state->num (%d) != nodemap->num (%d) invalid param\n",
1176                        ipalloc_state->num, nodemap->num));
1177                 return -1;
1178         }
1179
1180         for (j=0; j<nodemap->num; j++) {
1181                 if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
1182                         continue;
1183                 }
1184
1185                 /* Retrieve the list of known public IPs from the node */
1186                 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1187                                         TAKEOVER_TIMEOUT(),
1188                                         j,
1189                                         ipalloc_state->known_public_ips,
1190                                         0,
1191                                         &ip_list);
1192                 if (ret != 0) {
1193                         DEBUG(DEBUG_ERR,
1194                               ("Failed to read known public IPs from node: %u\n",
1195                                j));
1196                         return -1;
1197                 }
1198                 ipalloc_state->known_public_ips[j].num = ip_list->num;
1199                 /* This could be copied and freed.  However, ip_list
1200                  * is allocated off ipalloc_state->known_public_ips,
1201                  * so this is a safe hack.  This will go away in a
1202                  * while anyway... */
1203                 ipalloc_state->known_public_ips[j].ip = &ip_list->ips[0];
1204
1205                 if (ctdb->do_checkpublicip) {
1206                         verify_remote_ip_allocation(
1207                                 ctdb,
1208                                 &ipalloc_state->known_public_ips[j],
1209                                 j);
1210                 }
1211
1212                 /* Retrieve the list of available public IPs from the node */
1213                 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1214                                         TAKEOVER_TIMEOUT(),
1215                                         j,
1216                                         ipalloc_state->available_public_ips,
1217                                         CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE,
1218                                         &ip_list);
1219                 if (ret != 0) {
1220                         DEBUG(DEBUG_ERR,
1221                               ("Failed to read available public IPs from node: %u\n",
1222                                j));
1223                         return -1;
1224                 }
1225                 ipalloc_state->available_public_ips[j].num = ip_list->num;
1226                 /* This could be copied and freed.  However, ip_list
1227                  * is allocated off ipalloc_state->available_public_ips,
1228                  * so this is a safe hack.  This will go away in a
1229                  * while anyway... */
1230                 ipalloc_state->available_public_ips[j].ip = &ip_list->ips[0];
1231         }
1232
1233         return 0;
1234 }
1235
1236 static struct public_ip_list *
1237 create_merged_ip_list(struct ctdb_context *ctdb, struct ipalloc_state *ipalloc_state)
1238 {
1239         int i, j;
1240         struct public_ip_list *ip_list;
1241         struct ctdb_public_ip_list *public_ips;
1242
1243         TALLOC_FREE(ctdb->ip_tree);
1244         ctdb->ip_tree = trbt_create(ctdb, 0);
1245
1246         for (i=0; i < ctdb->num_nodes; i++) {
1247
1248                 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1249                         continue;
1250                 }
1251
1252                 /* there were no public ips for this node */
1253                 if (ipalloc_state->known_public_ips == NULL) {
1254                         continue;
1255                 }
1256
1257                 public_ips = &ipalloc_state->known_public_ips[i];
1258
1259                 for (j=0; j < public_ips->num; j++) {
1260                         struct public_ip_list *tmp_ip;
1261
1262                         tmp_ip = talloc_zero(ctdb->ip_tree, struct public_ip_list);
1263                         CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1264                         /* Do not use information about IP addresses hosted
1265                          * on other nodes, it may not be accurate */
1266                         if (public_ips->ip[j].pnn == ctdb->nodes[i]->pnn) {
1267                                 tmp_ip->pnn = public_ips->ip[j].pnn;
1268                         } else {
1269                                 tmp_ip->pnn = -1;
1270                         }
1271                         tmp_ip->addr = public_ips->ip[j].addr;
1272                         tmp_ip->next = NULL;
1273
1274                         trbt_insertarray32_callback(ctdb->ip_tree,
1275                                 IP_KEYLEN, ip_key(&public_ips->ip[j].addr),
1276                                 add_ip_callback,
1277                                 tmp_ip);
1278                 }
1279         }
1280
1281         ip_list = NULL;
1282         trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1283
1284         return ip_list;
1285 }
1286
1287 static bool all_nodes_are_disabled(struct ctdb_node_map_old *nodemap)
1288 {
1289         int i;
1290
1291         for (i=0;i<nodemap->num;i++) {
1292                 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1293                         /* Found one completely healthy node */
1294                         return false;
1295                 }
1296         }
1297
1298         return true;
1299 }
1300
/* State shared between get_tunable_from_nodes() and its callbacks */
struct get_tunable_callback_data {
	/* Name of the tunable being queried; used in log messages */
	const char *tunable;
	/* talloc array receiving the values, indexed by PNN */
	uint32_t *out;
	/* Set by the callbacks when an error must fail the whole query */
	bool fatal;
};
1306
1307 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
1308                                  int32_t res, TDB_DATA outdata,
1309                                  void *callback)
1310 {
1311         struct get_tunable_callback_data *cd =
1312                 (struct get_tunable_callback_data *)callback;
1313         int size;
1314
1315         if (res != 0) {
1316                 /* Already handled in fail callback */
1317                 return;
1318         }
1319
1320         if (outdata.dsize != sizeof(uint32_t)) {
1321                 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
1322                                  cd->tunable, pnn, (int)sizeof(uint32_t),
1323                                  (int)outdata.dsize));
1324                 cd->fatal = true;
1325                 return;
1326         }
1327
1328         size = talloc_array_length(cd->out);
1329         if (pnn >= size) {
1330                 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
1331                                  cd->tunable, pnn, size));
1332                 return;
1333         }
1334
1335                 
1336         cd->out[pnn] = *(uint32_t *)outdata.dptr;
1337 }
1338
1339 static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1340                                        int32_t res, TDB_DATA outdata,
1341                                        void *callback)
1342 {
1343         struct get_tunable_callback_data *cd =
1344                 (struct get_tunable_callback_data *)callback;
1345
1346         switch (res) {
1347         case -ETIME:
1348                 DEBUG(DEBUG_ERR,
1349                       ("Timed out getting tunable \"%s\" from node %d\n",
1350                        cd->tunable, pnn));
1351                 cd->fatal = true;
1352                 break;
1353         case -EINVAL:
1354         case -1:
1355                 DEBUG(DEBUG_WARNING,
1356                       ("Tunable \"%s\" not implemented on node %d\n",
1357                        cd->tunable, pnn));
1358                 break;
1359         default:
1360                 DEBUG(DEBUG_ERR,
1361                       ("Unexpected error getting tunable \"%s\" from node %d\n",
1362                        cd->tunable, pnn));
1363                 cd->fatal = true;
1364         }
1365 }
1366
1367 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
1368                                         TALLOC_CTX *tmp_ctx,
1369                                         struct ctdb_node_map_old *nodemap,
1370                                         const char *tunable,
1371                                         uint32_t default_value)
1372 {
1373         TDB_DATA data;
1374         struct ctdb_control_get_tunable *t;
1375         uint32_t *nodes;
1376         uint32_t *tvals;
1377         struct get_tunable_callback_data callback_data;
1378         int i;
1379
1380         tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1381         CTDB_NO_MEMORY_NULL(ctdb, tvals);
1382         for (i=0; i<nodemap->num; i++) {
1383                 tvals[i] = default_value;
1384         }
1385                 
1386         callback_data.out = tvals;
1387         callback_data.tunable = tunable;
1388         callback_data.fatal = false;
1389
1390         data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
1391         data.dptr  = talloc_size(tmp_ctx, data.dsize);
1392         t = (struct ctdb_control_get_tunable *)data.dptr;
1393         t->length = strlen(tunable)+1;
1394         memcpy(t->name, tunable, t->length);
1395         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1396         if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
1397                                       nodes, 0, TAKEOVER_TIMEOUT(),
1398                                       false, data,
1399                                       get_tunable_callback,
1400                                       get_tunable_fail_callback,
1401                                       &callback_data) != 0) {
1402                 if (callback_data.fatal) {
1403                         talloc_free(tvals);
1404                         tvals = NULL;
1405                 }
1406         }
1407         talloc_free(nodes);
1408         talloc_free(data.dptr);
1409
1410         return tvals;
1411 }
1412
1413 /* Set internal flags for IP allocation:
1414  *   Clear ip flags
1415  *   Set NOIPTAKOVER ip flags from per-node NoIPTakeover tunable
1416  *   Set NOIPHOST ip flag for each INACTIVE node
1417  *   if all nodes are disabled:
1418  *     Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
1419  *   else
1420  *     Set NOIPHOST ip flags for disabled nodes
1421  */
1422 static void set_ipflags_internal(struct ipalloc_state *ipalloc_state,
1423                                  struct ctdb_node_map_old *nodemap,
1424                                  uint32_t *tval_noiptakeover,
1425                                  uint32_t *tval_noiphostonalldisabled)
1426 {
1427         int i;
1428
1429         for (i=0;i<nodemap->num;i++) {
1430                 /* Can not take IPs on node with NoIPTakeover set */
1431                 if (tval_noiptakeover[i] != 0) {
1432                         ipalloc_state->noiptakeover[i] = true;
1433                 }
1434
1435                 /* Can not host IPs on INACTIVE node */
1436                 if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
1437                         ipalloc_state->noiphost[i] = true;
1438                 }
1439         }
1440
1441         if (all_nodes_are_disabled(nodemap)) {
1442                 /* If all nodes are disabled, can not host IPs on node
1443                  * with NoIPHostOnAllDisabled set
1444                  */
1445                 for (i=0;i<nodemap->num;i++) {
1446                         if (tval_noiphostonalldisabled[i] != 0) {
1447                                 ipalloc_state->noiphost[i] = true;
1448                         }
1449                 }
1450         } else {
1451                 /* If some nodes are not disabled, then can not host
1452                  * IPs on DISABLED node
1453                  */
1454                 for (i=0;i<nodemap->num;i++) {
1455                         if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
1456                                 ipalloc_state->noiphost[i] = true;
1457                         }
1458                 }
1459         }
1460 }
1461
/*
 * Fetch the NoIPTakeover and NoIPHostOnAllDisabled tunables from the
 * nodes and turn them into the per-node flags in ipalloc_state (see
 * set_ipflags_internal()).
 *
 * Returns true on success, false if either tunable could not be
 * fetched.  The temporary tunable arrays are allocated off
 * ipalloc_state and are released on every path.
 */
static bool set_ipflags(struct ctdb_context *ctdb,
			struct ipalloc_state *ipalloc_state,
			struct ctdb_node_map_old *nodemap)
{
	uint32_t *tval_noiptakeover;
	uint32_t *tval_noiphostonalldisabled;

	tval_noiptakeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
						   "NoIPTakeover", 0);
	if (tval_noiptakeover == NULL) {
		return false;
	}

	tval_noiphostonalldisabled =
		get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
				       "NoIPHostOnAllDisabled", 0);
	if (tval_noiphostonalldisabled == NULL) {
		/* Do not leave the first array hanging off
		 * ipalloc_state until that is freed */
		talloc_free(tval_noiptakeover);
		return false;
	}

	set_ipflags_internal(ipalloc_state, nodemap,
			     tval_noiptakeover,
			     tval_noiphostonalldisabled);

	talloc_free(tval_noiptakeover);
	talloc_free(tval_noiphostonalldisabled);

	return true;
}
1492
1493 static struct ipalloc_state * ipalloc_state_init(struct ctdb_context *ctdb,
1494                                                  TALLOC_CTX *mem_ctx)
1495 {
1496         struct ipalloc_state *ipalloc_state =
1497                 talloc_zero(mem_ctx, struct ipalloc_state);
1498         if (ipalloc_state == NULL) {
1499                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1500                 return NULL;
1501         }
1502
1503         ipalloc_state->num = ctdb->num_nodes;
1504
1505         ipalloc_state->known_public_ips =
1506                 talloc_zero_array(ipalloc_state,
1507                                   struct ctdb_public_ip_list,
1508                                   ipalloc_state->num);
1509         if (ipalloc_state->known_public_ips == NULL) {
1510                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1511                 goto fail;
1512         }
1513
1514         ipalloc_state->available_public_ips =
1515                 talloc_zero_array(ipalloc_state,
1516                                   struct ctdb_public_ip_list,
1517                                   ipalloc_state->num);
1518         if (ipalloc_state->available_public_ips == NULL) {
1519                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1520                 goto fail;
1521         }
1522         ipalloc_state->noiptakeover =
1523                 talloc_zero_array(ipalloc_state,
1524                                   bool,
1525                                   ipalloc_state->num);
1526         if (ipalloc_state->noiptakeover == NULL) {
1527                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1528                 goto fail;
1529         }
1530         ipalloc_state->noiphost =
1531                 talloc_zero_array(ipalloc_state,
1532                                   bool,
1533                                   ipalloc_state->num);
1534         if (ipalloc_state->noiphost == NULL) {
1535                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1536                 goto fail;
1537         }
1538
1539         if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1540                 ipalloc_state->algorithm = IPALLOC_LCP2;
1541         } else if (1 == ctdb->tunable.deterministic_public_ips) {
1542                 ipalloc_state->algorithm = IPALLOC_DETERMINISTIC;
1543         } else {
1544                 ipalloc_state->algorithm = IPALLOC_NONDETERMINISTIC;
1545         }
1546
1547         ipalloc_state->no_ip_failback = ctdb->tunable.no_ip_failback;
1548
1549         return ipalloc_state;
1550 fail:
1551         talloc_free(ipalloc_state);
1552         return NULL;
1553 }
1554
/* State handed to iprealloc_fail_callback() */
struct iprealloc_callback_data {
	/* talloc array indexed by PNN; true for nodes flagged for retry */
	bool *retry_nodes;
	/* Incremented each time a node is flagged for retry */
	int retry_count;
	/* Optional callback invoked when the control timed out */
	client_async_callback fail_callback;
	/* Opaque argument passed to fail_callback */
	void *fail_callback_data;
	/* Node map used to look up the flags of the failing node */
	struct ctdb_node_map_old *nodemap;
};
1562
1563 static void iprealloc_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1564                                         int32_t res, TDB_DATA outdata,
1565                                         void *callback)
1566 {
1567         int numnodes;
1568         struct iprealloc_callback_data *cd =
1569                 (struct iprealloc_callback_data *)callback;
1570
1571         numnodes = talloc_array_length(cd->retry_nodes);
1572         if (pnn > numnodes) {
1573                 DEBUG(DEBUG_ERR,
1574                       ("ipreallocated failure from node %d, "
1575                        "but only %d nodes in nodemap\n",
1576                        pnn, numnodes));
1577                 return;
1578         }
1579
1580         /* Can't run the "ipreallocated" event on a INACTIVE node */
1581         if (cd->nodemap->nodes[pnn].flags & NODE_FLAGS_INACTIVE) {
1582                 DEBUG(DEBUG_WARNING,
1583                       ("ipreallocated failed on inactive node %d, ignoring\n",
1584                        pnn));
1585                 return;
1586         }
1587
1588         switch (res) {
1589         case -ETIME:
1590                 /* If the control timed out then that's a real error,
1591                  * so call the real fail callback
1592                  */
1593                 if (cd->fail_callback) {
1594                         cd->fail_callback(ctdb, pnn, res, outdata,
1595                                           cd->fail_callback_data);
1596                 } else {
1597                         DEBUG(DEBUG_WARNING,
1598                               ("iprealloc timed out but no callback registered\n"));
1599                 }
1600                 break;
1601         default:
1602                 /* If not a timeout then either the ipreallocated
1603                  * eventscript (or some setup) failed.  This might
1604                  * have failed because the IPREALLOCATED control isn't
1605                  * implemented - right now there is no way of knowing
1606                  * because the error codes are all folded down to -1.
1607                  * Consider retrying using EVENTSCRIPT control...
1608                  */
1609                 DEBUG(DEBUG_WARNING,
1610                       ("ipreallocated failure from node %d, flagging retry\n",
1611                        pnn));
1612                 cd->retry_nodes[pnn] = true;
1613                 cd->retry_count++;
1614         }
1615 }
1616
1617 struct takeover_callback_data {
1618         bool *node_failed;
1619         client_async_callback fail_callback;
1620         void *fail_callback_data;
1621         struct ctdb_node_map_old *nodemap;
1622 };
1623
1624 static void takeover_run_fail_callback(struct ctdb_context *ctdb,
1625                                        uint32_t node_pnn, int32_t res,
1626                                        TDB_DATA outdata, void *callback_data)
1627 {
1628         struct takeover_callback_data *cd =
1629                 talloc_get_type_abort(callback_data,
1630                                       struct takeover_callback_data);
1631         int i;
1632
1633         for (i = 0; i < cd->nodemap->num; i++) {
1634                 if (node_pnn == cd->nodemap->nodes[i].pnn) {
1635                         break;
1636                 }
1637         }
1638
1639         if (i == cd->nodemap->num) {
1640                 DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
1641                 return;
1642         }
1643
1644         if (!cd->node_failed[i]) {
1645                 cd->node_failed[i] = true;
1646                 cd->fail_callback(ctdb, node_pnn, res, outdata,
1647                                   cd->fail_callback_data);
1648         }
1649 }
1650
1651 /*
1652  * Recalculate the allocation of public IPs to nodes and have the
1653  * nodes host their allocated addresses.
1654  *
1655  * - Allocate memory for IP allocation state, including per node
1656  *   arrays
1657  * - Populate IP allocation algorithm in IP allocation state
1658  * - Populate local value of tunable NoIPFailback in IP allocation
1659      state - this is really a cluster-wide configuration variable and
1660      only the value form the master node is used
1661  * - Retrieve tunables NoIPTakeover and NoIPHostOnAllDisabled from all
1662  *   connected nodes - this is done separately so tunable values can
1663  *   be faked in unit testing
1664  * - Populate NoIPTakover tunable in IP allocation state
1665  * - Populate NoIPHost in IP allocation state, derived from node flags
1666  *   and NoIPHostOnAllDisabled tunable
1667  * - Retrieve and populate known and available IP lists in IP
1668  *   allocation state
1669  * - If no available IP addresses then early exit
1670  * - Build list of (known IPs, currently assigned node)
1671  * - Populate list of nodes to force rebalance - internal structure,
1672  *   currently no way to fetch, only used by LCP2 for nodes that have
1673  *   had new IP addresses added
1674  * - Run IP allocation algorithm
1675  * - Send RELEASE_IP to all nodes for IPs they should not host
1676  * - Send TAKE_IP to all nodes for IPs they should host
1677  * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
1678  */
1679 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
1680                       uint32_t *force_rebalance_nodes,
1681                       client_async_callback fail_callback, void *callback_data)
1682 {
1683         int i, j, ret;
1684         struct ctdb_public_ip ip;
1685         uint32_t *nodes;
1686         struct public_ip_list *all_ips, *tmp_ip;
1687         TDB_DATA data;
1688         struct timeval timeout;
1689         struct client_async_data *async_data;
1690         struct ctdb_client_control_state *state;
1691         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1692         struct ipalloc_state *ipalloc_state;
1693         struct takeover_callback_data *takeover_data;
1694         struct iprealloc_callback_data iprealloc_data;
1695         bool *retry_data;
1696         bool can_host_ips;
1697
1698         /*
1699          * ip failover is completely disabled, just send out the 
1700          * ipreallocated event.
1701          */
1702         if (ctdb->tunable.disable_ip_failover != 0) {
1703                 goto ipreallocated;
1704         }
1705
1706         ipalloc_state = ipalloc_state_init(ctdb, tmp_ctx);
1707         if (ipalloc_state == NULL) {
1708                 talloc_free(tmp_ctx);
1709                 return -1;
1710         }
1711
1712         if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
1713                 DEBUG(DEBUG_ERR,("Failed to set IP flags - aborting takeover run\n"));
1714                 talloc_free(tmp_ctx);
1715                 return -1;
1716         }
1717
1718         /* Fetch known/available public IPs from each active node */
1719         ret = ctdb_reload_remote_public_ips(ctdb, ipalloc_state, nodemap);
1720         if (ret != 0) {
1721                 talloc_free(tmp_ctx);
1722                 return -1;
1723         }
1724
1725         /* Short-circuit IP allocation if no node has available IPs */
1726         can_host_ips = false;
1727         for (i=0; i < ipalloc_state->num; i++) {
1728                 if (ipalloc_state->available_public_ips[i].num != 0) {
1729                         can_host_ips = true;
1730                 }
1731         }
1732         if (!can_host_ips) {
1733                 DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
1734                 return 0;
1735         }
1736
1737         /* since nodes only know about those public addresses that
1738            can be served by that particular node, no single node has
1739            a full list of all public addresses that exist in the cluster.
1740            Walk over all node structures and create a merged list of
1741            all public addresses that exist in the cluster.
1742
1743            keep the tree of ips around as ctdb->ip_tree
1744         */
1745         all_ips = create_merged_ip_list(ctdb, ipalloc_state);
1746         ipalloc_state->all_ips = all_ips;
1747
1748         ipalloc_state->force_rebalance_nodes = force_rebalance_nodes;
1749
1750         /* Do the IP reassignment calculations */
1751         ipalloc(ipalloc_state);
1752
1753         /* Now tell all nodes to release any public IPs should not
1754          * host.  This will be a NOOP on nodes that don't currently
1755          * hold the given IP.
1756          */
1757         takeover_data = talloc_zero(tmp_ctx, struct takeover_callback_data);
1758         CTDB_NO_MEMORY_FATAL(ctdb, takeover_data);
1759
1760         takeover_data->node_failed = talloc_zero_array(tmp_ctx,
1761                                                        bool, nodemap->num);
1762         CTDB_NO_MEMORY_FATAL(ctdb, takeover_data->node_failed);
1763         takeover_data->fail_callback = fail_callback;
1764         takeover_data->fail_callback_data = callback_data;
1765         takeover_data->nodemap = nodemap;
1766
1767         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1768         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1769
1770         async_data->fail_callback = takeover_run_fail_callback;
1771         async_data->callback_data = takeover_data;
1772
1773         ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
1774
1775         /* Send a RELEASE_IP to all nodes that should not be hosting
1776          * each IP.  For each IP, all but one of these will be
1777          * redundant.  However, the redundant ones are used to tell
1778          * nodes which node should be hosting the IP so that commands
1779          * like "ctdb ip" can display a particular nodes idea of who
1780          * is hosting what. */
1781         for (i=0;i<nodemap->num;i++) {
1782                 /* don't talk to unconnected nodes, but do talk to banned nodes */
1783                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1784                         continue;
1785                 }
1786
1787                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1788                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1789                                 /* This node should be serving this
1790                                    vnn so don't tell it to release the ip
1791                                 */
1792                                 continue;
1793                         }
1794                         ip.pnn  = tmp_ip->pnn;
1795                         ip.addr = tmp_ip->addr;
1796
1797                         timeout = TAKEOVER_TIMEOUT();
1798                         data.dsize = sizeof(ip);
1799                         data.dptr  = (uint8_t *)&ip;
1800                         state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1801                                                   0, CTDB_CONTROL_RELEASE_IP, 0,
1802                                                   data, async_data,
1803                                                   &timeout, NULL);
1804                         if (state == NULL) {
1805                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1806                                 talloc_free(tmp_ctx);
1807                                 return -1;
1808                         }
1809
1810                         ctdb_client_async_add(async_data, state);
1811                 }
1812         }
1813         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1814                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1815                 talloc_free(tmp_ctx);
1816                 return -1;
1817         }
1818         talloc_free(async_data);
1819
1820
1821         /* For each IP, send a TAKOVER_IP to the node that should be
1822          * hosting it.  Many of these will often be redundant (since
1823          * the allocation won't have changed) but they can be useful
1824          * to recover from inconsistencies. */
1825         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1826         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1827
1828         async_data->fail_callback = fail_callback;
1829         async_data->callback_data = callback_data;
1830
1831         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1832                 if (tmp_ip->pnn == -1) {
1833                         /* this IP won't be taken over */
1834                         continue;
1835                 }
1836
1837                 ip.pnn  = tmp_ip->pnn;
1838                 ip.addr = tmp_ip->addr;
1839
1840                 timeout = TAKEOVER_TIMEOUT();
1841                 data.dsize = sizeof(ip);
1842                 data.dptr  = (uint8_t *)&ip;
1843                 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1844                                           0, CTDB_CONTROL_TAKEOVER_IP, 0,
1845                                           data, async_data, &timeout, NULL);
1846                 if (state == NULL) {
1847                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1848                         talloc_free(tmp_ctx);
1849                         return -1;
1850                 }
1851
1852                 ctdb_client_async_add(async_data, state);
1853         }
1854         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1855                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1856                 talloc_free(tmp_ctx);
1857                 return -1;
1858         }
1859
1860 ipreallocated:
1861         /*
1862          * Tell all nodes to run eventscripts to process the
1863          * "ipreallocated" event.  This can do a lot of things,
1864          * including restarting services to reconfigure them if public
1865          * IPs have moved.  Once upon a time this event only used to
1866          * update natgw.
1867          */
1868         retry_data = talloc_zero_array(tmp_ctx, bool, nodemap->num);
1869         CTDB_NO_MEMORY_FATAL(ctdb, retry_data);
1870         iprealloc_data.retry_nodes = retry_data;
1871         iprealloc_data.retry_count = 0;
1872         iprealloc_data.fail_callback = fail_callback;
1873         iprealloc_data.fail_callback_data = callback_data;
1874         iprealloc_data.nodemap = nodemap;
1875
1876         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1877         ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
1878                                         nodes, 0, TAKEOVER_TIMEOUT(),
1879                                         false, tdb_null,
1880                                         NULL, iprealloc_fail_callback,
1881                                         &iprealloc_data);
1882         if (ret != 0) {
1883                 /* If the control failed then we should retry to any
1884                  * nodes flagged by iprealloc_fail_callback using the
1885                  * EVENTSCRIPT control.  This is a best-effort at
1886                  * backward compatiblity when running a mixed cluster
1887                  * where some nodes have not yet been upgraded to
1888                  * support the IPREALLOCATED control.
1889                  */
1890                 DEBUG(DEBUG_WARNING,
1891                       ("Retry ipreallocated to some nodes using eventscript control\n"));
1892
1893                 nodes = talloc_array(tmp_ctx, uint32_t,
1894                                      iprealloc_data.retry_count);
1895                 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
1896
1897                 j = 0;
1898                 for (i=0; i<nodemap->num; i++) {
1899                         if (iprealloc_data.retry_nodes[i]) {
1900                                 nodes[j] = i;
1901                                 j++;
1902                         }
1903                 }
1904
1905                 data.dptr  = discard_const("ipreallocated");
1906                 data.dsize = strlen((char *)data.dptr) + 1; 
1907                 ret = ctdb_client_async_control(ctdb,
1908                                                 CTDB_CONTROL_RUN_EVENTSCRIPTS,
1909                                                 nodes, 0, TAKEOVER_TIMEOUT(),
1910                                                 false, data,
1911                                                 NULL, fail_callback,
1912                                                 callback_data);
1913                 if (ret != 0) {
1914                         DEBUG(DEBUG_ERR, (__location__ " failed to send control to run eventscripts with \"ipreallocated\"\n"));
1915                 }
1916         }
1917
1918         talloc_free(tmp_ctx);
1919         return ret;
1920 }
1921
1922
1923 /*
1924   destroy a ctdb_client_ip structure
1925  */
1926 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1927 {
1928         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1929                 ctdb_addr_to_str(&ip->addr),
1930                 ntohs(ip->addr.ip.sin_port),
1931                 ip->client_id));
1932
1933         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1934         return 0;
1935 }
1936
1937 /*
1938   called by a client to inform us of a TCP connection that it is managing
1939   that should tickled with an ACK when IP takeover is done
1940  */
1941 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1942                                 TDB_DATA indata)
1943 {
1944         struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1945         struct ctdb_connection *tcp_sock = NULL;
1946         struct ctdb_tcp_list *tcp;
1947         struct ctdb_connection t;
1948         int ret;
1949         TDB_DATA data;
1950         struct ctdb_client_ip *ip;
1951         struct ctdb_vnn *vnn;
1952         ctdb_sock_addr addr;
1953
1954         /* If we don't have public IPs, tickles are useless */
1955         if (ctdb->vnn == NULL) {
1956                 return 0;
1957         }
1958
1959         tcp_sock = (struct ctdb_connection *)indata.dptr;
1960
1961         addr = tcp_sock->src;
1962         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1963         addr = tcp_sock->dst;
1964         ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
1965
1966         ZERO_STRUCT(addr);
1967         memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1968         vnn = find_public_ip_vnn(ctdb, &addr);
1969         if (vnn == NULL) {
1970                 switch (addr.sa.sa_family) {
1971                 case AF_INET:
1972                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1973                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1974                                         ctdb_addr_to_str(&addr)));
1975                         }
1976                         break;
1977                 case AF_INET6:
1978                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1979                                 ctdb_addr_to_str(&addr)));
1980                         break;
1981                 default:
1982                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1983                 }
1984
1985                 return 0;
1986         }
1987
1988         if (vnn->pnn != ctdb->pnn) {
1989                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1990                         ctdb_addr_to_str(&addr),
1991                         client_id, client->pid));
1992                 /* failing this call will tell smbd to die */
1993                 return -1;
1994         }
1995
1996         ip = talloc(client, struct ctdb_client_ip);
1997         CTDB_NO_MEMORY(ctdb, ip);
1998
1999         ip->ctdb      = ctdb;
2000         ip->addr      = addr;
2001         ip->client_id = client_id;
2002         talloc_set_destructor(ip, ctdb_client_ip_destructor);
2003         DLIST_ADD(ctdb->client_ip_list, ip);
2004
2005         tcp = talloc(client, struct ctdb_tcp_list);
2006         CTDB_NO_MEMORY(ctdb, tcp);
2007
2008         tcp->connection.src = tcp_sock->src;
2009         tcp->connection.dst = tcp_sock->dst;
2010
2011         DLIST_ADD(client->tcp_list, tcp);
2012
2013         t.src = tcp_sock->src;
2014         t.dst = tcp_sock->dst;
2015
2016         data.dptr = (uint8_t *)&t;
2017         data.dsize = sizeof(t);
2018
2019         switch (addr.sa.sa_family) {
2020         case AF_INET:
2021                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2022                         (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
2023                         ctdb_addr_to_str(&tcp_sock->src),
2024                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
2025                 break;
2026         case AF_INET6:
2027                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2028                         (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
2029                         ctdb_addr_to_str(&tcp_sock->src),
2030                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
2031                 break;
2032         default:
2033                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
2034         }
2035
2036
2037         /* tell all nodes about this tcp connection */
2038         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
2039                                        CTDB_CONTROL_TCP_ADD,
2040                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2041         if (ret != 0) {
2042                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
2043                 return -1;
2044         }
2045
2046         return 0;
2047 }
2048
2049 /*
2050   find a tcp address on a list
2051  */
2052 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
2053                                            struct ctdb_connection *tcp)
2054 {
2055         int i;
2056
2057         if (array == NULL) {
2058                 return NULL;
2059         }
2060
2061         for (i=0;i<array->num;i++) {
2062                 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
2063                     ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
2064                         return &array->connections[i];
2065                 }
2066         }
2067         return NULL;
2068 }
2069
2070
2071
2072 /*
2073   called by a daemon to inform us of a TCP connection that one of its
2074   clients managing that should tickled with an ACK when IP takeover is
2075   done
2076  */
2077 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
2078 {
2079         struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
2080         struct ctdb_tcp_array *tcparray;
2081         struct ctdb_connection tcp;
2082         struct ctdb_vnn *vnn;
2083
2084         /* If we don't have public IPs, tickles are useless */
2085         if (ctdb->vnn == NULL) {
2086                 return 0;
2087         }
2088
2089         vnn = find_public_ip_vnn(ctdb, &p->dst);
2090         if (vnn == NULL) {
2091                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
2092                         ctdb_addr_to_str(&p->dst)));
2093
2094                 return -1;
2095         }
2096
2097
2098         tcparray = vnn->tcp_array;
2099
2100         /* If this is the first tickle */
2101         if (tcparray == NULL) {
2102                 tcparray = talloc(vnn, struct ctdb_tcp_array);
2103                 CTDB_NO_MEMORY(ctdb, tcparray);
2104                 vnn->tcp_array = tcparray;
2105
2106                 tcparray->num = 0;
2107                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
2108                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2109
2110                 tcparray->connections[tcparray->num].src = p->src;
2111                 tcparray->connections[tcparray->num].dst = p->dst;
2112                 tcparray->num++;
2113
2114                 if (tcp_update_needed) {
2115                         vnn->tcp_update_needed = true;
2116                 }
2117                 return 0;
2118         }
2119
2120
2121         /* Do we already have this tickle ?*/
2122         tcp.src = p->src;
2123         tcp.dst = p->dst;
2124         if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
2125                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
2126                         ctdb_addr_to_str(&tcp.dst),
2127                         ntohs(tcp.dst.ip.sin_port),
2128                         vnn->pnn));
2129                 return 0;
2130         }
2131
2132         /* A new tickle, we must add it to the array */
2133         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
2134                                         struct ctdb_connection,
2135                                         tcparray->num+1);
2136         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2137
2138         tcparray->connections[tcparray->num].src = p->src;
2139         tcparray->connections[tcparray->num].dst = p->dst;
2140         tcparray->num++;
2141
2142         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
2143                 ctdb_addr_to_str(&tcp.dst),
2144                 ntohs(tcp.dst.ip.sin_port),
2145                 vnn->pnn));
2146
2147         if (tcp_update_needed) {
2148                 vnn->tcp_update_needed = true;
2149         }
2150
2151         return 0;
2152 }
2153
2154
2155 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
2156 {
2157         struct ctdb_connection *tcpp;
2158
2159         if (vnn == NULL) {
2160                 return;
2161         }
2162
2163         /* if the array is empty we cant remove it
2164            and we don't need to do anything
2165          */
2166         if (vnn->tcp_array == NULL) {
2167                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
2168                         ctdb_addr_to_str(&conn->dst),
2169                         ntohs(conn->dst.ip.sin_port)));
2170                 return;
2171         }
2172
2173
2174         /* See if we know this connection
2175            if we don't know this connection  then we dont need to do anything
2176          */
2177         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2178         if (tcpp == NULL) {
2179                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2180                         ctdb_addr_to_str(&conn->dst),
2181                         ntohs(conn->dst.ip.sin_port)));
2182                 return;
2183         }
2184
2185
2186         /* We need to remove this entry from the array.
2187            Instead of allocating a new array and copying data to it
2188            we cheat and just copy the last entry in the existing array
2189            to the entry that is to be removed and just shring the 
2190            ->num field
2191          */
2192         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2193         vnn->tcp_array->num--;
2194
2195         /* If we deleted the last entry we also need to remove the entire array
2196          */
2197         if (vnn->tcp_array->num == 0) {
2198                 talloc_free(vnn->tcp_array);
2199                 vnn->tcp_array = NULL;
2200         }               
2201
2202         vnn->tcp_update_needed = true;
2203
2204         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2205                 ctdb_addr_to_str(&conn->src),
2206                 ntohs(conn->src.ip.sin_port)));
2207 }
2208
2209
2210 /*
2211   called by a daemon to inform us of a TCP connection that one of its
2212   clients used are no longer needed in the tickle database
2213  */
2214 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2215 {
2216         struct ctdb_vnn *vnn;
2217         struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
2218
2219         /* If we don't have public IPs, tickles are useless */
2220         if (ctdb->vnn == NULL) {
2221                 return 0;
2222         }
2223
2224         vnn = find_public_ip_vnn(ctdb, &conn->dst);
2225         if (vnn == NULL) {
2226                 DEBUG(DEBUG_ERR,
2227                       (__location__ " unable to find public address %s\n",
2228                        ctdb_addr_to_str(&conn->dst)));
2229                 return 0;
2230         }
2231
2232         ctdb_remove_connection(vnn, conn);
2233
2234         return 0;
2235 }
2236
2237
2238 /*
2239   Called when another daemon starts - causes all tickles for all
2240   public addresses we are serving to be sent to the new node on the
2241   next check.  This actually causes the next scheduled call to
2242   tdb_update_tcp_tickles() to update all nodes.  This is simple and
2243   doesn't require careful error handling.
2244  */
2245 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
2246 {
2247         struct ctdb_vnn *vnn;
2248
2249         DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
2250                            (unsigned long) pnn));
2251
2252         for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2253                 vnn->tcp_update_needed = true;
2254         }
2255
2256         return 0;
2257 }
2258
2259
2260 /*
2261   called when a client structure goes away - hook to remove
2262   elements from the tcp_list in all daemons
2263  */
2264 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2265 {
2266         while (client->tcp_list) {
2267                 struct ctdb_vnn *vnn;
2268                 struct ctdb_tcp_list *tcp = client->tcp_list;
2269                 struct ctdb_connection *conn = &tcp->connection;
2270
2271                 DLIST_REMOVE(client->tcp_list, tcp);
2272
2273                 vnn = find_public_ip_vnn(client->ctdb,
2274                                          &conn->dst);
2275                 if (vnn == NULL) {
2276                         DEBUG(DEBUG_ERR,
2277                               (__location__ " unable to find public address %s\n",
2278                                ctdb_addr_to_str(&conn->dst)));
2279                         continue;
2280                 }
2281
2282                 /* If the IP address is hosted on this node then
2283                  * remove the connection. */
2284                 if (vnn->pnn == client->ctdb->pnn) {
2285                         ctdb_remove_connection(vnn, conn);
2286                 }
2287
2288                 /* Otherwise this function has been called because the
2289                  * server IP address has been released to another node
2290                  * and the client has exited.  This means that we
2291                  * should not delete the connection information.  The
2292                  * takeover node processes connections too. */
2293         }
2294 }
2295
2296
2297 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2298 {
2299         struct ctdb_vnn *vnn;
2300         int count = 0;
2301         TDB_DATA data;
2302
2303         if (ctdb->tunable.disable_ip_failover == 1) {
2304                 return;
2305         }
2306
2307         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2308                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2309                         ctdb_vnn_unassign_iface(ctdb, vnn);
2310                         continue;
2311                 }
2312                 if (!vnn->iface) {
2313                         continue;
2314                 }
2315
2316                 /* Don't allow multiple releases at once.  Some code,
2317                  * particularly ctdb_tickle_sentenced_connections() is
2318                  * not re-entrant */
2319                 if (vnn->update_in_flight) {
2320                         DEBUG(DEBUG_WARNING,
2321                               (__location__
2322                                " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
2323                                     ctdb_addr_to_str(&vnn->public_address),
2324                                     vnn->public_netmask_bits,
2325                                     ctdb_vnn_iface_string(vnn)));
2326                         continue;
2327                 }
2328                 vnn->update_in_flight = true;
2329
2330                 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
2331                                     ctdb_addr_to_str(&vnn->public_address),
2332                                     vnn->public_netmask_bits,
2333                                     ctdb_vnn_iface_string(vnn)));
2334
2335                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2336                                   ctdb_vnn_iface_string(vnn),
2337                                   ctdb_addr_to_str(&vnn->public_address),
2338                                   vnn->public_netmask_bits);
2339
2340                 data.dptr = (uint8_t *)talloc_strdup(
2341                                 vnn, ctdb_addr_to_str(&vnn->public_address));
2342                 if (data.dptr != NULL) {
2343                         data.dsize = strlen((char *)data.dptr) + 1;
2344                         ctdb_daemon_send_message(ctdb, ctdb->pnn,
2345                                                  CTDB_SRVID_RELEASE_IP, data);
2346                         talloc_free(data.dptr);
2347                 }
2348
2349                 ctdb_vnn_unassign_iface(ctdb, vnn);
2350                 vnn->update_in_flight = false;
2351                 count++;
2352         }
2353
2354         DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
2355 }
2356
2357
2358 /*
2359   get list of public IPs
2360  */
2361 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
2362                                     struct ctdb_req_control_old *c, TDB_DATA *outdata)
2363 {
2364         int i, num, len;
2365         struct ctdb_public_ip_list_old *ips;
2366         struct ctdb_vnn *vnn;
2367         bool only_available = false;
2368
2369         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2370                 only_available = true;
2371         }
2372
2373         /* count how many public ip structures we have */
2374         num = 0;
2375         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2376                 num++;
2377         }
2378
2379         len = offsetof(struct ctdb_public_ip_list_old, ips) +
2380                 num*sizeof(struct ctdb_public_ip);
2381         ips = talloc_zero_size(outdata, len);
2382         CTDB_NO_MEMORY(ctdb, ips);
2383
2384         i = 0;
2385         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2386                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2387                         continue;
2388                 }
2389                 ips->ips[i].pnn  = vnn->pnn;
2390                 ips->ips[i].addr = vnn->public_address;
2391                 i++;
2392         }
2393         ips->num = i;
2394         len = offsetof(struct ctdb_public_ip_list_old, ips) +
2395                 i*sizeof(struct ctdb_public_ip);
2396
2397         outdata->dsize = len;
2398         outdata->dptr  = (uint8_t *)ips;
2399
2400         return 0;
2401 }
2402
2403
2404 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2405                                         struct ctdb_req_control_old *c,
2406                                         TDB_DATA indata,
2407                                         TDB_DATA *outdata)
2408 {
2409         int i, num, len;
2410         ctdb_sock_addr *addr;
2411         struct ctdb_public_ip_info_old *info;
2412         struct ctdb_vnn *vnn;
2413
2414         addr = (ctdb_sock_addr *)indata.dptr;
2415
2416         vnn = find_public_ip_vnn(ctdb, addr);
2417         if (vnn == NULL) {
2418                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2419                                  "'%s'not a public address\n",
2420                                  ctdb_addr_to_str(addr)));
2421                 return -1;
2422         }
2423
2424         /* count how many public ip structures we have */
2425         num = 0;
2426         for (;vnn->ifaces[num];) {
2427                 num++;
2428         }
2429
2430         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2431                 num*sizeof(struct ctdb_iface);
2432         info = talloc_zero_size(outdata, len);
2433         CTDB_NO_MEMORY(ctdb, info);
2434
2435         info->ip.addr = vnn->public_address;
2436         info->ip.pnn = vnn->pnn;
2437         info->active_idx = 0xFFFFFFFF;
2438
2439         for (i=0; vnn->ifaces[i]; i++) {
2440                 struct ctdb_interface *cur;
2441
2442                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2443                 if (cur == NULL) {
2444                         DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2445                                            vnn->ifaces[i]));
2446                         return -1;
2447                 }
2448                 if (vnn->iface == cur) {
2449                         info->active_idx = i;
2450                 }
2451                 strncpy(info->ifaces[i].name, cur->name,
2452                         sizeof(info->ifaces[i].name));
2453                 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
2454                 info->ifaces[i].link_state = cur->link_up;
2455                 info->ifaces[i].references = cur->references;
2456         }
2457         info->num = i;
2458         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2459                 i*sizeof(struct ctdb_iface);
2460
2461         outdata->dsize = len;
2462         outdata->dptr  = (uint8_t *)info;
2463
2464         return 0;
2465 }
2466
2467 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2468                                 struct ctdb_req_control_old *c,
2469                                 TDB_DATA *outdata)
2470 {
2471         int i, num, len;
2472         struct ctdb_iface_list_old *ifaces;
2473         struct ctdb_interface *cur;
2474
2475         /* count how many public ip structures we have */
2476         num = 0;
2477         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2478                 num++;
2479         }
2480
2481         len = offsetof(struct ctdb_iface_list_old, ifaces) +
2482                 num*sizeof(struct ctdb_iface);
2483         ifaces = talloc_zero_size(outdata, len);
2484         CTDB_NO_MEMORY(ctdb, ifaces);
2485
2486         i = 0;
2487         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2488                 strncpy(ifaces->ifaces[i].name, cur->name,
2489                         sizeof(ifaces->ifaces[i].name));
2490                 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
2491                 ifaces->ifaces[i].link_state = cur->link_up;
2492                 ifaces->ifaces[i].references = cur->references;
2493                 i++;
2494         }
2495         ifaces->num = i;
2496         len = offsetof(struct ctdb_iface_list_old, ifaces) +
2497                 i*sizeof(struct ctdb_iface);
2498
2499         outdata->dsize = len;
2500         outdata->dptr  = (uint8_t *)ifaces;
2501
2502         return 0;
2503 }
2504
2505 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2506                                     struct ctdb_req_control_old *c,
2507                                     TDB_DATA indata)
2508 {
2509         struct ctdb_iface *info;
2510         struct ctdb_interface *iface;
2511         bool link_up = false;
2512
2513         info = (struct ctdb_iface *)indata.dptr;
2514
2515         if (info->name[CTDB_IFACE_SIZE] != '\0') {
2516                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2517                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2518                                   len, len, info->name));
2519                 return -1;
2520         }
2521
2522         switch (info->link_state) {
2523         case 0:
2524                 link_up = false;
2525                 break;
2526         case 1:
2527                 link_up = true;
2528                 break;
2529         default:
2530                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2531                                   (unsigned int)info->link_state));
2532                 return -1;
2533         }
2534
2535         if (info->references != 0) {
2536                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2537                                   (unsigned int)info->references));
2538                 return -1;
2539         }
2540
2541         iface = ctdb_find_iface(ctdb, info->name);
2542         if (iface == NULL) {
2543                 return -1;
2544         }
2545
2546         if (link_up == iface->link_up) {
2547                 return 0;
2548         }
2549
2550         DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2551               ("iface[%s] has changed it's link status %s => %s\n",
2552                iface->name,
2553                iface->link_up?"up":"down",
2554                link_up?"up":"down"));
2555
2556         iface->link_up = link_up;
2557         return 0;
2558 }
2559
2560
2561 /*
2562   called by a daemon to inform us of the entire list of TCP tickles for
2563   a particular public address.
2564   this control should only be sent by the node that is currently serving
2565   that public address.
2566  */
2567 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2568 {
2569         struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
2570         struct ctdb_tcp_array *tcparray;
2571         struct ctdb_vnn *vnn;
2572
2573         /* We must at least have tickles.num or else we cant verify the size
2574            of the received data blob
2575          */
2576         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2577                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2578                 return -1;
2579         }
2580
2581         /* verify that the size of data matches what we expect */
2582         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2583                          + sizeof(struct ctdb_connection) * list->num) {
2584                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2585                 return -1;
2586         }
2587
2588         DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2589                            ctdb_addr_to_str(&list->addr)));
2590
2591         vnn = find_public_ip_vnn(ctdb, &list->addr);
2592         if (vnn == NULL) {
2593                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2594                         ctdb_addr_to_str(&list->addr)));
2595
2596                 return 1;
2597         }
2598
2599         if (vnn->pnn == ctdb->pnn) {
2600                 DEBUG(DEBUG_INFO,
2601                       ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2602                        ctdb_addr_to_str(&list->addr)));
2603                 return 0;
2604         }
2605
2606         /* remove any old ticklelist we might have */
2607         talloc_free(vnn->tcp_array);
2608         vnn->tcp_array = NULL;
2609
2610         tcparray = talloc(vnn, struct ctdb_tcp_array);
2611         CTDB_NO_MEMORY(ctdb, tcparray);
2612
2613         tcparray->num = list->num;
2614
2615         tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
2616         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2617
2618         memcpy(tcparray->connections, &list->connections[0],
2619                sizeof(struct ctdb_connection)*tcparray->num);
2620
2621         /* We now have a new fresh tickle list array for this vnn */
2622         vnn->tcp_array = tcparray;
2623
2624         return 0;
2625 }
2626
2627 /*
2628   called to return the full list of tickles for the puclic address associated 
2629   with the provided vnn
2630  */
2631 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2632 {
2633         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2634         struct ctdb_tickle_list_old *list;
2635         struct ctdb_tcp_array *tcparray;
2636         int num, i;
2637         struct ctdb_vnn *vnn;
2638         unsigned port;
2639
2640         vnn = find_public_ip_vnn(ctdb, addr);
2641         if (vnn == NULL) {
2642                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2643                         ctdb_addr_to_str(addr)));
2644
2645                 return 1;
2646         }
2647
2648         port = ctdb_addr_to_port(addr);
2649
2650         tcparray = vnn->tcp_array;
2651         num = 0;
2652         if (tcparray != NULL) {
2653                 if (port == 0) {
2654                         /* All connections */
2655                         num = tcparray->num;
2656                 } else {
2657                         /* Count connections for port */
2658                         for (i = 0; i < tcparray->num; i++) {
2659                                 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2660                                         num++;
2661                                 }
2662                         }
2663                 }
2664         }
2665
2666         outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
2667                         + sizeof(struct ctdb_connection) * num;
2668
2669         outdata->dptr  = talloc_size(outdata, outdata->dsize);
2670         CTDB_NO_MEMORY(ctdb, outdata->dptr);
2671         list = (struct ctdb_tickle_list_old *)outdata->dptr;
2672
2673         list->addr = *addr;
2674         list->num = num;
2675
2676         if (num == 0) {
2677                 return 0;
2678         }
2679
2680         num = 0;
2681         for (i = 0; i < tcparray->num; i++) {
2682                 if (port == 0 || \
2683                     port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2684                         list->connections[num] = tcparray->connections[i];
2685                         num++;
2686                 }
2687         }
2688
2689         return 0;
2690 }
2691
2692
2693 /*
2694   set the list of all tcp tickles for a public address
2695  */
2696 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2697                                             ctdb_sock_addr *addr,
2698                                             struct ctdb_tcp_array *tcparray)
2699 {
2700         int ret, num;
2701         TDB_DATA data;
2702         struct ctdb_tickle_list_old *list;
2703
2704         if (tcparray) {
2705                 num = tcparray->num;
2706         } else {
2707                 num = 0;
2708         }
2709
2710         data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2711                         sizeof(struct ctdb_connection) * num;
2712         data.dptr = talloc_size(ctdb, data.dsize);
2713         CTDB_NO_MEMORY(ctdb, data.dptr);
2714
2715         list = (struct ctdb_tickle_list_old *)data.dptr;
2716         list->addr = *addr;
2717         list->num = num;
2718         if (tcparray) {
2719                 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2720         }
2721
2722         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2723                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2724                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2725         if (ret != 0) {
2726                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2727                 return -1;
2728         }
2729
2730         talloc_free(data.dptr);
2731
2732         return ret;
2733 }
2734
2735
2736 /*
2737   perform tickle updates if required
2738  */
2739 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2740                                     struct tevent_timer *te,
2741                                     struct timeval t, void *private_data)
2742 {
2743         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2744         int ret;
2745         struct ctdb_vnn *vnn;
2746
2747         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2748                 /* we only send out updates for public addresses that 
2749                    we have taken over
2750                  */
2751                 if (ctdb->pnn != vnn->pnn) {
2752                         continue;
2753                 }
2754                 /* We only send out the updates if we need to */
2755                 if (!vnn->tcp_update_needed) {
2756                         continue;
2757                 }
2758                 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2759                                                        &vnn->public_address,
2760                                                        vnn->tcp_array);
2761                 if (ret != 0) {
2762                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2763                                 ctdb_addr_to_str(&vnn->public_address)));
2764                 } else {
2765                         DEBUG(DEBUG_INFO,
2766                               ("Sent tickle update for public address %s\n",
2767                                ctdb_addr_to_str(&vnn->public_address)));
2768                         vnn->tcp_update_needed = false;
2769                 }
2770         }
2771
2772         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2773                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2774                          ctdb_update_tcp_tickles, ctdb);
2775 }
2776
2777 /*
2778   start periodic update of tcp tickles
2779  */
2780 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2781 {
2782         ctdb->tickle_update_context = talloc_new(ctdb);
2783
2784         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2785                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2786                          ctdb_update_tcp_tickles, ctdb);
2787 }
2788
2789
2790
2791
/* State for a series of gratuitous ARPs sent by send_gratious_arp(). */
struct control_gratious_arp {
        struct ctdb_context *ctdb;      /* supplies the event context for re-arming the timer */
        ctdb_sock_addr addr;            /* address passed to ctdb_sys_send_arp() */
        const char *iface;              /* interface name passed to ctdb_sys_send_arp() */
        int count;                      /* sends attempted so far; series ends at CTDB_ARP_REPEAT */
};
2798
2799 /*
2800   send a control_gratuitous arp
2801  */
2802 static void send_gratious_arp(struct tevent_context *ev,
2803                               struct tevent_timer *te,
2804                               struct timeval t, void *private_data)
2805 {
2806         int ret;
2807         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2808                                                         struct control_gratious_arp);
2809
2810         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2811         if (ret != 0) {
2812                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2813                                  arp->iface, strerror(errno)));
2814         }
2815
2816
2817         arp->count++;
2818         if (arp->count == CTDB_ARP_REPEAT) {
2819                 talloc_free(arp);
2820                 return;
2821         }
2822
2823         tevent_add_timer(arp->ctdb->ev, arp,
2824                          timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2825                          send_gratious_arp, arp);
2826 }
2827
2828
2829 /*
2830   send a gratious arp 
2831  */
2832 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2833 {
2834         struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2835         struct control_gratious_arp *arp;
2836
2837         /* verify the size of indata */
2838         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2839                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2840                                  (unsigned)indata.dsize, 
2841                                  (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2842                 return -1;
2843         }
2844         if (indata.dsize != 
2845                 ( offsetof(struct ctdb_addr_info_old, iface)
2846                 + gratious_arp->len ) ){
2847
2848                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2849                         "but should be %u bytes\n", 
2850                          (unsigned)indata.dsize, 
2851                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2852                 return -1;
2853         }
2854
2855
2856         arp = talloc(ctdb, struct control_gratious_arp);
2857         CTDB_NO_MEMORY(ctdb, arp);
2858
2859         arp->ctdb  = ctdb;
2860         arp->addr   = gratious_arp->addr;
2861         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2862         CTDB_NO_MEMORY(ctdb, arp->iface);
2863         arp->count = 0;
2864
2865         tevent_add_timer(arp->ctdb->ev, arp,
2866                          timeval_zero(), send_gratious_arp, arp);
2867
2868         return 0;
2869 }
2870
2871 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2872 {
2873         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2874         int ret;
2875
2876         /* verify the size of indata */
2877         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2878                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2879                 return -1;
2880         }
2881         if (indata.dsize != 
2882                 ( offsetof(struct ctdb_addr_info_old, iface)
2883                 + pub->len ) ){
2884
2885                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2886                         "but should be %u bytes\n", 
2887                          (unsigned)indata.dsize, 
2888                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2889                 return -1;
2890         }
2891
2892         DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2893
2894         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2895
2896         if (ret != 0) {
2897                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2898                 return -1;
2899         }
2900
2901         return 0;
2902 }
2903
/* Private data handed to delete_ip_callback(). */
struct delete_ip_callback_state {
        struct ctdb_req_control_old *c; /* the control request that delete_ip_callback() replies to */
};
2907
2908 /*
2909   called when releaseip event finishes for del_public_address
2910  */
2911 static void delete_ip_callback(struct ctdb_context *ctdb,
2912                                int32_t status, TDB_DATA data,
2913                                const char *errormsg,
2914                                void *private_data)
2915 {
2916         struct delete_ip_callback_state *state =
2917                 talloc_get_type(private_data, struct delete_ip_callback_state);
2918
2919         /* If release failed then fail. */
2920         ctdb_request_control_reply(ctdb, state->c, NULL, status, errormsg);
2921         talloc_free(private_data);
2922 }
2923
2924 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb,
2925                                         struct ctdb_req_control_old *c,
2926                                         TDB_DATA indata, bool *async_reply)
2927 {
2928         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2929         struct ctdb_vnn *vnn;
2930
2931         /* verify the size of indata */
2932         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2933                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2934                 return -1;
2935         }
2936         if (indata.dsize != 
2937                 ( offsetof(struct ctdb_addr_info_old, iface)
2938                 + pub->len ) ){
2939
2940                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2941                         "but should be %u bytes\n", 
2942                          (unsigned)indata.dsize, 
2943                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2944                 return -1;
2945         }
2946
2947         DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2948
2949         /* walk over all public addresses until we find a match */
2950         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2951                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2952                         if (vnn->pnn == ctdb->pnn) {
2953                                 struct delete_ip_callback_state *state;
2954                                 struct ctdb_public_ip *ip;
2955                                 TDB_DATA data;
2956                                 int ret;
2957
2958                                 vnn->delete_pending = true;
2959
2960                                 state = talloc(ctdb,
2961                                                struct delete_ip_callback_state);
2962                                 CTDB_NO_MEMORY(ctdb, state);
2963                                 state->c = c;
2964
2965                                 ip = talloc(state, struct ctdb_public_ip);
2966                                 if (ip == NULL) {
2967                                         DEBUG(DEBUG_ERR,
2968                                               (__location__ " Out of memory\n"));
2969                                         talloc_free(state);
2970                                         return -1;
2971                                 }
2972                                 ip->pnn = -1;
2973                                 ip->addr = pub->addr;
2974
2975                                 data.dsize = sizeof(struct ctdb_public_ip);
2976                                 data.dptr = (unsigned char *)ip;
2977
2978                                 ret = ctdb_daemon_send_control(ctdb,
2979                                                                ctdb_get_pnn(ctdb),
2980                                                                0,
2981                                                                CTDB_CONTROL_RELEASE_IP,
2982                                                                0, 0,
2983                                                                data,
2984                                                                delete_ip_callback,
2985                                                                state);
2986                                 if (ret == -1) {
2987                                         DEBUG(DEBUG_ERR,
2988                                               (__location__ "Unable to send "
2989                                                "CTDB_CONTROL_RELEASE_IP\n"));
2990                                         talloc_free(state);
2991                                         return -1;
2992                                 }
2993
2994                                 state->c = talloc_steal(state, c);
2995                                 *async_reply = true;
2996                         } else {
2997                                 /* This IP is not hosted on the
2998                                  * current node so just delete it
2999                                  * now. */
3000                                 do_delete_ip(ctdb, vnn);
3001                         }
3002
3003                         return 0;
3004                 }
3005         }
3006
3007         DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
3008                          ctdb_addr_to_str(&pub->addr)));
3009         return -1;
3010 }
3011
3012
/* Private data handed to ctdb_ipreallocated_callback(). */
struct ipreallocated_callback_state {
        struct ctdb_req_control_old *c; /* the control request answered when the event finishes */
};
3016
3017 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
3018                                         int status, void *p)
3019 {
3020         struct ipreallocated_callback_state *state =
3021                 talloc_get_type(p, struct ipreallocated_callback_state);
3022
3023         if (status != 0) {
3024                 DEBUG(DEBUG_ERR,
3025                       (" \"ipreallocated\" event script failed (status %d)\n",
3026                        status));
3027                 if (status == -ETIME) {
3028                         ctdb_ban_self(ctdb);
3029                 }
3030         }
3031
3032         ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
3033         talloc_free(state);
3034 }
3035
3036 /* A control to run the ipreallocated event */
3037 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
3038                                    struct ctdb_req_control_old *c,
3039                                    bool *async_reply)
3040 {
3041         int ret;
3042         struct ipreallocated_callback_state *state;
3043
3044         state = talloc(ctdb, struct ipreallocated_callback_state);
3045         CTDB_NO_MEMORY(ctdb, state);
3046
3047         DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
3048
3049         ret = ctdb_event_script_callback(ctdb, state,
3050                                          ctdb_ipreallocated_callback, state,
3051                                          CTDB_EVENT_IPREALLOCATED,
3052                                          "%s", "");
3053
3054         if (ret != 0) {
3055                 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
3056                 talloc_free(state);
3057                 return -1;
3058         }
3059
3060         /* tell the control that we will be reply asynchronously */
3061         state->c    = talloc_steal(state, c);
3062         *async_reply = true;
3063
3064         return 0;
3065 }
3066
3067
3068 /* This function is called from the recovery daemon to verify that a remote
3069    node has the expected ip allocation.
3070    This is verified against ctdb->ip_tree
3071 */
3072 static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
3073                                        struct ctdb_public_ip_list *ips,
3074                                        uint32_t pnn)
3075 {
3076         struct public_ip_list *tmp_ip;
3077         int i;
3078
3079         if (ctdb->ip_tree == NULL) {
3080                 /* don't know the expected allocation yet, assume remote node
3081                    is correct. */
3082                 return 0;
3083         }
3084
3085         if (ips == NULL) {
3086                 return 0;
3087         }
3088
3089         for (i=0; i<ips->num; i++) {
3090                 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ip[i].addr));
3091                 if (tmp_ip == NULL) {
3092                         DEBUG(DEBUG_ERR,("Node %u has new or unknown public IP %s\n", pnn, ctdb_addr_to_str(&ips->ip[i].addr)));
3093                         return -1;
3094                 }
3095
3096                 if (tmp_ip->pnn == -1 || ips->ip[i].pnn == -1) {
3097                         continue;
3098                 }
3099
3100                 if (tmp_ip->pnn != ips->ip[i].pnn) {
3101                         DEBUG(DEBUG_ERR,
3102                               ("Inconsistent IP allocation - node %u thinks %s is held by node %u while it is assigned to node %u\n",
3103                                pnn,
3104                                ctdb_addr_to_str(&ips->ip[i].addr),
3105                                ips->ip[i].pnn, tmp_ip->pnn));
3106                         return -1;
3107                 }
3108         }
3109
3110         return 0;
3111 }
3112
3113 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
3114 {
3115         struct public_ip_list *tmp_ip;
3116
3117         /* IP tree is never built if DisableIPFailover is set */
3118         if (ctdb->tunable.disable_ip_failover != 0) {
3119                 return 0;
3120         }
3121
3122         if (ctdb->ip_tree == NULL) {
3123                 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
3124                 return -1;
3125         }
3126
3127         tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
3128         if (tmp_ip == NULL) {
3129                 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
3130                 return -1;
3131         }
3132
3133         DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
3134         tmp_ip->pnn = ip->pnn;
3135
3136         return 0;
3137 }
3138
3139 void clear_ip_assignment_tree(struct ctdb_context *ctdb)
3140 {
3141         TALLOC_FREE(ctdb->ip_tree);
3142 }
3143
/*
  State for one in-flight "reload public IPs" request, shared between
  the control handler, the child-result handler, the timeout handler
  and the talloc destructor.
*/
struct ctdb_reloadips_handle {
        /* daemon context the request belongs to */
        struct ctdb_context *ctdb;
        /* control request to answer; replied to from the destructor */
        struct ctdb_req_control_old *c;
        /* status sent in the reply; starts at -1, set from the child's result byte */
        int status;
        /* pipe: parent reads the result from fd[0], child writes it to fd[1] */
        int fd[2];
        /* pid of the forked worker; sent SIGKILL from the destructor */
        pid_t child;
        /* read event registered on fd[0] */
        struct tevent_fd *fde;
};
3152
3153 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
3154 {
3155         if (h == h->ctdb->reload_ips) {
3156                 h->ctdb->reload_ips = NULL;
3157         }
3158         if (h->c != NULL) {
3159                 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
3160                 h->c = NULL;
3161         }
3162         ctdb_kill(h->ctdb, h->child, SIGKILL);
3163         return 0;
3164 }
3165
3166 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
3167                                          struct tevent_timer *te,
3168                                          struct timeval t, void *private_data)
3169 {
3170         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3171
3172         talloc_free(h);
3173 }
3174
3175 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
3176                                          struct tevent_fd *fde,
3177                                          uint16_t flags, void *private_data)
3178 {
3179         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3180
3181         char res;
3182         int ret;
3183
3184         ret = sys_read(h->fd[0], &res, 1);
3185         if (ret < 1 || res != 0) {
3186                 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
3187                 res = 1;
3188         }
3189         h->status = res;
3190
3191         talloc_free(h);
3192 }
3193
3194 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
3195 {
3196         TALLOC_CTX *mem_ctx = talloc_new(NULL);
3197         struct ctdb_public_ip_list_old *ips;
3198         struct ctdb_vnn *vnn;
3199         struct client_async_data *async_data;
3200         struct timeval timeout;
3201         TDB_DATA data;
3202         struct ctdb_client_control_state *state;
3203         bool first_add;
3204         int i, ret;
3205
3206         CTDB_NO_MEMORY(ctdb, mem_ctx);
3207
3208         /* Read IPs from local node */
3209         ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
3210                                        CTDB_CURRENT_NODE, mem_ctx, &ips);
3211         if (ret != 0) {
3212                 DEBUG(DEBUG_ERR,
3213                       ("Unable to fetch public IPs from local node\n"));
3214                 talloc_free(mem_ctx);
3215                 return -1;
3216         }
3217
3218         /* Read IPs file - this is safe since this is a child process */
3219         ctdb->vnn = NULL;
3220         if (ctdb_set_public_addresses(ctdb, false) != 0) {
3221                 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
3222                 talloc_free(mem_ctx);
3223                 return -1;
3224         }
3225
3226         async_data = talloc_zero(mem_ctx, struct client_async_data);
3227         CTDB_NO_MEMORY(ctdb, async_data);
3228
3229         /* Compare IPs between node and file for IPs to be deleted */
3230         for (i = 0; i < ips->num; i++) {
3231                 /* */
3232                 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3233                         if (ctdb_same_ip(&vnn->public_address,
3234                                          &ips->ips[i].addr)) {
3235                                 /* IP is still in file */
3236                                 break;
3237                         }
3238                 }
3239
3240                 if (vnn == NULL) {
3241                         /* Delete IP ips->ips[i] */
3242                         struct ctdb_addr_info_old *pub;
3243
3244                         DEBUG(DEBUG_NOTICE,
3245                               ("IP %s no longer configured, deleting it\n",
3246                                ctdb_addr_to_str(&ips->ips[i].addr)));
3247
3248                         pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
3249                         CTDB_NO_MEMORY(ctdb, pub);
3250
3251                         pub->addr  = ips->ips[i].addr;
3252                         pub->mask  = 0;
3253                         pub->len   = 0;
3254
3255                         timeout = TAKEOVER_TIMEOUT();
3256
3257                         data.dsize = offsetof(struct ctdb_addr_info_old,
3258                                               iface) + pub->len;
3259                         data.dptr = (uint8_t *)pub;
3260
3261                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3262                                                   CTDB_CONTROL_DEL_PUBLIC_IP,
3263                                                   0, data, async_data,
3264                                                   &timeout, NULL);
3265                         if (state == NULL) {
3266                                 DEBUG(DEBUG_ERR,
3267                                       (__location__
3268                                        " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
3269                                 goto failed;
3270                         }
3271
3272                         ctdb_client_async_add(async_data, state);
3273                 }
3274         }
3275
3276         /* Compare IPs between node and file for IPs to be added */
3277         first_add = true;
3278         for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3279                 for (i = 0; i < ips->num; i++) {
3280                         if (ctdb_same_ip(&vnn->public_address,
3281                                          &ips->ips[i].addr)) {
3282                                 /* IP already on node */
3283                                 break;
3284                         }
3285                 }
3286                 if (i == ips->num) {
3287                         /* Add IP ips->ips[i] */
3288                         struct ctdb_addr_info_old *pub;
3289                         const char *ifaces = NULL;
3290                         uint32_t len;
3291                         int iface = 0;
3292
3293                         DEBUG(DEBUG_NOTICE,
3294                               ("New IP %s configured, adding it\n",
3295                                ctdb_addr_to_str(&vnn->public_address)));
3296                         if (first_add) {
3297                                 uint32_t pnn = ctdb_get_pnn(ctdb);
3298
3299                                 data.dsize = sizeof(pnn);
3300                                 data.dptr  = (uint8_t *)&pnn;
3301
3302                                 ret = ctdb_client_send_message(
3303                                         ctdb,
3304                                         CTDB_BROADCAST_CONNECTED,
3305                                         CTDB_SRVID_REBALANCE_NODE,
3306                                         data);
3307                                 if (ret != 0) {
3308                                         DEBUG(DEBUG_WARNING,
3309                                               ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
3310                                 }
3311
3312                                 first_add = false;
3313                         }
3314
3315                         ifaces = vnn->ifaces[0];
3316                         iface = 1;
3317                         while (vnn->ifaces[iface] != NULL) {
3318                                 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
3319                                                          vnn->ifaces[iface]);
3320                                 iface++;
3321                         }
3322
3323                         len   = strlen(ifaces) + 1;
3324                         pub = talloc_zero_size(mem_ctx,
3325                                                offsetof(struct ctdb_addr_info_old, iface) + len);
3326                         CTDB_NO_MEMORY(ctdb, pub);
3327
3328                         pub->addr  = vnn->public_address;
3329                         pub->mask  = vnn->public_netmask_bits;
3330                         pub->len   = len;
3331                         memcpy(&pub->iface[0], ifaces, pub->len);
3332
3333                         timeout = TAKEOVER_TIMEOUT();
3334
3335                         data.dsize = offsetof(struct ctdb_addr_info_old,
3336                                               iface) + pub->len;
3337                         data.dptr = (uint8_t *)pub;
3338
3339                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3340                                                   CTDB_CONTROL_ADD_PUBLIC_IP,
3341                                                   0, data, async_data,
3342                                                   &timeout, NULL);
3343                         if (state == NULL) {
3344                                 DEBUG(DEBUG_ERR,
3345                                       (__location__
3346                                        " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
3347                                 goto failed;
3348                         }
3349
3350                         ctdb_client_async_add(async_data, state);
3351                 }
3352         }
3353
3354         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3355                 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
3356                 goto failed;
3357         }
3358
3359         talloc_free(mem_ctx);
3360         return 0;
3361
3362 failed:
3363         talloc_free(mem_ctx);
3364         return -1;
3365 }
3366
3367 /* This control is sent to force the node to re-read the public addresses file
3368    and drop any addresses we should nnot longer host, and add new addresses
3369    that we are now able to host
3370 */
3371 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
3372 {
3373         struct ctdb_reloadips_handle *h;
3374         pid_t parent = getpid();
3375
3376         if (ctdb->reload_ips != NULL) {
3377                 talloc_free(ctdb->reload_ips);
3378                 ctdb->reload_ips = NULL;
3379         }
3380
3381         h = talloc(ctdb, struct ctdb_reloadips_handle);
3382         CTDB_NO_MEMORY(ctdb, h);
3383         h->ctdb     = ctdb;
3384         h->c        = NULL;
3385         h->status   = -1;
3386         
3387         if (pipe(h->fd) == -1) {
3388                 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
3389                 talloc_free(h);
3390                 return -1;
3391         }
3392
3393         h->child = ctdb_fork(ctdb);
3394         if (h->child == (pid_t)-1) {
3395                 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
3396                 close(h->fd[0]);
3397                 close(h->fd[1]);
3398                 talloc_free(h);
3399                 return -1;
3400         }
3401
3402         /* child process */
3403         if (h->child == 0) {
3404                 signed char res = 0;
3405
3406                 close(h->fd[0]);
3407                 debug_extra = talloc_asprintf(NULL, "reloadips:");
3408
3409                 prctl_set_comment("ctdb_reloadips");
3410                 if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
3411                         DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
3412                         res = -1;
3413                 } else {
3414                         res = ctdb_reloadips_child(ctdb);
3415                         if (res != 0) {
3416                                 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
3417                         }
3418                 }
3419
3420                 sys_write(h->fd[1], &res, 1);
3421                 ctdb_wait_for_process_to_exit(parent);
3422                 _exit(0);
3423         }
3424
3425         h->c             = talloc_steal(h, c);
3426
3427         close(h->fd[1]);
3428         set_close_on_exec(h->fd[0]);
3429
3430         talloc_set_destructor(h, ctdb_reloadips_destructor);
3431
3432
3433         h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
3434                                ctdb_reloadips_child_handler, (void *)h);
3435         tevent_fd_set_auto_close(h->fde);
3436
3437         tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
3438                          ctdb_reloadips_timeout_event, h);
3439
3440         /* we reply later */
3441         *async_reply = true;
3442         return 0;
3443 }