ctdb-ipalloc: New function ipalloc_set_public_ips()
[samba.git] / ctdb / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6    Copyright (C) Martin Schwenke  2011
7
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3 of the License, or
11    (at your option) any later version.
12    
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17    
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, see <http://www.gnu.org/licenses/>.
20 */
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
26
27 #include <talloc.h>
28 #include <tevent.h>
29
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_process.h"
34
35 #include "ctdb_private.h"
36 #include "ctdb_client.h"
37
38 #include "common/rb_tree.h"
39 #include "common/reqid.h"
40 #include "common/system.h"
41 #include "common/common.h"
42 #include "common/logging.h"
43
44 #include "server/ipalloc.h"
45
/*
 * Timeout value derived from the takeover_timeout tunable.  The
 * expansion reads a variable named "ctdb", which therefore has to be
 * in scope wherever the macro is used.
 */
#define TAKEOVER_TIMEOUT() \
	timeval_current_ofs(ctdb->tunable.takeover_timeout,0)

/* First argument passed to timeval_current_ofs() when re-arming the
 * ARP timer (presumably seconds - confirm against lib/util) */
#define CTDB_ARP_INTERVAL 1
/* Number of announcement rounds sent before the ARP state is freed */
#define CTDB_ARP_REPEAT   3
50
/*
  A local network interface known to this node.  Entries are kept on
  the ctdb->ifaces doubly linked list.
 */
struct ctdb_interface {
	/* list linkage */
	struct ctdb_interface *prev;
	struct ctdb_interface *next;
	/* interface name, used as the lookup key */
	const char *name;
	/* only interfaces with link_up set are chosen for addresses */
	bool link_up;
	/* number of public addresses currently assigned to the interface */
	uint32_t references;
};
57
58 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
59 {
60         if (vnn->iface) {
61                 return vnn->iface->name;
62         }
63
64         return "__none__";
65 }
66
67 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
68 {
69         struct ctdb_interface *i;
70
71         if (strlen(iface) > CTDB_IFACE_SIZE) {
72                 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
73                 return -1;
74         }
75
76         /* Verify that we don't have an entry for this ip yet */
77         for (i=ctdb->ifaces;i;i=i->next) {
78                 if (strcmp(i->name, iface) == 0) {
79                         return 0;
80                 }
81         }
82
83         /* create a new structure for this interface */
84         i = talloc_zero(ctdb, struct ctdb_interface);
85         CTDB_NO_MEMORY_FATAL(ctdb, i);
86         i->name = talloc_strdup(i, iface);
87         CTDB_NO_MEMORY(ctdb, i->name);
88
89         i->link_up = true;
90
91         DLIST_ADD(ctdb->ifaces, i);
92
93         return 0;
94 }
95
96 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
97                                         const char *name)
98 {
99         int n;
100
101         for (n = 0; vnn->ifaces[n] != NULL; n++) {
102                 if (strcmp(name, vnn->ifaces[n]) == 0) {
103                         return true;
104                 }
105         }
106
107         return false;
108 }
109
110 /* If any interfaces now have no possible IPs then delete them.  This
111  * implementation is naive (i.e. simple) rather than clever
112  * (i.e. complex).  Given that this is run on delip and that operation
113  * is rare, this doesn't need to be efficient - it needs to be
114  * foolproof.  One alternative is reference counting, where the logic
115  * is distributed and can, therefore, be broken in multiple places.
116  * Another alternative is to build a red-black tree of interfaces that
117  * can have addresses (by walking ctdb->vnn once) and then walking
118  * ctdb->ifaces once and deleting those not in the tree.  Let's go to
119  * one of those if the naive implementation causes problems...  :-)
120  */
121 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
122                                         struct ctdb_vnn *vnn)
123 {
124         struct ctdb_interface *i, *next;
125
126         /* For each interface, check if there's an IP using it. */
127         for (i = ctdb->ifaces; i != NULL; i = next) {
128                 struct ctdb_vnn *tv;
129                 bool found;
130                 next = i->next;
131
132                 /* Only consider interfaces named in the given VNN. */
133                 if (!vnn_has_interface_with_name(vnn, i->name)) {
134                         continue;
135                 }
136
137                 /* Search for a vnn with this interface. */
138                 found = false;
139                 for (tv=ctdb->vnn; tv; tv=tv->next) {
140                         if (vnn_has_interface_with_name(tv, i->name)) {
141                                 found = true;
142                                 break;
143                         }
144                 }
145
146                 if (!found) {
147                         /* None of the VNNs are using this interface. */
148                         DLIST_REMOVE(ctdb->ifaces, i);
149                         talloc_free(i);
150                 }
151         }
152 }
153
154
155 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
156                                               const char *iface)
157 {
158         struct ctdb_interface *i;
159
160         for (i=ctdb->ifaces;i;i=i->next) {
161                 if (strcmp(i->name, iface) == 0) {
162                         return i;
163                 }
164         }
165
166         return NULL;
167 }
168
169 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
170                                                   struct ctdb_vnn *vnn)
171 {
172         int i;
173         struct ctdb_interface *cur = NULL;
174         struct ctdb_interface *best = NULL;
175
176         for (i=0; vnn->ifaces[i]; i++) {
177
178                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
179                 if (cur == NULL) {
180                         continue;
181                 }
182
183                 if (!cur->link_up) {
184                         continue;
185                 }
186
187                 if (best == NULL) {
188                         best = cur;
189                         continue;
190                 }
191
192                 if (cur->references < best->references) {
193                         best = cur;
194                         continue;
195                 }
196         }
197
198         return best;
199 }
200
201 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
202                                      struct ctdb_vnn *vnn)
203 {
204         struct ctdb_interface *best = NULL;
205
206         if (vnn->iface) {
207                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
208                                    "still assigned to iface '%s'\n",
209                                    ctdb_addr_to_str(&vnn->public_address),
210                                    ctdb_vnn_iface_string(vnn)));
211                 return 0;
212         }
213
214         best = ctdb_vnn_best_iface(ctdb, vnn);
215         if (best == NULL) {
216                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
217                                   "cannot assign to iface any iface\n",
218                                   ctdb_addr_to_str(&vnn->public_address)));
219                 return -1;
220         }
221
222         vnn->iface = best;
223         best->references++;
224         vnn->pnn = ctdb->pnn;
225
226         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
227                            "now assigned to iface '%s' refs[%d]\n",
228                            ctdb_addr_to_str(&vnn->public_address),
229                            ctdb_vnn_iface_string(vnn),
230                            best->references));
231         return 0;
232 }
233
234 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
235                                     struct ctdb_vnn *vnn)
236 {
237         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
238                            "now unassigned (old iface '%s' refs[%d])\n",
239                            ctdb_addr_to_str(&vnn->public_address),
240                            ctdb_vnn_iface_string(vnn),
241                            vnn->iface?vnn->iface->references:0));
242         if (vnn->iface) {
243                 vnn->iface->references--;
244         }
245         vnn->iface = NULL;
246         if (vnn->pnn == ctdb->pnn) {
247                 vnn->pnn = -1;
248         }
249 }
250
251 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
252                                struct ctdb_vnn *vnn)
253 {
254         int i;
255
256         /* Nodes that are not RUNNING can not host IPs */
257         if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
258                 return false;
259         }
260
261         if (vnn->delete_pending) {
262                 return false;
263         }
264
265         if (vnn->iface && vnn->iface->link_up) {
266                 return true;
267         }
268
269         for (i=0; vnn->ifaces[i]; i++) {
270                 struct ctdb_interface *cur;
271
272                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
273                 if (cur == NULL) {
274                         continue;
275                 }
276
277                 if (cur->link_up) {
278                         return true;
279                 }
280         }
281
282         return false;
283 }
284
285 struct ctdb_takeover_arp {
286         struct ctdb_context *ctdb;
287         uint32_t count;
288         ctdb_sock_addr addr;
289         struct ctdb_tcp_array *tcparray;
290         struct ctdb_vnn *vnn;
291 };
292
293
294 /*
295   lists of tcp endpoints
296  */
297 struct ctdb_tcp_list {
298         struct ctdb_tcp_list *prev, *next;
299         struct ctdb_connection connection;
300 };
301
302 /*
303   list of clients to kill on IP release
304  */
305 struct ctdb_client_ip {
306         struct ctdb_client_ip *prev, *next;
307         struct ctdb_context *ctdb;
308         ctdb_sock_addr addr;
309         uint32_t client_id;
310 };
311
312
313 /*
314   send a gratuitous arp
315  */
316 static void ctdb_control_send_arp(struct tevent_context *ev,
317                                   struct tevent_timer *te,
318                                   struct timeval t, void *private_data)
319 {
320         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
321                                                         struct ctdb_takeover_arp);
322         int i, ret;
323         struct ctdb_tcp_array *tcparray;
324         const char *iface = ctdb_vnn_iface_string(arp->vnn);
325
326         ret = ctdb_sys_send_arp(&arp->addr, iface);
327         if (ret != 0) {
328                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
329                                   iface, strerror(errno)));
330         }
331
332         tcparray = arp->tcparray;
333         if (tcparray) {
334                 for (i=0;i<tcparray->num;i++) {
335                         struct ctdb_connection *tcon;
336
337                         tcon = &tcparray->connections[i];
338                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
339                                 (unsigned)ntohs(tcon->dst.ip.sin_port),
340                                 ctdb_addr_to_str(&tcon->src),
341                                 (unsigned)ntohs(tcon->src.ip.sin_port)));
342                         ret = ctdb_sys_send_tcp(
343                                 &tcon->src,
344                                 &tcon->dst,
345                                 0, 0, 0);
346                         if (ret != 0) {
347                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
348                                         ctdb_addr_to_str(&tcon->src)));
349                         }
350                 }
351         }
352
353         arp->count++;
354
355         if (arp->count == CTDB_ARP_REPEAT) {
356                 talloc_free(arp);
357                 return;
358         }
359
360         tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
361                          timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
362                          ctdb_control_send_arp, arp);
363 }
364
365 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
366                                        struct ctdb_vnn *vnn)
367 {
368         struct ctdb_takeover_arp *arp;
369         struct ctdb_tcp_array *tcparray;
370
371         if (!vnn->takeover_ctx) {
372                 vnn->takeover_ctx = talloc_new(vnn);
373                 if (!vnn->takeover_ctx) {
374                         return -1;
375                 }
376         }
377
378         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
379         if (!arp) {
380                 return -1;
381         }
382
383         arp->ctdb = ctdb;
384         arp->addr = vnn->public_address;
385         arp->vnn  = vnn;
386
387         tcparray = vnn->tcp_array;
388         if (tcparray) {
389                 /* add all of the known tcp connections for this IP to the
390                    list of tcp connections to send tickle acks for */
391                 arp->tcparray = talloc_steal(arp, tcparray);
392
393                 vnn->tcp_array = NULL;
394                 vnn->tcp_update_needed = true;
395         }
396
397         tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
398                          timeval_zero(), ctdb_control_send_arp, arp);
399
400         return 0;
401 }
402
403 struct takeover_callback_state {
404         struct ctdb_req_control_old *c;
405         ctdb_sock_addr *addr;
406         struct ctdb_vnn *vnn;
407 };
408
/*
  State handed to ctdb_do_takeip_callback()
 */
struct ctdb_do_takeip_state {
	/* control to reply to once the takeip event has finished */
	struct ctdb_req_control_old *c;
	/* public address being taken over */
	struct ctdb_vnn *vnn;
};
413
414 /*
415   called when takeip event finishes
416  */
417 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
418                                     void *private_data)
419 {
420         struct ctdb_do_takeip_state *state =
421                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
422         int32_t ret;
423         TDB_DATA data;
424
425         if (status != 0) {
426                 if (status == -ETIME) {
427                         ctdb_ban_self(ctdb);
428                 }
429                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
430                                  ctdb_addr_to_str(&state->vnn->public_address),
431                                  ctdb_vnn_iface_string(state->vnn)));
432                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
433
434                 talloc_free(state);
435                 return;
436         }
437
438         if (ctdb->do_checkpublicip) {
439
440         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
441         if (ret != 0) {
442                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
443                 talloc_free(state);
444                 return;
445         }
446
447         }
448
449         data.dptr  = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
450         data.dsize = strlen((char *)data.dptr) + 1;
451         DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
452
453         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
454
455
456         /* the control succeeded */
457         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
458         talloc_free(state);
459         return;
460 }
461
462 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
463 {
464         state->vnn->update_in_flight = false;
465         return 0;
466 }
467
468 /*
469   take over an ip address
470  */
471 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
472                               struct ctdb_req_control_old *c,
473                               struct ctdb_vnn *vnn)
474 {
475         int ret;
476         struct ctdb_do_takeip_state *state;
477
478         if (vnn->update_in_flight) {
479                 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
480                                     "update for this IP already in flight\n",
481                                     ctdb_addr_to_str(&vnn->public_address),
482                                     vnn->public_netmask_bits));
483                 return -1;
484         }
485
486         ret = ctdb_vnn_assign_iface(ctdb, vnn);
487         if (ret != 0) {
488                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
489                                  "assign a usable interface\n",
490                                  ctdb_addr_to_str(&vnn->public_address),
491                                  vnn->public_netmask_bits));
492                 return -1;
493         }
494
495         state = talloc(vnn, struct ctdb_do_takeip_state);
496         CTDB_NO_MEMORY(ctdb, state);
497
498         state->c = talloc_steal(ctdb, c);
499         state->vnn   = vnn;
500
501         vnn->update_in_flight = true;
502         talloc_set_destructor(state, ctdb_takeip_destructor);
503
504         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
505                             ctdb_addr_to_str(&vnn->public_address),
506                             vnn->public_netmask_bits,
507                             ctdb_vnn_iface_string(vnn)));
508
509         ret = ctdb_event_script_callback(ctdb,
510                                          state,
511                                          ctdb_do_takeip_callback,
512                                          state,
513                                          CTDB_EVENT_TAKE_IP,
514                                          "%s %s %u",
515                                          ctdb_vnn_iface_string(vnn),
516                                          ctdb_addr_to_str(&vnn->public_address),
517                                          vnn->public_netmask_bits);
518
519         if (ret != 0) {
520                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
521                         ctdb_addr_to_str(&vnn->public_address),
522                         ctdb_vnn_iface_string(vnn)));
523                 talloc_free(state);
524                 return -1;
525         }
526
527         return 0;
528 }
529
/*
  State handed to ctdb_do_updateip_callback()
 */
struct ctdb_do_updateip_state {
	/* control to reply to once the updateip event has finished */
	struct ctdb_req_control_old *c;
	/* interface the address is being moved away from; restored if
	 * the event fails */
	struct ctdb_interface *old;
	/* public address being moved */
	struct ctdb_vnn *vnn;
};
535
536 /*
537   called when updateip event finishes
538  */
539 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
540                                       void *private_data)
541 {
542         struct ctdb_do_updateip_state *state =
543                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
544         int32_t ret;
545
546         if (status != 0) {
547                 if (status == -ETIME) {
548                         ctdb_ban_self(ctdb);
549                 }
550                 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
551                         ctdb_addr_to_str(&state->vnn->public_address),
552                         state->old->name,
553                         ctdb_vnn_iface_string(state->vnn)));
554
555                 /*
556                  * All we can do is reset the old interface
557                  * and let the next run fix it
558                  */
559                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
560                 state->vnn->iface = state->old;
561                 state->vnn->iface->references++;
562
563                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
564                 talloc_free(state);
565                 return;
566         }
567
568         if (ctdb->do_checkpublicip) {
569
570         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
571         if (ret != 0) {
572                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
573                 talloc_free(state);
574                 return;
575         }
576
577         }
578
579         /* the control succeeded */
580         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
581         talloc_free(state);
582         return;
583 }
584
585 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
586 {
587         state->vnn->update_in_flight = false;
588         return 0;
589 }
590
591 /*
592   update (move) an ip address
593  */
594 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
595                                 struct ctdb_req_control_old *c,
596                                 struct ctdb_vnn *vnn)
597 {
598         int ret;
599         struct ctdb_do_updateip_state *state;
600         struct ctdb_interface *old = vnn->iface;
601         const char *new_name;
602
603         if (vnn->update_in_flight) {
604                 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
605                                     "update for this IP already in flight\n",
606                                     ctdb_addr_to_str(&vnn->public_address),
607                                     vnn->public_netmask_bits));
608                 return -1;
609         }
610
611         ctdb_vnn_unassign_iface(ctdb, vnn);
612         ret = ctdb_vnn_assign_iface(ctdb, vnn);
613         if (ret != 0) {
614                 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
615                                  "assin a usable interface (old iface '%s')\n",
616                                  ctdb_addr_to_str(&vnn->public_address),
617                                  vnn->public_netmask_bits,
618                                  old->name));
619                 return -1;
620         }
621
622         new_name = ctdb_vnn_iface_string(vnn);
623         if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
624                 /* A benign update from one interface onto itself.
625                  * no need to run the eventscripts in this case, just return
626                  * success.
627                  */
628                 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
629                 return 0;
630         }
631
632         state = talloc(vnn, struct ctdb_do_updateip_state);
633         CTDB_NO_MEMORY(ctdb, state);
634
635         state->c = talloc_steal(ctdb, c);
636         state->old = old;
637         state->vnn = vnn;
638
639         vnn->update_in_flight = true;
640         talloc_set_destructor(state, ctdb_updateip_destructor);
641
642         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
643                             "interface %s to %s\n",
644                             ctdb_addr_to_str(&vnn->public_address),
645                             vnn->public_netmask_bits,
646                             old->name,
647                             new_name));
648
649         ret = ctdb_event_script_callback(ctdb,
650                                          state,
651                                          ctdb_do_updateip_callback,
652                                          state,
653                                          CTDB_EVENT_UPDATE_IP,
654                                          "%s %s %s %u",
655                                          state->old->name,
656                                          new_name,
657                                          ctdb_addr_to_str(&vnn->public_address),
658                                          vnn->public_netmask_bits);
659         if (ret != 0) {
660                 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
661                                  ctdb_addr_to_str(&vnn->public_address),
662                                  old->name, new_name));
663                 talloc_free(state);
664                 return -1;
665         }
666
667         return 0;
668 }
669
670 /*
671   Find the vnn of the node that has a public ip address
672   returns -1 if the address is not known as a public address
673  */
674 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
675 {
676         struct ctdb_vnn *vnn;
677
678         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
679                 if (ctdb_same_ip(&vnn->public_address, addr)) {
680                         return vnn;
681                 }
682         }
683
684         return NULL;
685 }
686
687 /*
688   take over an ip address
689  */
690 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
691                                  struct ctdb_req_control_old *c,
692                                  TDB_DATA indata,
693                                  bool *async_reply)
694 {
695         int ret;
696         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
697         struct ctdb_vnn *vnn;
698         bool have_ip = false;
699         bool do_updateip = false;
700         bool do_takeip = false;
701         struct ctdb_interface *best_iface = NULL;
702
703         if (pip->pnn != ctdb->pnn) {
704                 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
705                                  "with pnn %d, but we're node %d\n",
706                                  ctdb_addr_to_str(&pip->addr),
707                                  pip->pnn, ctdb->pnn));
708                 return -1;
709         }
710
711         /* update out vnn list */
712         vnn = find_public_ip_vnn(ctdb, &pip->addr);
713         if (vnn == NULL) {
714                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
715                         ctdb_addr_to_str(&pip->addr)));
716                 return 0;
717         }
718
719         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
720                 have_ip = ctdb_sys_have_ip(&pip->addr);
721         }
722         best_iface = ctdb_vnn_best_iface(ctdb, vnn);
723         if (best_iface == NULL) {
724                 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
725                                  "a usable interface (old %s, have_ip %d)\n",
726                                  ctdb_addr_to_str(&vnn->public_address),
727                                  vnn->public_netmask_bits,
728                                  ctdb_vnn_iface_string(vnn),
729                                  have_ip));
730                 return -1;
731         }
732
733         if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
734                 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
735                 have_ip = false;
736         }
737
738
739         if (vnn->iface == NULL && have_ip) {
740                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
741                                   "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
742                                  ctdb_addr_to_str(&vnn->public_address)));
743                 return 0;
744         }
745
746         if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
747                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
748                                   "and we have it on iface[%s], but it was assigned to node %d"
749                                   "and we are node %d, banning ourself\n",
750                                  ctdb_addr_to_str(&vnn->public_address),
751                                  ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
752                 ctdb_ban_self(ctdb);
753                 return -1;
754         }
755
756         if (vnn->pnn == -1 && have_ip) {
757                 vnn->pnn = ctdb->pnn;
758                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
759                                   "and we already have it on iface[%s], update local daemon\n",
760                                  ctdb_addr_to_str(&vnn->public_address),
761                                   ctdb_vnn_iface_string(vnn)));
762                 return 0;
763         }
764
765         if (vnn->iface) {
766                 if (vnn->iface != best_iface) {
767                         if (!vnn->iface->link_up) {
768                                 do_updateip = true;
769                         } else if (vnn->iface->references > (best_iface->references + 1)) {
770                                 /* only move when the rebalance gains something */
771                                         do_updateip = true;
772                         }
773                 }
774         }
775
776         if (!have_ip) {
777                 if (do_updateip) {
778                         ctdb_vnn_unassign_iface(ctdb, vnn);
779                         do_updateip = false;
780                 }
781                 do_takeip = true;
782         }
783
784         if (do_takeip) {
785                 ret = ctdb_do_takeip(ctdb, c, vnn);
786                 if (ret != 0) {
787                         return -1;
788                 }
789         } else if (do_updateip) {
790                 ret = ctdb_do_updateip(ctdb, c, vnn);
791                 if (ret != 0) {
792                         return -1;
793                 }
794         } else {
795                 /*
796                  * The interface is up and the kernel known the ip
797                  * => do nothing
798                  */
799                 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
800                         ctdb_addr_to_str(&pip->addr),
801                         vnn->public_netmask_bits,
802                         ctdb_vnn_iface_string(vnn)));
803                 return 0;
804         }
805
806         /* tell ctdb_control.c that we will be replying asynchronously */
807         *async_reply = true;
808
809         return 0;
810 }
811
812 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
813 {
814         DLIST_REMOVE(ctdb->vnn, vnn);
815         ctdb_vnn_unassign_iface(ctdb, vnn);
816         ctdb_remove_orphaned_ifaces(ctdb, vnn);
817         talloc_free(vnn);
818 }
819
820 /*
821   called when releaseip event finishes
822  */
823 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
824                                 void *private_data)
825 {
826         struct takeover_callback_state *state = 
827                 talloc_get_type(private_data, struct takeover_callback_state);
828         TDB_DATA data;
829
830         if (status == -ETIME) {
831                 ctdb_ban_self(ctdb);
832         }
833
834         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
835                 if  (ctdb_sys_have_ip(state->addr)) {
836                         DEBUG(DEBUG_ERR,
837                               ("IP %s still hosted during release IP callback, failing\n",
838                                ctdb_addr_to_str(state->addr)));
839                         ctdb_request_control_reply(ctdb, state->c,
840                                                    NULL, -1, NULL);
841                         talloc_free(state);
842                         return;
843                 }
844         }
845
846         /* send a message to all clients of this node telling them
847            that the cluster has been reconfigured and they should
848            release any sockets on this IP */
849         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
850         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
851         data.dsize = strlen((char *)data.dptr)+1;
852
853         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
854
855         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
856
857         ctdb_vnn_unassign_iface(ctdb, state->vnn);
858
859         /* Process the IP if it has been marked for deletion */
860         if (state->vnn->delete_pending) {
861                 do_delete_ip(ctdb, state->vnn);
862                 state->vnn = NULL;
863         }
864
865         /* the control succeeded */
866         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
867         talloc_free(state);
868 }
869
870 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
871 {
872         if (state->vnn != NULL) {
873                 state->vnn->update_in_flight = false;
874         }
875         return 0;
876 }
877
878 /*
879   release an ip address
880  */
881 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
882                                 struct ctdb_req_control_old *c,
883                                 TDB_DATA indata, 
884                                 bool *async_reply)
885 {
886         int ret;
887         struct takeover_callback_state *state;
888         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
889         struct ctdb_vnn *vnn;
890         char *iface;
891
892         /* update our vnn list */
893         vnn = find_public_ip_vnn(ctdb, &pip->addr);
894         if (vnn == NULL) {
895                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
896                         ctdb_addr_to_str(&pip->addr)));
897                 return 0;
898         }
899         vnn->pnn = pip->pnn;
900
901         /* stop any previous arps */
902         talloc_free(vnn->takeover_ctx);
903         vnn->takeover_ctx = NULL;
904
905         /* Some ctdb tool commands (e.g. moveip) send
906          * lazy multicast to drop an IP from any node that isn't the
907          * intended new node.  The following causes makes ctdbd ignore
908          * a release for any address it doesn't host.
909          */
910         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
911                 if (!ctdb_sys_have_ip(&pip->addr)) {
912                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
913                                 ctdb_addr_to_str(&pip->addr),
914                                 vnn->public_netmask_bits,
915                                 ctdb_vnn_iface_string(vnn)));
916                         ctdb_vnn_unassign_iface(ctdb, vnn);
917                         return 0;
918                 }
919         } else {
920                 if (vnn->iface == NULL) {
921                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
922                                            ctdb_addr_to_str(&pip->addr),
923                                            vnn->public_netmask_bits));
924                         return 0;
925                 }
926         }
927
928         /* There is a potential race between take_ip and us because we
929          * update the VNN via a callback that run when the
930          * eventscripts have been run.  Avoid the race by allowing one
931          * update to be in flight at a time.
932          */
933         if (vnn->update_in_flight) {
934                 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
935                                     "update for this IP already in flight\n",
936                                     ctdb_addr_to_str(&vnn->public_address),
937                                     vnn->public_netmask_bits));
938                 return -1;
939         }
940
941         iface = strdup(ctdb_vnn_iface_string(vnn));
942
943         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
944                 ctdb_addr_to_str(&pip->addr),
945                 vnn->public_netmask_bits,
946                 iface,
947                 pip->pnn));
948
949         state = talloc(ctdb, struct takeover_callback_state);
950         if (state == NULL) {
951                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
952                                __FILE__, __LINE__);
953                 free(iface);
954                 return -1;
955         }
956
957         state->c = talloc_steal(state, c);
958         state->addr = talloc(state, ctdb_sock_addr);       
959         if (state->addr == NULL) {
960                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
961                                __FILE__, __LINE__);
962                 free(iface);
963                 talloc_free(state);
964                 return -1;
965         }
966         *state->addr = pip->addr;
967         state->vnn   = vnn;
968
969         vnn->update_in_flight = true;
970         talloc_set_destructor(state, ctdb_releaseip_destructor);
971
972         ret = ctdb_event_script_callback(ctdb, 
973                                          state, release_ip_callback, state,
974                                          CTDB_EVENT_RELEASE_IP,
975                                          "%s %s %u",
976                                          iface,
977                                          ctdb_addr_to_str(&pip->addr),
978                                          vnn->public_netmask_bits);
979         free(iface);
980         if (ret != 0) {
981                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
982                         ctdb_addr_to_str(&pip->addr),
983                         ctdb_vnn_iface_string(vnn)));
984                 talloc_free(state);
985                 return -1;
986         }
987
988         /* tell the control that we will be reply asynchronously */
989         *async_reply = true;
990         return 0;
991 }
992
993 static int ctdb_add_public_address(struct ctdb_context *ctdb,
994                                    ctdb_sock_addr *addr,
995                                    unsigned mask, const char *ifaces,
996                                    bool check_address)
997 {
998         struct ctdb_vnn      *vnn;
999         uint32_t num = 0;
1000         char *tmp;
1001         const char *iface;
1002         int i;
1003         int ret;
1004
1005         tmp = strdup(ifaces);
1006         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1007                 if (!ctdb_sys_check_iface_exists(iface)) {
1008                         DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1009                         free(tmp);
1010                         return -1;
1011                 }
1012         }
1013         free(tmp);
1014
1015         /* Verify that we don't have an entry for this ip yet */
1016         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1017                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1018                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
1019                                 ctdb_addr_to_str(addr)));
1020                         return -1;
1021                 }               
1022         }
1023
1024         /* create a new vnn structure for this ip address */
1025         vnn = talloc_zero(ctdb, struct ctdb_vnn);
1026         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
1027         vnn->ifaces = talloc_array(vnn, const char *, num + 2);
1028         tmp = talloc_strdup(vnn, ifaces);
1029         CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1030         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1031                 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1032                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1033                 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1034                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
1035                 num++;
1036         }
1037         talloc_free(tmp);
1038         vnn->ifaces[num] = NULL;
1039         vnn->public_address      = *addr;
1040         vnn->public_netmask_bits = mask;
1041         vnn->pnn                 = -1;
1042         if (check_address) {
1043                 if (ctdb_sys_have_ip(addr)) {
1044                         DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
1045                         vnn->pnn = ctdb->pnn;
1046                 }
1047         }
1048
1049         for (i=0; vnn->ifaces[i]; i++) {
1050                 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1051                 if (ret != 0) {
1052                         DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1053                                            "for public_address[%s]\n",
1054                                            vnn->ifaces[i], ctdb_addr_to_str(addr)));
1055                         talloc_free(vnn);
1056                         return -1;
1057                 }
1058         }
1059
1060         DLIST_ADD(ctdb->vnn, vnn);
1061
1062         return 0;
1063 }
1064
1065 /*
1066   setup the public address lists from a file
1067 */
1068 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1069 {
1070         char **lines;
1071         int nlines;
1072         int i;
1073
1074         lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1075         if (lines == NULL) {
1076                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1077                 return -1;
1078         }
1079         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1080                 nlines--;
1081         }
1082
1083         for (i=0;i<nlines;i++) {
1084                 unsigned mask;
1085                 ctdb_sock_addr addr;
1086                 const char *addrstr;
1087                 const char *ifaces;
1088                 char *tok, *line;
1089
1090                 line = lines[i];
1091                 while ((*line == ' ') || (*line == '\t')) {
1092                         line++;
1093                 }
1094                 if (*line == '#') {
1095                         continue;
1096                 }
1097                 if (strcmp(line, "") == 0) {
1098                         continue;
1099                 }
1100                 tok = strtok(line, " \t");
1101                 addrstr = tok;
1102                 tok = strtok(NULL, " \t");
1103                 if (tok == NULL) {
1104                         if (NULL == ctdb->default_public_interface) {
1105                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1106                                          i+1));
1107                                 talloc_free(lines);
1108                                 return -1;
1109                         }
1110                         ifaces = ctdb->default_public_interface;
1111                 } else {
1112                         ifaces = tok;
1113                 }
1114
1115                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1116                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1117                         talloc_free(lines);
1118                         return -1;
1119                 }
1120                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1121                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1122                         talloc_free(lines);
1123                         return -1;
1124                 }
1125         }
1126
1127
1128         talloc_free(lines);
1129         return 0;
1130 }
1131
1132 static void *add_ip_callback(void *parm, void *data)
1133 {
1134         struct public_ip_list *this_ip = parm;
1135         struct public_ip_list *prev_ip = data;
1136
1137         if (prev_ip == NULL) {
1138                 return parm;
1139         }
1140         if (this_ip->pnn == -1) {
1141                 this_ip->pnn = prev_ip->pnn;
1142         }
1143
1144         return parm;
1145 }
1146
1147 static int getips_count_callback(void *param, void *data)
1148 {
1149         struct public_ip_list **ip_list = (struct public_ip_list **)param;
1150         struct public_ip_list *new_ip = (struct public_ip_list *)data;
1151
1152         new_ip->next = *ip_list;
1153         *ip_list     = new_ip;
1154         return 0;
1155 }
1156
1157 static struct ctdb_public_ip_list *
1158 ctdb_fetch_remote_public_ips(struct ctdb_context *ctdb,
1159                              TALLOC_CTX *mem_ctx,
1160                              struct ctdb_node_map_old *nodemap,
1161                              uint32_t public_ip_flags)
1162 {
1163         int j, ret;
1164         struct ctdb_public_ip_list_old *ip_list;
1165         struct ctdb_public_ip_list *public_ips;
1166
1167         public_ips = talloc_zero_array(mem_ctx,
1168                                        struct ctdb_public_ip_list,
1169                                        nodemap->num);
1170         if (public_ips == NULL) {
1171                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1172                 return NULL;
1173         }
1174
1175         for (j = 0; j < nodemap->num; j++) {
1176                 if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
1177                         continue;
1178                 }
1179
1180                 /* Retrieve the list of public IPs from the
1181                  * node. Flags says whether it is known or
1182                  * available. */
1183                 ret = ctdb_ctrl_get_public_ips_flags(
1184                         ctdb, TAKEOVER_TIMEOUT(), j, public_ips,
1185                         public_ip_flags, &ip_list);
1186                 if (ret != 0) {
1187                         DEBUG(DEBUG_ERR,
1188                               ("Failed to read public IPs from node: %u\n", j));
1189                         talloc_free(public_ips);
1190                         return NULL;
1191                 }
1192                 public_ips[j].num = ip_list->num;
1193                 if (ip_list->num == 0) {
1194                         talloc_free(ip_list);
1195                         continue;
1196                 }
1197                 public_ips[j].ip = talloc_zero_array(public_ips,
1198                                                      struct ctdb_public_ip,
1199                                                      ip_list->num);
1200                 if (public_ips[j].ip == NULL) {
1201                         DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1202                         talloc_free(public_ips);
1203                         return NULL;
1204                 }
1205                 memcpy(public_ips[j].ip, &ip_list->ips[0],
1206                        sizeof(struct ctdb_public_ip) * ip_list->num);
1207                 talloc_free(ip_list);
1208         }
1209
1210         return public_ips;
1211 }
1212
1213 static struct public_ip_list *
1214 create_merged_ip_list(struct ipalloc_state *ipalloc_state)
1215 {
1216         int i, j;
1217         struct public_ip_list *ip_list;
1218         struct ctdb_public_ip_list *public_ips;
1219         struct trbt_tree *ip_tree;
1220
1221         ip_tree = trbt_create(ipalloc_state, 0);
1222
1223         if (ipalloc_state->known_public_ips == NULL) {
1224                 DEBUG(DEBUG_ERR, ("Known public IPs not set\n"));
1225                 return NULL;
1226         }
1227
1228         for (i=0; i < ipalloc_state->num; i++) {
1229
1230                 public_ips = &ipalloc_state->known_public_ips[i];
1231
1232                 for (j=0; j < public_ips->num; j++) {
1233                         struct public_ip_list *tmp_ip;
1234
1235                         /* This is returned as part of ip_list */
1236                         tmp_ip = talloc_zero(ipalloc_state, struct public_ip_list);
1237                         if (tmp_ip == NULL) {
1238                                 DEBUG(DEBUG_ERR,
1239                                       (__location__ " out of memory\n"));
1240                                 talloc_free(ip_tree);
1241                                 return NULL;
1242                         }
1243
1244                         /* Do not use information about IP addresses hosted
1245                          * on other nodes, it may not be accurate */
1246                         if (public_ips->ip[j].pnn == i) {
1247                                 tmp_ip->pnn = public_ips->ip[j].pnn;
1248                         } else {
1249                                 tmp_ip->pnn = -1;
1250                         }
1251                         tmp_ip->addr = public_ips->ip[j].addr;
1252                         tmp_ip->next = NULL;
1253
1254                         trbt_insertarray32_callback(ip_tree,
1255                                 IP_KEYLEN, ip_key(&public_ips->ip[j].addr),
1256                                 add_ip_callback,
1257                                 tmp_ip);
1258                 }
1259         }
1260
1261         ip_list = NULL;
1262         trbt_traversearray32(ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1263         talloc_free(ip_tree);
1264
1265         return ip_list;
1266 }
1267
1268 static bool all_nodes_are_disabled(struct ctdb_node_map_old *nodemap)
1269 {
1270         int i;
1271
1272         for (i=0;i<nodemap->num;i++) {
1273                 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1274                         /* Found one completely healthy node */
1275                         return false;
1276                 }
1277         }
1278
1279         return true;
1280 }
1281
/* State shared between get_tunable_from_nodes() and its callbacks */
struct get_tunable_callback_data {
        /* Name of the tunable being fetched, used in log messages */
        const char *tunable;
        /* Per-node result array (a talloc array), indexed by PNN */
        uint32_t *out;
        /* Set by the callbacks when the whole fetch must be failed */
        bool fatal;
};
1287
1288 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
1289                                  int32_t res, TDB_DATA outdata,
1290                                  void *callback)
1291 {
1292         struct get_tunable_callback_data *cd =
1293                 (struct get_tunable_callback_data *)callback;
1294         int size;
1295
1296         if (res != 0) {
1297                 /* Already handled in fail callback */
1298                 return;
1299         }
1300
1301         if (outdata.dsize != sizeof(uint32_t)) {
1302                 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
1303                                  cd->tunable, pnn, (int)sizeof(uint32_t),
1304                                  (int)outdata.dsize));
1305                 cd->fatal = true;
1306                 return;
1307         }
1308
1309         size = talloc_array_length(cd->out);
1310         if (pnn >= size) {
1311                 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
1312                                  cd->tunable, pnn, size));
1313                 return;
1314         }
1315
1316                 
1317         cd->out[pnn] = *(uint32_t *)outdata.dptr;
1318 }
1319
1320 static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1321                                        int32_t res, TDB_DATA outdata,
1322                                        void *callback)
1323 {
1324         struct get_tunable_callback_data *cd =
1325                 (struct get_tunable_callback_data *)callback;
1326
1327         switch (res) {
1328         case -ETIME:
1329                 DEBUG(DEBUG_ERR,
1330                       ("Timed out getting tunable \"%s\" from node %d\n",
1331                        cd->tunable, pnn));
1332                 cd->fatal = true;
1333                 break;
1334         case -EINVAL:
1335         case -1:
1336                 DEBUG(DEBUG_WARNING,
1337                       ("Tunable \"%s\" not implemented on node %d\n",
1338                        cd->tunable, pnn));
1339                 break;
1340         default:
1341                 DEBUG(DEBUG_ERR,
1342                       ("Unexpected error getting tunable \"%s\" from node %d\n",
1343                        cd->tunable, pnn));
1344                 cd->fatal = true;
1345         }
1346 }
1347
1348 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
1349                                         TALLOC_CTX *tmp_ctx,
1350                                         struct ctdb_node_map_old *nodemap,
1351                                         const char *tunable,
1352                                         uint32_t default_value)
1353 {
1354         TDB_DATA data;
1355         struct ctdb_control_get_tunable *t;
1356         uint32_t *nodes;
1357         uint32_t *tvals;
1358         struct get_tunable_callback_data callback_data;
1359         int i;
1360
1361         tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1362         CTDB_NO_MEMORY_NULL(ctdb, tvals);
1363         for (i=0; i<nodemap->num; i++) {
1364                 tvals[i] = default_value;
1365         }
1366                 
1367         callback_data.out = tvals;
1368         callback_data.tunable = tunable;
1369         callback_data.fatal = false;
1370
1371         data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
1372         data.dptr  = talloc_size(tmp_ctx, data.dsize);
1373         t = (struct ctdb_control_get_tunable *)data.dptr;
1374         t->length = strlen(tunable)+1;
1375         memcpy(t->name, tunable, t->length);
1376         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1377         if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
1378                                       nodes, 0, TAKEOVER_TIMEOUT(),
1379                                       false, data,
1380                                       get_tunable_callback,
1381                                       get_tunable_fail_callback,
1382                                       &callback_data) != 0) {
1383                 if (callback_data.fatal) {
1384                         talloc_free(tvals);
1385                         tvals = NULL;
1386                 }
1387         }
1388         talloc_free(nodes);
1389         talloc_free(data.dptr);
1390
1391         return tvals;
1392 }
1393
1394 /* Set internal flags for IP allocation:
1395  *   Clear ip flags
1396  *   Set NOIPTAKOVER ip flags from per-node NoIPTakeover tunable
1397  *   Set NOIPHOST ip flag for each INACTIVE node
1398  *   if all nodes are disabled:
1399  *     Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
1400  *   else
1401  *     Set NOIPHOST ip flags for disabled nodes
1402  */
1403 static void set_ipflags_internal(struct ipalloc_state *ipalloc_state,
1404                                  struct ctdb_node_map_old *nodemap,
1405                                  uint32_t *tval_noiptakeover,
1406                                  uint32_t *tval_noiphostonalldisabled)
1407 {
1408         int i;
1409
1410         for (i=0;i<nodemap->num;i++) {
1411                 /* Can not take IPs on node with NoIPTakeover set */
1412                 if (tval_noiptakeover[i] != 0) {
1413                         ipalloc_state->noiptakeover[i] = true;
1414                 }
1415
1416                 /* Can not host IPs on INACTIVE node */
1417                 if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
1418                         ipalloc_state->noiphost[i] = true;
1419                 }
1420         }
1421
1422         if (all_nodes_are_disabled(nodemap)) {
1423                 /* If all nodes are disabled, can not host IPs on node
1424                  * with NoIPHostOnAllDisabled set
1425                  */
1426                 for (i=0;i<nodemap->num;i++) {
1427                         if (tval_noiphostonalldisabled[i] != 0) {
1428                                 ipalloc_state->noiphost[i] = true;
1429                         }
1430                 }
1431         } else {
1432                 /* If some nodes are not disabled, then can not host
1433                  * IPs on DISABLED node
1434                  */
1435                 for (i=0;i<nodemap->num;i++) {
1436                         if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
1437                                 ipalloc_state->noiphost[i] = true;
1438                         }
1439                 }
1440         }
1441 }
1442
1443 static bool set_ipflags(struct ctdb_context *ctdb,
1444                         struct ipalloc_state *ipalloc_state,
1445                         struct ctdb_node_map_old *nodemap)
1446 {
1447         uint32_t *tval_noiptakeover;
1448         uint32_t *tval_noiphostonalldisabled;
1449
1450         tval_noiptakeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1451                                                    "NoIPTakeover", 0);
1452         if (tval_noiptakeover == NULL) {
1453                 return false;
1454         }
1455
1456         tval_noiphostonalldisabled =
1457                 get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1458                                        "NoIPHostOnAllDisabled", 0);
1459         if (tval_noiphostonalldisabled == NULL) {
1460                 /* Caller frees tmp_ctx */
1461                 return false;
1462         }
1463
1464         set_ipflags_internal(ipalloc_state, nodemap,
1465                              tval_noiptakeover,
1466                              tval_noiphostonalldisabled);
1467
1468         talloc_free(tval_noiptakeover);
1469         talloc_free(tval_noiphostonalldisabled);
1470
1471         return true;
1472 }
1473
1474 static struct ipalloc_state * ipalloc_state_init(struct ctdb_context *ctdb,
1475                                                  TALLOC_CTX *mem_ctx)
1476 {
1477         struct ipalloc_state *ipalloc_state =
1478                 talloc_zero(mem_ctx, struct ipalloc_state);
1479         if (ipalloc_state == NULL) {
1480                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1481                 return NULL;
1482         }
1483
1484         ipalloc_state->num = ctdb->num_nodes;
1485
1486         ipalloc_state->noiptakeover =
1487                 talloc_zero_array(ipalloc_state,
1488                                   bool,
1489                                   ipalloc_state->num);
1490         if (ipalloc_state->noiptakeover == NULL) {
1491                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1492                 goto fail;
1493         }
1494         ipalloc_state->noiphost =
1495                 talloc_zero_array(ipalloc_state,
1496                                   bool,
1497                                   ipalloc_state->num);
1498         if (ipalloc_state->noiphost == NULL) {
1499                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1500                 goto fail;
1501         }
1502
1503         if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1504                 ipalloc_state->algorithm = IPALLOC_LCP2;
1505         } else if (1 == ctdb->tunable.deterministic_public_ips) {
1506                 ipalloc_state->algorithm = IPALLOC_DETERMINISTIC;
1507         } else {
1508                 ipalloc_state->algorithm = IPALLOC_NONDETERMINISTIC;
1509         }
1510
1511         ipalloc_state->no_ip_failback = ctdb->tunable.no_ip_failback;
1512
1513         return ipalloc_state;
1514 fail:
1515         talloc_free(ipalloc_state);
1516         return NULL;
1517 }
1518
/* Failure bookkeeping shared by the steps of a takeover run */
struct takeover_callback_data {
        /* Number of entries in fail_count */
        uint32_t num_nodes;
        /* Failures recorded per node, indexed by PNN */
        unsigned int *fail_count;
};
1523
1524 static struct takeover_callback_data *
1525 takeover_callback_data_init(TALLOC_CTX *mem_ctx,
1526                             uint32_t num_nodes)
1527 {
1528         static struct takeover_callback_data *takeover_data;
1529
1530         takeover_data = talloc_zero(mem_ctx, struct takeover_callback_data);
1531         if (takeover_data == NULL) {
1532                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1533                 return NULL;
1534         }
1535
1536         takeover_data->fail_count = talloc_zero_array(takeover_data,
1537                                                       unsigned int, num_nodes);
1538         if (takeover_data->fail_count == NULL) {
1539                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1540                 talloc_free(takeover_data);
1541                 return NULL;
1542         }
1543
1544         takeover_data->num_nodes = num_nodes;
1545
1546         return takeover_data;
1547 }
1548
1549 static void takeover_run_fail_callback(struct ctdb_context *ctdb,
1550                                        uint32_t node_pnn, int32_t res,
1551                                        TDB_DATA outdata, void *callback_data)
1552 {
1553         struct takeover_callback_data *cd =
1554                 talloc_get_type_abort(callback_data,
1555                                       struct takeover_callback_data);
1556
1557         if (node_pnn >= cd->num_nodes) {
1558                 DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
1559                 return;
1560         }
1561
1562         if (cd->fail_count[node_pnn] == 0) {
1563                 DEBUG(DEBUG_ERR,
1564                       ("Node %u failed the takeover run\n", node_pnn));
1565         }
1566
1567         cd->fail_count[node_pnn]++;
1568 }
1569
1570 static void takeover_run_process_failures(struct ctdb_context *ctdb,
1571                                           struct takeover_callback_data *tcd)
1572 {
1573         unsigned int max_fails = 0;
1574         uint32_t max_pnn = -1;
1575         uint32_t i;
1576
1577         for (i = 0; i < tcd->num_nodes; i++) {
1578                 if (tcd->fail_count[i] > max_fails) {
1579                         max_pnn = i;
1580                         max_fails = tcd->fail_count[i];
1581                 }
1582         }
1583
1584         if (max_fails > 0) {
1585                 int ret;
1586                 TDB_DATA data;
1587
1588                 DEBUG(DEBUG_ERR,
1589                       ("Sending banning credits to %u with fail count %u\n",
1590                        max_pnn, max_fails));
1591
1592                 data.dptr = (uint8_t *)&max_pnn;
1593                 data.dsize = sizeof(uint32_t);
1594                 ret = ctdb_client_send_message(ctdb,
1595                                                CTDB_BROADCAST_CONNECTED,
1596                                                CTDB_SRVID_BANNING,
1597                                                data);
1598                 if (ret != 0) {
1599                         DEBUG(DEBUG_ERR,
1600                               ("Failed to set banning credits for node %u\n",
1601                                max_pnn));
1602                 }
1603         }
1604 }
1605
1606 /*
1607  * Recalculate the allocation of public IPs to nodes and have the
1608  * nodes host their allocated addresses.
1609  *
1610  * - Allocate memory for IP allocation state, including per node
1611  *   arrays
1612  * - Populate IP allocation algorithm in IP allocation state
1613  * - Populate local value of tunable NoIPFailback in IP allocation
1614  *   state - this is really a cluster-wide configuration variable and
1615  *   only the value from the master node is used
1616  * - Retrieve tunables NoIPTakeover and NoIPHostOnAllDisabled from all
1617  *   connected nodes - this is done separately so tunable values can
1618  *   be faked in unit testing
1619  * - Populate NoIPTakeover tunable in IP allocation state
1620  * - Populate NoIPHost in IP allocation state, derived from node flags
1621  *   and NoIPHostOnAllDisabled tunable
1622  * - Retrieve known and available IP addresses (done separately so
1623  *   values can be faked in unit testing)
1624  * - Use ipalloc_set_public_ips() to set known and available IP
1625      addresses for allocation
1626  * - If no available IP addresses then early exit
1627  * - Build list of (known IPs, currently assigned node)
1628  * - Populate list of nodes to force rebalance - internal structure,
1629  *   currently no way to fetch, only used by LCP2 for nodes that have
1630  *   had new IP addresses added
1631  * - Run IP allocation algorithm
1632  * - Send RELEASE_IP to all nodes for IPs they should not host
1633  * - Send TAKE_IP to all nodes for IPs they should host
1634  * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
1635  */
1636 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
1637                       uint32_t *force_rebalance_nodes)
1638 {
1639         int i, ret;
1640         struct ctdb_public_ip ip;
1641         uint32_t *nodes;
1642         struct public_ip_list *all_ips, *tmp_ip;
1643         TDB_DATA data;
1644         struct timeval timeout;
1645         struct client_async_data *async_data;
1646         struct ctdb_client_control_state *state;
1647         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1648         struct ipalloc_state *ipalloc_state;
1649         struct ctdb_public_ip_list *known_ips, *available_ips;
1650         struct takeover_callback_data *takeover_data;
1651         bool can_host_ips;
1652
1653         /* Initialise fail callback data to be used with
1654          * takeover_run_fail_callback().  A failure in any of the
1655          * following steps will cause an early return, so this can be
1656          * reused for each of those steps without re-initialising. */
1657         takeover_data = takeover_callback_data_init(tmp_ctx,
1658                                                     nodemap->num);
1659         if (takeover_data == NULL) {
1660                 talloc_free(tmp_ctx);
1661                 return -1;
1662         }
1663
1664         /*
1665          * ip failover is completely disabled, just send out the 
1666          * ipreallocated event.
1667          */
1668         if (ctdb->tunable.disable_ip_failover != 0) {
1669                 goto ipreallocated;
1670         }
1671
1672         ipalloc_state = ipalloc_state_init(ctdb, tmp_ctx);
1673         if (ipalloc_state == NULL) {
1674                 talloc_free(tmp_ctx);
1675                 return -1;
1676         }
1677
1678         if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
1679                 DEBUG(DEBUG_ERR,("Failed to set IP flags - aborting takeover run\n"));
1680                 talloc_free(tmp_ctx);
1681                 return -1;
1682         }
1683
1684         /* Fetch known/available public IPs from each active node */
1685         /* Fetch lists of known public IPs from all nodes */
1686         known_ips = ctdb_fetch_remote_public_ips(ctdb, ipalloc_state,
1687                                                  nodemap, 0);
1688         if (known_ips == NULL) {
1689                 DEBUG(DEBUG_ERR, ("Failed to read known public IPs\n"));
1690                 talloc_free(tmp_ctx);
1691                 return -1;
1692         }
1693         available_ips = ctdb_fetch_remote_public_ips(
1694                 ctdb, ipalloc_state, nodemap,
1695                 CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE);
1696         if (available_ips == NULL) {
1697                 DEBUG(DEBUG_ERR, ("Failed to read available public IPs\n"));
1698                 talloc_free(tmp_ctx);
1699                 return -1;
1700         }
1701
1702         if (! ipalloc_set_public_ips(ipalloc_state, known_ips, available_ips)) {
1703                 DEBUG(DEBUG_ERR, ("Failed to set public IPs\n"));
1704                 talloc_free(tmp_ctx);
1705                 return -1;
1706         }
1707
1708         /* Short-circuit IP allocation if no node has available IPs */
1709         can_host_ips = false;
1710         for (i=0; i < ipalloc_state->num; i++) {
1711                 if (ipalloc_state->available_public_ips[i].num != 0) {
1712                         can_host_ips = true;
1713                 }
1714         }
1715         if (!can_host_ips) {
1716                 DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
1717                 goto ipreallocated;
1718         }
1719
1720         /* since nodes only know about those public addresses that
1721            can be served by that particular node, no single node has
1722            a full list of all public addresses that exist in the cluster.
1723            Walk over all node structures and create a merged list of
1724            all public addresses that exist in the cluster.
1725         */
1726         all_ips = create_merged_ip_list(ipalloc_state);
1727         if (all_ips == NULL) {
1728                 talloc_free(tmp_ctx);
1729                 return -1;
1730         }
1731         ipalloc_state->all_ips = all_ips;
1732
1733         ipalloc_state->force_rebalance_nodes = force_rebalance_nodes;
1734
1735         /* Do the IP reassignment calculations */
1736         ipalloc(ipalloc_state);
1737
1738         /* Now tell all nodes to release any public IPs should not
1739          * host.  This will be a NOOP on nodes that don't currently
1740          * hold the given IP.
1741          */
1742         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1743         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1744
1745         async_data->fail_callback = takeover_run_fail_callback;
1746         async_data->callback_data = takeover_data;
1747
1748         ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
1749
1750         /* Send a RELEASE_IP to all nodes that should not be hosting
1751          * each IP.  For each IP, all but one of these will be
1752          * redundant.  However, the redundant ones are used to tell
1753          * nodes which node should be hosting the IP so that commands
1754          * like "ctdb ip" can display a particular nodes idea of who
1755          * is hosting what. */
1756         for (i=0;i<nodemap->num;i++) {
1757                 /* don't talk to unconnected nodes, but do talk to banned nodes */
1758                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1759                         continue;
1760                 }
1761
1762                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1763                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1764                                 /* This node should be serving this
1765                                    vnn so don't tell it to release the ip
1766                                 */
1767                                 continue;
1768                         }
1769                         ip.pnn  = tmp_ip->pnn;
1770                         ip.addr = tmp_ip->addr;
1771
1772                         timeout = TAKEOVER_TIMEOUT();
1773                         data.dsize = sizeof(ip);
1774                         data.dptr  = (uint8_t *)&ip;
1775                         state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1776                                                   0, CTDB_CONTROL_RELEASE_IP, 0,
1777                                                   data, async_data,
1778                                                   &timeout, NULL);
1779                         if (state == NULL) {
1780                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1781                                 talloc_free(tmp_ctx);
1782                                 return -1;
1783                         }
1784
1785                         ctdb_client_async_add(async_data, state);
1786                 }
1787         }
1788         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1789                 DEBUG(DEBUG_ERR,
1790                       ("Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1791                 goto fail;
1792         }
1793         talloc_free(async_data);
1794
1795
1796         /* For each IP, send a TAKOVER_IP to the node that should be
1797          * hosting it.  Many of these will often be redundant (since
1798          * the allocation won't have changed) but they can be useful
1799          * to recover from inconsistencies. */
1800         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1801         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1802
1803         async_data->fail_callback = takeover_run_fail_callback;
1804         async_data->callback_data = takeover_data;
1805
1806         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1807                 if (tmp_ip->pnn == -1) {
1808                         /* this IP won't be taken over */
1809                         continue;
1810                 }
1811
1812                 ip.pnn  = tmp_ip->pnn;
1813                 ip.addr = tmp_ip->addr;
1814
1815                 timeout = TAKEOVER_TIMEOUT();
1816                 data.dsize = sizeof(ip);
1817                 data.dptr  = (uint8_t *)&ip;
1818                 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1819                                           0, CTDB_CONTROL_TAKEOVER_IP, 0,
1820                                           data, async_data, &timeout, NULL);
1821                 if (state == NULL) {
1822                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1823                         talloc_free(tmp_ctx);
1824                         return -1;
1825                 }
1826
1827                 ctdb_client_async_add(async_data, state);
1828         }
1829         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1830                 DEBUG(DEBUG_ERR,
1831                       ("Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1832                 goto fail;
1833         }
1834
1835 ipreallocated:
1836         /*
1837          * Tell all nodes to run eventscripts to process the
1838          * "ipreallocated" event.  This can do a lot of things,
1839          * including restarting services to reconfigure them if public
1840          * IPs have moved.  Once upon a time this event only used to
1841          * update natgw.
1842          */
1843         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1844         ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
1845                                         nodes, 0, TAKEOVER_TIMEOUT(),
1846                                         false, tdb_null,
1847                                         NULL, takeover_run_fail_callback,
1848                                         takeover_data);
1849         if (ret != 0) {
1850                 DEBUG(DEBUG_ERR,
1851                       ("Async CTDB_CONTROL_IPREALLOCATED control failed\n"));
1852                 goto fail;
1853         }
1854
1855         talloc_free(tmp_ctx);
1856         return ret;
1857
1858 fail:
1859         takeover_run_process_failures(ctdb, takeover_data);
1860         talloc_free(tmp_ctx);
1861         return -1;
1862 }
1863
1864
1865 /*
1866   destroy a ctdb_client_ip structure
1867  */
1868 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1869 {
1870         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1871                 ctdb_addr_to_str(&ip->addr),
1872                 ntohs(ip->addr.ip.sin_port),
1873                 ip->client_id));
1874
1875         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1876         return 0;
1877 }
1878
1879 /*
1880   called by a client to inform us of a TCP connection that it is managing
1881   that should tickled with an ACK when IP takeover is done
1882  */
1883 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1884                                 TDB_DATA indata)
1885 {
1886         struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1887         struct ctdb_connection *tcp_sock = NULL;
1888         struct ctdb_tcp_list *tcp;
1889         struct ctdb_connection t;
1890         int ret;
1891         TDB_DATA data;
1892         struct ctdb_client_ip *ip;
1893         struct ctdb_vnn *vnn;
1894         ctdb_sock_addr addr;
1895
1896         /* If we don't have public IPs, tickles are useless */
1897         if (ctdb->vnn == NULL) {
1898                 return 0;
1899         }
1900
1901         tcp_sock = (struct ctdb_connection *)indata.dptr;
1902
1903         addr = tcp_sock->src;
1904         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1905         addr = tcp_sock->dst;
1906         ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
1907
1908         ZERO_STRUCT(addr);
1909         memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1910         vnn = find_public_ip_vnn(ctdb, &addr);
1911         if (vnn == NULL) {
1912                 switch (addr.sa.sa_family) {
1913                 case AF_INET:
1914                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1915                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1916                                         ctdb_addr_to_str(&addr)));
1917                         }
1918                         break;
1919                 case AF_INET6:
1920                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1921                                 ctdb_addr_to_str(&addr)));
1922                         break;
1923                 default:
1924                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1925                 }
1926
1927                 return 0;
1928         }
1929
1930         if (vnn->pnn != ctdb->pnn) {
1931                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1932                         ctdb_addr_to_str(&addr),
1933                         client_id, client->pid));
1934                 /* failing this call will tell smbd to die */
1935                 return -1;
1936         }
1937
1938         ip = talloc(client, struct ctdb_client_ip);
1939         CTDB_NO_MEMORY(ctdb, ip);
1940
1941         ip->ctdb      = ctdb;
1942         ip->addr      = addr;
1943         ip->client_id = client_id;
1944         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1945         DLIST_ADD(ctdb->client_ip_list, ip);
1946
1947         tcp = talloc(client, struct ctdb_tcp_list);
1948         CTDB_NO_MEMORY(ctdb, tcp);
1949
1950         tcp->connection.src = tcp_sock->src;
1951         tcp->connection.dst = tcp_sock->dst;
1952
1953         DLIST_ADD(client->tcp_list, tcp);
1954
1955         t.src = tcp_sock->src;
1956         t.dst = tcp_sock->dst;
1957
1958         data.dptr = (uint8_t *)&t;
1959         data.dsize = sizeof(t);
1960
1961         switch (addr.sa.sa_family) {
1962         case AF_INET:
1963                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1964                         (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1965                         ctdb_addr_to_str(&tcp_sock->src),
1966                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1967                 break;
1968         case AF_INET6:
1969                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1970                         (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1971                         ctdb_addr_to_str(&tcp_sock->src),
1972                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1973                 break;
1974         default:
1975                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1976         }
1977
1978
1979         /* tell all nodes about this tcp connection */
1980         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1981                                        CTDB_CONTROL_TCP_ADD,
1982                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1983         if (ret != 0) {
1984                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1985                 return -1;
1986         }
1987
1988         return 0;
1989 }
1990
1991 /*
1992   find a tcp address on a list
1993  */
1994 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1995                                            struct ctdb_connection *tcp)
1996 {
1997         int i;
1998
1999         if (array == NULL) {
2000                 return NULL;
2001         }
2002
2003         for (i=0;i<array->num;i++) {
2004                 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
2005                     ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
2006                         return &array->connections[i];
2007                 }
2008         }
2009         return NULL;
2010 }
2011
2012
2013
2014 /*
2015   called by a daemon to inform us of a TCP connection that one of its
2016   clients managing that should tickled with an ACK when IP takeover is
2017   done
2018  */
2019 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
2020 {
2021         struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
2022         struct ctdb_tcp_array *tcparray;
2023         struct ctdb_connection tcp;
2024         struct ctdb_vnn *vnn;
2025
2026         /* If we don't have public IPs, tickles are useless */
2027         if (ctdb->vnn == NULL) {
2028                 return 0;
2029         }
2030
2031         vnn = find_public_ip_vnn(ctdb, &p->dst);
2032         if (vnn == NULL) {
2033                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
2034                         ctdb_addr_to_str(&p->dst)));
2035
2036                 return -1;
2037         }
2038
2039
2040         tcparray = vnn->tcp_array;
2041
2042         /* If this is the first tickle */
2043         if (tcparray == NULL) {
2044                 tcparray = talloc(vnn, struct ctdb_tcp_array);
2045                 CTDB_NO_MEMORY(ctdb, tcparray);
2046                 vnn->tcp_array = tcparray;
2047
2048                 tcparray->num = 0;
2049                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
2050                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2051
2052                 tcparray->connections[tcparray->num].src = p->src;
2053                 tcparray->connections[tcparray->num].dst = p->dst;
2054                 tcparray->num++;
2055
2056                 if (tcp_update_needed) {
2057                         vnn->tcp_update_needed = true;
2058                 }
2059                 return 0;
2060         }
2061
2062
2063         /* Do we already have this tickle ?*/
2064         tcp.src = p->src;
2065         tcp.dst = p->dst;
2066         if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
2067                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
2068                         ctdb_addr_to_str(&tcp.dst),
2069                         ntohs(tcp.dst.ip.sin_port),
2070                         vnn->pnn));
2071                 return 0;
2072         }
2073
2074         /* A new tickle, we must add it to the array */
2075         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
2076                                         struct ctdb_connection,
2077                                         tcparray->num+1);
2078         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2079
2080         tcparray->connections[tcparray->num].src = p->src;
2081         tcparray->connections[tcparray->num].dst = p->dst;
2082         tcparray->num++;
2083
2084         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
2085                 ctdb_addr_to_str(&tcp.dst),
2086                 ntohs(tcp.dst.ip.sin_port),
2087                 vnn->pnn));
2088
2089         if (tcp_update_needed) {
2090                 vnn->tcp_update_needed = true;
2091         }
2092
2093         return 0;
2094 }
2095
2096
2097 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
2098 {
2099         struct ctdb_connection *tcpp;
2100
2101         if (vnn == NULL) {
2102                 return;
2103         }
2104
2105         /* if the array is empty we cant remove it
2106            and we don't need to do anything
2107          */
2108         if (vnn->tcp_array == NULL) {
2109                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
2110                         ctdb_addr_to_str(&conn->dst),
2111                         ntohs(conn->dst.ip.sin_port)));
2112                 return;
2113         }
2114
2115
2116         /* See if we know this connection
2117            if we don't know this connection  then we dont need to do anything
2118          */
2119         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2120         if (tcpp == NULL) {
2121                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2122                         ctdb_addr_to_str(&conn->dst),
2123                         ntohs(conn->dst.ip.sin_port)));
2124                 return;
2125         }
2126
2127
2128         /* We need to remove this entry from the array.
2129            Instead of allocating a new array and copying data to it
2130            we cheat and just copy the last entry in the existing array
2131            to the entry that is to be removed and just shring the 
2132            ->num field
2133          */
2134         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2135         vnn->tcp_array->num--;
2136
2137         /* If we deleted the last entry we also need to remove the entire array
2138          */
2139         if (vnn->tcp_array->num == 0) {
2140                 talloc_free(vnn->tcp_array);
2141                 vnn->tcp_array = NULL;
2142         }               
2143
2144         vnn->tcp_update_needed = true;
2145
2146         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2147                 ctdb_addr_to_str(&conn->src),
2148                 ntohs(conn->src.ip.sin_port)));
2149 }
2150
2151
2152 /*
2153   called by a daemon to inform us of a TCP connection that one of its
2154   clients used are no longer needed in the tickle database
2155  */
2156 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2157 {
2158         struct ctdb_vnn *vnn;
2159         struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
2160
2161         /* If we don't have public IPs, tickles are useless */
2162         if (ctdb->vnn == NULL) {
2163                 return 0;
2164         }
2165
2166         vnn = find_public_ip_vnn(ctdb, &conn->dst);
2167         if (vnn == NULL) {
2168                 DEBUG(DEBUG_ERR,
2169                       (__location__ " unable to find public address %s\n",
2170                        ctdb_addr_to_str(&conn->dst)));
2171                 return 0;
2172         }
2173
2174         ctdb_remove_connection(vnn, conn);
2175
2176         return 0;
2177 }
2178
2179
2180 /*
2181   Called when another daemon starts - causes all tickles for all
2182   public addresses we are serving to be sent to the new node on the
2183   next check.  This actually causes the next scheduled call to
2184   tdb_update_tcp_tickles() to update all nodes.  This is simple and
2185   doesn't require careful error handling.
2186  */
2187 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
2188 {
2189         struct ctdb_vnn *vnn;
2190
2191         DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
2192                            (unsigned long) pnn));
2193
2194         for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2195                 vnn->tcp_update_needed = true;
2196         }
2197
2198         return 0;
2199 }
2200
2201
2202 /*
2203   called when a client structure goes away - hook to remove
2204   elements from the tcp_list in all daemons
2205  */
2206 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2207 {
2208         while (client->tcp_list) {
2209                 struct ctdb_vnn *vnn;
2210                 struct ctdb_tcp_list *tcp = client->tcp_list;
2211                 struct ctdb_connection *conn = &tcp->connection;
2212
2213                 DLIST_REMOVE(client->tcp_list, tcp);
2214
2215                 vnn = find_public_ip_vnn(client->ctdb,
2216                                          &conn->dst);
2217                 if (vnn == NULL) {
2218                         DEBUG(DEBUG_ERR,
2219                               (__location__ " unable to find public address %s\n",
2220                                ctdb_addr_to_str(&conn->dst)));
2221                         continue;
2222                 }
2223
2224                 /* If the IP address is hosted on this node then
2225                  * remove the connection. */
2226                 if (vnn->pnn == client->ctdb->pnn) {
2227                         ctdb_remove_connection(vnn, conn);
2228                 }
2229
2230                 /* Otherwise this function has been called because the
2231                  * server IP address has been released to another node
2232                  * and the client has exited.  This means that we
2233                  * should not delete the connection information.  The
2234                  * takeover node processes connections too. */
2235         }
2236 }
2237
2238
2239 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2240 {
2241         struct ctdb_vnn *vnn;
2242         int count = 0;
2243         TDB_DATA data;
2244
2245         if (ctdb->tunable.disable_ip_failover == 1) {
2246                 return;
2247         }
2248
2249         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2250                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2251                         ctdb_vnn_unassign_iface(ctdb, vnn);
2252                         continue;
2253                 }
2254                 if (!vnn->iface) {
2255                         continue;
2256                 }
2257
2258                 /* Don't allow multiple releases at once.  Some code,
2259                  * particularly ctdb_tickle_sentenced_connections() is
2260                  * not re-entrant */
2261                 if (vnn->update_in_flight) {
2262                         DEBUG(DEBUG_WARNING,
2263                               (__location__
2264                                " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
2265                                     ctdb_addr_to_str(&vnn->public_address),
2266                                     vnn->public_netmask_bits,
2267                                     ctdb_vnn_iface_string(vnn)));
2268                         continue;
2269                 }
2270                 vnn->update_in_flight = true;
2271
2272                 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
2273                                     ctdb_addr_to_str(&vnn->public_address),
2274                                     vnn->public_netmask_bits,
2275                                     ctdb_vnn_iface_string(vnn)));
2276
2277                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2278                                   ctdb_vnn_iface_string(vnn),
2279                                   ctdb_addr_to_str(&vnn->public_address),
2280                                   vnn->public_netmask_bits);
2281
2282                 data.dptr = (uint8_t *)talloc_strdup(
2283                                 vnn, ctdb_addr_to_str(&vnn->public_address));
2284                 if (data.dptr != NULL) {
2285                         data.dsize = strlen((char *)data.dptr) + 1;
2286                         ctdb_daemon_send_message(ctdb, ctdb->pnn,
2287                                                  CTDB_SRVID_RELEASE_IP, data);
2288                         talloc_free(data.dptr);
2289                 }
2290
2291                 ctdb_vnn_unassign_iface(ctdb, vnn);
2292                 vnn->update_in_flight = false;
2293                 count++;
2294         }
2295
2296         DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
2297 }
2298
2299
2300 /*
2301   get list of public IPs
2302  */
2303 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
2304                                     struct ctdb_req_control_old *c, TDB_DATA *outdata)
2305 {
2306         int i, num, len;
2307         struct ctdb_public_ip_list_old *ips;
2308         struct ctdb_vnn *vnn;
2309         bool only_available = false;
2310
2311         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2312                 only_available = true;
2313         }
2314
2315         /* count how many public ip structures we have */
2316         num = 0;
2317         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2318                 num++;
2319         }
2320
2321         len = offsetof(struct ctdb_public_ip_list_old, ips) +
2322                 num*sizeof(struct ctdb_public_ip);
2323         ips = talloc_zero_size(outdata, len);
2324         CTDB_NO_MEMORY(ctdb, ips);
2325
2326         i = 0;
2327         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2328                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2329                         continue;
2330                 }
2331                 ips->ips[i].pnn  = vnn->pnn;
2332                 ips->ips[i].addr = vnn->public_address;
2333                 i++;
2334         }
2335         ips->num = i;
2336         len = offsetof(struct ctdb_public_ip_list_old, ips) +
2337                 i*sizeof(struct ctdb_public_ip);
2338
2339         outdata->dsize = len;
2340         outdata->dptr  = (uint8_t *)ips;
2341
2342         return 0;
2343 }
2344
2345
2346 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2347                                         struct ctdb_req_control_old *c,
2348                                         TDB_DATA indata,
2349                                         TDB_DATA *outdata)
2350 {
2351         int i, num, len;
2352         ctdb_sock_addr *addr;
2353         struct ctdb_public_ip_info_old *info;
2354         struct ctdb_vnn *vnn;
2355
2356         addr = (ctdb_sock_addr *)indata.dptr;
2357
2358         vnn = find_public_ip_vnn(ctdb, addr);
2359         if (vnn == NULL) {
2360                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2361                                  "'%s'not a public address\n",
2362                                  ctdb_addr_to_str(addr)));
2363                 return -1;
2364         }
2365
2366         /* count how many public ip structures we have */
2367         num = 0;
2368         for (;vnn->ifaces[num];) {
2369                 num++;
2370         }
2371
2372         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2373                 num*sizeof(struct ctdb_iface);
2374         info = talloc_zero_size(outdata, len);
2375         CTDB_NO_MEMORY(ctdb, info);
2376
2377         info->ip.addr = vnn->public_address;
2378         info->ip.pnn = vnn->pnn;
2379         info->active_idx = 0xFFFFFFFF;
2380
2381         for (i=0; vnn->ifaces[i]; i++) {
2382                 struct ctdb_interface *cur;
2383
2384                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2385                 if (cur == NULL) {
2386                         DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2387                                            vnn->ifaces[i]));
2388                         return -1;
2389                 }
2390                 if (vnn->iface == cur) {
2391                         info->active_idx = i;
2392                 }
2393                 strncpy(info->ifaces[i].name, cur->name,
2394                         sizeof(info->ifaces[i].name));
2395                 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
2396                 info->ifaces[i].link_state = cur->link_up;
2397                 info->ifaces[i].references = cur->references;
2398         }
2399         info->num = i;
2400         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2401                 i*sizeof(struct ctdb_iface);
2402
2403         outdata->dsize = len;
2404         outdata->dptr  = (uint8_t *)info;
2405
2406         return 0;
2407 }
2408
2409 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2410                                 struct ctdb_req_control_old *c,
2411                                 TDB_DATA *outdata)
2412 {
2413         int i, num, len;
2414         struct ctdb_iface_list_old *ifaces;
2415         struct ctdb_interface *cur;
2416
2417         /* count how many public ip structures we have */
2418         num = 0;
2419         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2420                 num++;
2421         }
2422
2423         len = offsetof(struct ctdb_iface_list_old, ifaces) +
2424                 num*sizeof(struct ctdb_iface);
2425         ifaces = talloc_zero_size(outdata, len);
2426         CTDB_NO_MEMORY(ctdb, ifaces);
2427
2428         i = 0;
2429         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2430                 strncpy(ifaces->ifaces[i].name, cur->name,
2431                         sizeof(ifaces->ifaces[i].name));
2432                 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
2433                 ifaces->ifaces[i].link_state = cur->link_up;
2434                 ifaces->ifaces[i].references = cur->references;
2435                 i++;
2436         }
2437         ifaces->num = i;
2438         len = offsetof(struct ctdb_iface_list_old, ifaces) +
2439                 i*sizeof(struct ctdb_iface);
2440
2441         outdata->dsize = len;
2442         outdata->dptr  = (uint8_t *)ifaces;
2443
2444         return 0;
2445 }
2446
2447 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2448                                     struct ctdb_req_control_old *c,
2449                                     TDB_DATA indata)
2450 {
2451         struct ctdb_iface *info;
2452         struct ctdb_interface *iface;
2453         bool link_up = false;
2454
2455         info = (struct ctdb_iface *)indata.dptr;
2456
2457         if (info->name[CTDB_IFACE_SIZE] != '\0') {
2458                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2459                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2460                                   len, len, info->name));
2461                 return -1;
2462         }
2463
2464         switch (info->link_state) {
2465         case 0:
2466                 link_up = false;
2467                 break;
2468         case 1:
2469                 link_up = true;
2470                 break;
2471         default:
2472                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2473                                   (unsigned int)info->link_state));
2474                 return -1;
2475         }
2476
2477         if (info->references != 0) {
2478                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2479                                   (unsigned int)info->references));
2480                 return -1;
2481         }
2482
2483         iface = ctdb_find_iface(ctdb, info->name);
2484         if (iface == NULL) {
2485                 return -1;
2486         }
2487
2488         if (link_up == iface->link_up) {
2489                 return 0;
2490         }
2491
2492         DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2493               ("iface[%s] has changed it's link status %s => %s\n",
2494                iface->name,
2495                iface->link_up?"up":"down",
2496                link_up?"up":"down"));
2497
2498         iface->link_up = link_up;
2499         return 0;
2500 }
2501
2502
2503 /*
2504   called by a daemon to inform us of the entire list of TCP tickles for
2505   a particular public address.
2506   this control should only be sent by the node that is currently serving
2507   that public address.
2508  */
2509 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2510 {
2511         struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
2512         struct ctdb_tcp_array *tcparray;
2513         struct ctdb_vnn *vnn;
2514
2515         /* We must at least have tickles.num or else we cant verify the size
2516            of the received data blob
2517          */
2518         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2519                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2520                 return -1;
2521         }
2522
2523         /* verify that the size of data matches what we expect */
2524         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2525                          + sizeof(struct ctdb_connection) * list->num) {
2526                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2527                 return -1;
2528         }
2529
2530         DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2531                            ctdb_addr_to_str(&list->addr)));
2532
2533         vnn = find_public_ip_vnn(ctdb, &list->addr);
2534         if (vnn == NULL) {
2535                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2536                         ctdb_addr_to_str(&list->addr)));
2537
2538                 return 1;
2539         }
2540
2541         if (vnn->pnn == ctdb->pnn) {
2542                 DEBUG(DEBUG_INFO,
2543                       ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2544                        ctdb_addr_to_str(&list->addr)));
2545                 return 0;
2546         }
2547
2548         /* remove any old ticklelist we might have */
2549         talloc_free(vnn->tcp_array);
2550         vnn->tcp_array = NULL;
2551
2552         tcparray = talloc(vnn, struct ctdb_tcp_array);
2553         CTDB_NO_MEMORY(ctdb, tcparray);
2554
2555         tcparray->num = list->num;
2556
2557         tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
2558         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2559
2560         memcpy(tcparray->connections, &list->connections[0],
2561                sizeof(struct ctdb_connection)*tcparray->num);
2562
2563         /* We now have a new fresh tickle list array for this vnn */
2564         vnn->tcp_array = tcparray;
2565
2566         return 0;
2567 }
2568
2569 /*
2570   called to return the full list of tickles for the puclic address associated 
2571   with the provided vnn
2572  */
2573 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2574 {
2575         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2576         struct ctdb_tickle_list_old *list;
2577         struct ctdb_tcp_array *tcparray;
2578         int num, i;
2579         struct ctdb_vnn *vnn;
2580         unsigned port;
2581
2582         vnn = find_public_ip_vnn(ctdb, addr);
2583         if (vnn == NULL) {
2584                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2585                         ctdb_addr_to_str(addr)));
2586
2587                 return 1;
2588         }
2589
2590         port = ctdb_addr_to_port(addr);
2591
2592         tcparray = vnn->tcp_array;
2593         num = 0;
2594         if (tcparray != NULL) {
2595                 if (port == 0) {
2596                         /* All connections */
2597                         num = tcparray->num;
2598                 } else {
2599                         /* Count connections for port */
2600                         for (i = 0; i < tcparray->num; i++) {
2601                                 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2602                                         num++;
2603                                 }
2604                         }
2605                 }
2606         }
2607
2608         outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
2609                         + sizeof(struct ctdb_connection) * num;
2610
2611         outdata->dptr  = talloc_size(outdata, outdata->dsize);
2612         CTDB_NO_MEMORY(ctdb, outdata->dptr);
2613         list = (struct ctdb_tickle_list_old *)outdata->dptr;
2614
2615         list->addr = *addr;
2616         list->num = num;
2617
2618         if (num == 0) {
2619                 return 0;
2620         }
2621
2622         num = 0;
2623         for (i = 0; i < tcparray->num; i++) {
2624                 if (port == 0 || \
2625                     port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2626                         list->connections[num] = tcparray->connections[i];
2627                         num++;
2628                 }
2629         }
2630
2631         return 0;
2632 }
2633
2634
2635 /*
2636   set the list of all tcp tickles for a public address
2637  */
2638 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2639                                             ctdb_sock_addr *addr,
2640                                             struct ctdb_tcp_array *tcparray)
2641 {
2642         int ret, num;
2643         TDB_DATA data;
2644         struct ctdb_tickle_list_old *list;
2645
2646         if (tcparray) {
2647                 num = tcparray->num;
2648         } else {
2649                 num = 0;
2650         }
2651
2652         data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2653                         sizeof(struct ctdb_connection) * num;
2654         data.dptr = talloc_size(ctdb, data.dsize);
2655         CTDB_NO_MEMORY(ctdb, data.dptr);
2656
2657         list = (struct ctdb_tickle_list_old *)data.dptr;
2658         list->addr = *addr;
2659         list->num = num;
2660         if (tcparray) {
2661                 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2662         }
2663
2664         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2665                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2666                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2667         if (ret != 0) {
2668                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2669                 return -1;
2670         }
2671
2672         talloc_free(data.dptr);
2673
2674         return ret;
2675 }
2676
2677
2678 /*
2679   perform tickle updates if required
2680  */
2681 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2682                                     struct tevent_timer *te,
2683                                     struct timeval t, void *private_data)
2684 {
2685         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2686         int ret;
2687         struct ctdb_vnn *vnn;
2688
2689         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2690                 /* we only send out updates for public addresses that 
2691                    we have taken over
2692                  */
2693                 if (ctdb->pnn != vnn->pnn) {
2694                         continue;
2695                 }
2696                 /* We only send out the updates if we need to */
2697                 if (!vnn->tcp_update_needed) {
2698                         continue;
2699                 }
2700                 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2701                                                        &vnn->public_address,
2702                                                        vnn->tcp_array);
2703                 if (ret != 0) {
2704                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2705                                 ctdb_addr_to_str(&vnn->public_address)));
2706                 } else {
2707                         DEBUG(DEBUG_INFO,
2708                               ("Sent tickle update for public address %s\n",
2709                                ctdb_addr_to_str(&vnn->public_address)));
2710                         vnn->tcp_update_needed = false;
2711                 }
2712         }
2713
2714         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2715                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2716                          ctdb_update_tcp_tickles, ctdb);
2717 }
2718
2719 /*
2720   start periodic update of tcp tickles
2721  */
2722 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2723 {
2724         ctdb->tickle_update_context = talloc_new(ctdb);
2725
2726         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2727                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2728                          ctdb_update_tcp_tickles, ctdb);
2729 }
2730
2731
2732
2733
/*
  State for one series of gratuitous ARPs requested through a control.
  Lives until CTDB_ARP_REPEAT ARPs have been attempted.
 */
struct control_gratious_arp {
        /* daemon context; supplies the event context for the timers */
        struct ctdb_context *ctdb;
        /* address to announce */
        ctdb_sock_addr addr;
        /* name of the interface to send on */
        const char *iface;
        /* number of send attempts made so far */
        int count;
};
2740
2741 /*
2742   send a control_gratuitous arp
2743  */
2744 static void send_gratious_arp(struct tevent_context *ev,
2745                               struct tevent_timer *te,
2746                               struct timeval t, void *private_data)
2747 {
2748         int ret;
2749         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2750                                                         struct control_gratious_arp);
2751
2752         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2753         if (ret != 0) {
2754                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2755                                  arp->iface, strerror(errno)));
2756         }
2757
2758
2759         arp->count++;
2760         if (arp->count == CTDB_ARP_REPEAT) {
2761                 talloc_free(arp);
2762                 return;
2763         }
2764
2765         tevent_add_timer(arp->ctdb->ev, arp,
2766                          timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2767                          send_gratious_arp, arp);
2768 }
2769
2770
2771 /*
2772   send a gratious arp 
2773  */
2774 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2775 {
2776         struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2777         struct control_gratious_arp *arp;
2778
2779         /* verify the size of indata */
2780         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2781                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2782                                  (unsigned)indata.dsize, 
2783                                  (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2784                 return -1;
2785         }
2786         if (indata.dsize != 
2787                 ( offsetof(struct ctdb_addr_info_old, iface)
2788                 + gratious_arp->len ) ){
2789
2790                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2791                         "but should be %u bytes\n", 
2792                          (unsigned)indata.dsize, 
2793                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2794                 return -1;
2795         }
2796
2797
2798         arp = talloc(ctdb, struct control_gratious_arp);
2799         CTDB_NO_MEMORY(ctdb, arp);
2800
2801         arp->ctdb  = ctdb;
2802         arp->addr   = gratious_arp->addr;
2803         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2804         CTDB_NO_MEMORY(ctdb, arp->iface);
2805         arp->count = 0;
2806
2807         tevent_add_timer(arp->ctdb->ev, arp,
2808                          timeval_zero(), send_gratious_arp, arp);
2809
2810         return 0;
2811 }
2812
2813 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2814 {
2815         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2816         int ret;
2817
2818         /* verify the size of indata */
2819         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2820                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2821                 return -1;
2822         }
2823         if (indata.dsize != 
2824                 ( offsetof(struct ctdb_addr_info_old, iface)
2825                 + pub->len ) ){
2826
2827                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2828                         "but should be %u bytes\n", 
2829                          (unsigned)indata.dsize, 
2830                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2831                 return -1;
2832         }
2833
2834         DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2835
2836         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2837
2838         if (ret != 0) {
2839                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2840                 return -1;
2841         }
2842
2843         return 0;
2844 }
2845
/*
  State carried from ctdb_control_del_public_address() to
  delete_ip_callback().
 */
struct delete_ip_callback_state {
        /* the delete request to reply to when the callback runs */
        struct ctdb_req_control_old *c;
};
2849
2850 /*
2851   called when releaseip event finishes for del_public_address
2852  */
2853 static void delete_ip_callback(struct ctdb_context *ctdb,
2854                                int32_t status, TDB_DATA data,
2855                                const char *errormsg,
2856                                void *private_data)
2857 {
2858         struct delete_ip_callback_state *state =
2859                 talloc_get_type(private_data, struct delete_ip_callback_state);
2860
2861         /* If release failed then fail. */
2862         ctdb_request_control_reply(ctdb, state->c, NULL, status, errormsg);
2863         talloc_free(private_data);
2864 }
2865
2866 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb,
2867                                         struct ctdb_req_control_old *c,
2868                                         TDB_DATA indata, bool *async_reply)
2869 {
2870         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2871         struct ctdb_vnn *vnn;
2872
2873         /* verify the size of indata */
2874         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2875                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2876                 return -1;
2877         }
2878         if (indata.dsize != 
2879                 ( offsetof(struct ctdb_addr_info_old, iface)
2880                 + pub->len ) ){
2881
2882                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2883                         "but should be %u bytes\n", 
2884                          (unsigned)indata.dsize, 
2885                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2886                 return -1;
2887         }
2888
2889         DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2890
2891         /* walk over all public addresses until we find a match */
2892         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2893                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2894                         if (vnn->pnn == ctdb->pnn) {
2895                                 struct delete_ip_callback_state *state;
2896                                 struct ctdb_public_ip *ip;
2897                                 TDB_DATA data;
2898                                 int ret;
2899
2900                                 vnn->delete_pending = true;
2901
2902                                 state = talloc(ctdb,
2903                                                struct delete_ip_callback_state);
2904                                 CTDB_NO_MEMORY(ctdb, state);
2905                                 state->c = c;
2906
2907                                 ip = talloc(state, struct ctdb_public_ip);
2908                                 if (ip == NULL) {
2909                                         DEBUG(DEBUG_ERR,
2910                                               (__location__ " Out of memory\n"));
2911                                         talloc_free(state);
2912                                         return -1;
2913                                 }
2914                                 ip->pnn = -1;
2915                                 ip->addr = pub->addr;
2916
2917                                 data.dsize = sizeof(struct ctdb_public_ip);
2918                                 data.dptr = (unsigned char *)ip;
2919
2920                                 ret = ctdb_daemon_send_control(ctdb,
2921                                                                ctdb_get_pnn(ctdb),
2922                                                                0,
2923                                                                CTDB_CONTROL_RELEASE_IP,
2924                                                                0, 0,
2925                                                                data,
2926                                                                delete_ip_callback,
2927                                                                state);
2928                                 if (ret == -1) {
2929                                         DEBUG(DEBUG_ERR,
2930                                               (__location__ "Unable to send "
2931                                                "CTDB_CONTROL_RELEASE_IP\n"));
2932                                         talloc_free(state);
2933                                         return -1;
2934                                 }
2935
2936                                 state->c = talloc_steal(state, c);
2937                                 *async_reply = true;
2938                         } else {
2939                                 /* This IP is not hosted on the
2940                                  * current node so just delete it
2941                                  * now. */
2942                                 do_delete_ip(ctdb, vnn);
2943                         }
2944
2945                         return 0;
2946                 }
2947         }
2948
2949         DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2950                          ctdb_addr_to_str(&pub->addr)));
2951         return -1;
2952 }
2953
2954
/*
  State carried from ctdb_control_ipreallocated() to
  ctdb_ipreallocated_callback().
 */
struct ipreallocated_callback_state {
        /* the request to reply to once the event script has finished */
        struct ctdb_req_control_old *c;
};
2958
2959 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2960                                         int status, void *p)
2961 {
2962         struct ipreallocated_callback_state *state =
2963                 talloc_get_type(p, struct ipreallocated_callback_state);
2964
2965         if (status != 0) {
2966                 DEBUG(DEBUG_ERR,
2967                       (" \"ipreallocated\" event script failed (status %d)\n",
2968                        status));
2969                 if (status == -ETIME) {
2970                         ctdb_ban_self(ctdb);
2971                 }
2972         }
2973
2974         ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2975         talloc_free(state);
2976 }
2977
2978 /* A control to run the ipreallocated event */
2979 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2980                                    struct ctdb_req_control_old *c,
2981                                    bool *async_reply)
2982 {
2983         int ret;
2984         struct ipreallocated_callback_state *state;
2985
2986         state = talloc(ctdb, struct ipreallocated_callback_state);
2987         CTDB_NO_MEMORY(ctdb, state);
2988
2989         DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
2990
2991         ret = ctdb_event_script_callback(ctdb, state,
2992                                          ctdb_ipreallocated_callback, state,
2993                                          CTDB_EVENT_IPREALLOCATED,
2994                                          "%s", "");
2995
2996         if (ret != 0) {
2997                 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
2998                 talloc_free(state);
2999                 return -1;
3000         }
3001
3002         /* tell the control that we will be reply asynchronously */
3003         state->c    = talloc_steal(state, c);
3004         *async_reply = true;
3005
3006         return 0;
3007 }
3008
3009
/*
  Tracks one reloadips operation run in a child process.  Freeing the
  handle runs ctdb_reloadips_destructor().
 */
struct ctdb_reloadips_handle {
        /* daemon context */
        struct ctdb_context *ctdb;
        /* pending request; replied to from the destructor when not NULL */
        struct ctdb_req_control_old *c;
        /* status sent in the reply; updated from the child's result byte */
        int status;
        /* fd[0] is read for the child's one-byte result
           (presumably a pipe pair - creation is not shown here) */
        int fd[2];
        /* pid of the child; sent SIGKILL by the destructor */
        pid_t child;
        /* NOTE(review): presumably the fd event watching fd[0] - confirm */
        struct tevent_fd *fde;
};
3018
3019 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
3020 {
3021         if (h == h->ctdb->reload_ips) {
3022                 h->ctdb->reload_ips = NULL;
3023         }
3024         if (h->c != NULL) {
3025                 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
3026                 h->c = NULL;
3027         }
3028         ctdb_kill(h->ctdb, h->child, SIGKILL);
3029         return 0;
3030 }
3031
3032 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
3033                                          struct tevent_timer *te,
3034                                          struct timeval t, void *private_data)
3035 {
3036         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3037
3038         talloc_free(h);
3039 }
3040
3041 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
3042                                          struct tevent_fd *fde,
3043                                          uint16_t flags, void *private_data)
3044 {
3045         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3046
3047         char res;
3048         int ret;
3049
3050         ret = sys_read(h->fd[0], &res, 1);
3051         if (ret < 1 || res != 0) {
3052                 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
3053                 res = 1;
3054         }
3055         h->status = res;
3056
3057         talloc_free(h);
3058 }
3059
3060 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
3061 {
3062         TALLOC_CTX *mem_ctx = talloc_new(NULL);
3063         struct ctdb_public_ip_list_old *ips;
3064         struct ctdb_vnn *vnn;
3065         struct client_async_data *async_data;
3066         struct timeval timeout;
3067         TDB_DATA data;
3068         struct ctdb_client_control_state *state;
3069         bool first_add;
3070         int i, ret;
3071
3072         CTDB_NO_MEMORY(ctdb, mem_ctx);
3073
3074         /* Read IPs from local node */
3075         ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
3076                                        CTDB_CURRENT_NODE, mem_ctx, &ips);
3077         if (ret != 0) {
3078                 DEBUG(DEBUG_ERR,
3079                       ("Unable to fetch public IPs from local node\n"));
3080                 talloc_free(mem_ctx);
3081                 return -1;
3082         }
3083
3084         /* Read IPs file - this is safe since this is a child process */
3085         ctdb->vnn = NULL;
3086         if (ctdb_set_public_addresses(ctdb, false) != 0) {
3087                 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
3088                 talloc_free(mem_ctx);
3089                 return -1;
3090         }
3091
3092         async_data = talloc_zero(mem_ctx, struct client_async_data);
3093         CTDB_NO_MEMORY(ctdb, async_data);
3094
3095         /* Compare IPs between node and file for IPs to be deleted */
3096         for (i = 0; i < ips->num; i++) {
3097                 /* */
3098                 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3099                         if (ctdb_same_ip(&vnn->public_address,
3100                                          &ips->ips[i].addr)) {
3101                                 /* IP is still in file */
3102                                 break;
3103                         }
3104                 }
3105
3106                 if (vnn == NULL) {
3107                         /* Delete IP ips->ips[i] */
3108                         struct ctdb_addr_info_old *pub;
3109
3110                         DEBUG(DEBUG_NOTICE,
3111                               ("IP %s no longer configured, deleting it\n",
3112                                ctdb_addr_to_str(&ips->ips[i].addr)));
3113
3114                         pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
3115                         CTDB_NO_MEMORY(ctdb, pub);
3116
3117                         pub->addr  = ips->ips[i].addr;
3118                         pub->mask  = 0;
3119                         pub->len   = 0;
3120
3121                         timeout = TAKEOVER_TIMEOUT();
3122
3123                         data.dsize = offsetof(struct ctdb_addr_info_old,
3124                                               iface) + pub->len;
3125                         data.dptr = (uint8_t *)pub;
3126
3127                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3128                                                   CTDB_CONTROL_DEL_PUBLIC_IP,
3129                                                   0, data, async_data,
3130                                                   &timeout, NULL);
3131                         if (state == NULL) {
3132                                 DEBUG(DEBUG_ERR,
3133                                       (__location__
3134                                        " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
3135                                 goto failed;
3136                         }
3137
3138                         ctdb_client_async_add(async_data, state);
3139                 }
3140         }
3141
3142         /* Compare IPs between node and file for IPs to be added */
3143         first_add = true;
3144         for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3145                 for (i = 0; i < ips->num; i++) {
3146                         if (ctdb_same_ip(&vnn->public_address,
3147                                          &ips->ips[i].addr)) {
3148                                 /* IP already on node */
3149                                 break;
3150                         }
3151                 }
3152                 if (i == ips->num) {
3153                         /* Add IP ips->ips[i] */
3154                         struct ctdb_addr_info_old *pub;
3155                         const char *ifaces = NULL;
3156                         uint32_t len;
3157                         int iface = 0;
3158
3159                         DEBUG(DEBUG_NOTICE,
3160                               ("New IP %s configured, adding it\n",
3161                                ctdb_addr_to_str(&vnn->public_address)));
3162                         if (first_add) {
3163                                 uint32_t pnn = ctdb_get_pnn(ctdb);
3164
3165                                 data.dsize = sizeof(pnn);
3166                                 data.dptr  = (uint8_t *)&pnn;
3167
3168                                 ret = ctdb_client_send_message(
3169                                         ctdb,
3170                                         CTDB_BROADCAST_CONNECTED,
3171                                         CTDB_SRVID_REBALANCE_NODE,
3172                                         data);
3173                                 if (ret != 0) {
3174                                         DEBUG(DEBUG_WARNING,
3175                                               ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
3176                                 }
3177
3178                                 first_add = false;
3179                         }
3180
3181                         ifaces = vnn->ifaces[0];
3182                         iface = 1;
3183                         while (vnn->ifaces[iface] != NULL) {
3184                                 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
3185                                                          vnn->ifaces[iface]);
3186                                 iface++;
3187                         }
3188
3189                         len   = strlen(ifaces) + 1;
3190                         pub = talloc_zero_size(mem_ctx,
3191                                                offsetof(struct ctdb_addr_info_old, iface) + len);
3192                         CTDB_NO_MEMORY(ctdb, pub);
3193
3194                         pub->addr  = vnn->public_address;
3195                         pub->mask  = vnn->public_netmask_bits;
3196                         pub->len   = len;
3197                         memcpy(&pub->iface[0], ifaces, pub->len);
3198
3199                         timeout = TAKEOVER_TIMEOUT();
3200
3201                         data.dsize = offsetof(struct ctdb_addr_info_old,
3202                                               iface) + pub->len;
3203                         data.dptr = (uint8_t *)pub;
3204
3205                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3206                                                   CTDB_CONTROL_ADD_PUBLIC_IP,
3207                                                   0, data, async_data,
3208                                                   &timeout, NULL);
3209                         if (state == NULL) {
3210                                 DEBUG(DEBUG_ERR,
3211                                       (__location__
3212                                        " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
3213                                 goto failed;
3214                         }
3215
3216                         ctdb_client_async_add(async_data, state);
3217                 }
3218         }
3219
3220         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3221                 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
3222                 goto failed;
3223         }
3224
3225         talloc_free(mem_ctx);
3226         return 0;
3227
3228 failed:
3229         talloc_free(mem_ctx);
3230         return -1;
3231 }
3232
3233 /* This control is sent to force the node to re-read the public addresses file
3234    and drop any addresses we should nnot longer host, and add new addresses
3235    that we are now able to host
3236 */
3237 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
3238 {
3239         struct ctdb_reloadips_handle *h;
3240         pid_t parent = getpid();
3241
3242         if (ctdb->reload_ips != NULL) {
3243                 talloc_free(ctdb->reload_ips);
3244                 ctdb->reload_ips = NULL;
3245         }
3246
3247         h = talloc(ctdb, struct ctdb_reloadips_handle);
3248         CTDB_NO_MEMORY(ctdb, h);
3249         h->ctdb     = ctdb;
3250         h->c        = NULL;
3251         h->status   = -1;
3252         
3253         if (pipe(h->fd) == -1) {
3254                 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
3255                 talloc_free(h);
3256                 return -1;
3257         }
3258
3259         h->child = ctdb_fork(ctdb);
3260         if (h->child == (pid_t)-1) {
3261                 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
3262                 close(h->fd[0]);
3263                 close(h->fd[1]);
3264                 talloc_free(h);
3265                 return -1;
3266         }
3267
3268         /* child process */
3269         if (h->child == 0) {
3270                 signed char res = 0;
3271
3272                 close(h->fd[0]);
3273                 debug_extra = talloc_asprintf(NULL, "reloadips:");
3274
3275                 prctl_set_comment("ctdb_reloadips");
3276                 if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
3277                         DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
3278                         res = -1;
3279                 } else {
3280                         res = ctdb_reloadips_child(ctdb);
3281                         if (res != 0) {
3282                                 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
3283                         }
3284                 }
3285
3286                 sys_write(h->fd[1], &res, 1);
3287                 ctdb_wait_for_process_to_exit(parent);
3288                 _exit(0);
3289         }
3290
3291         h->c             = talloc_steal(h, c);
3292
3293         close(h->fd[1]);
3294         set_close_on_exec(h->fd[0]);
3295
3296         talloc_set_destructor(h, ctdb_reloadips_destructor);
3297
3298
3299         h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
3300                                ctdb_reloadips_child_handler, (void *)h);
3301         tevent_fd_set_auto_close(h->fde);
3302
3303         tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
3304                          ctdb_reloadips_timeout_event, h);
3305
3306         /* we reply later */
3307         *async_reply = true;
3308         return 0;
3309 }