513da63cd3823fef85aa0cae517e4a4ad0c723f3
[sfrench/samba-autobuild/.git] / ctdb / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6    Copyright (C) Martin Schwenke  2011
7
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3 of the License, or
11    (at your option) any later version.
12    
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17    
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, see <http://www.gnu.org/licenses/>.
20 */
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
26
27 #include <talloc.h>
28 #include <tevent.h>
29
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/sys_rw.h"
34 #include "lib/util/util_process.h"
35
36 #include "ctdb_private.h"
37 #include "ctdb_client.h"
38
39 #include "common/rb_tree.h"
40 #include "common/reqid.h"
41 #include "common/system.h"
42 #include "common/common.h"
43 #include "common/logging.h"
44
45 #include "server/ipalloc.h"
46
/* Timeout built by timeval_current_ofs() from the takeover_timeout
 * tunable.  Expands to an expression that needs a variable named
 * "ctdb" in scope at the point of use. */
#define TAKEOVER_TIMEOUT() \
        timeval_current_ofs(ctdb->tunable.takeover_timeout, 0)

/* First argument handed to timeval_current_ofs() when
 * ctdb_control_send_arp() re-arms its timer (presumably seconds). */
#define CTDB_ARP_INTERVAL 1
/* Number of times ctdb_control_send_arp() runs for one announcement
 * before it frees its state and stops. */
#define CTDB_ARP_REPEAT   3
51
/* A local network interface that public addresses can be assigned to */
struct ctdb_interface {
        /* Doubly-linked list linkage; the list head is ctdb->ifaces */
        struct ctdb_interface *prev;
        struct ctdb_interface *next;
        /* Interface name */
        const char *name;
        /* Link state; initialised to true by ctdb_add_local_iface() */
        bool link_up;
        /* Count of public addresses currently assigned to this interface */
        uint32_t references;
};
58
/* One element of a VNN's list of usable interfaces (ctdb_vnn.ifaces) */
struct vnn_interface {
        /* Doubly-linked list linkage */
        struct vnn_interface *prev;
        struct vnn_interface *next;
        /* The interface this element refers to */
        struct ctdb_interface *iface;
};
63
64 /* state associated with a public ip address */
65 struct ctdb_vnn {
66         struct ctdb_vnn *prev, *next;
67
68         struct ctdb_interface *iface;
69         struct vnn_interface *ifaces;
70         ctdb_sock_addr public_address;
71         uint8_t public_netmask_bits;
72
73         /* the node number that is serving this public address, if any.
74            If no node serves this ip it is set to -1 */
75         int32_t pnn;
76
77         /* List of clients to tickle for this public address */
78         struct ctdb_tcp_array *tcp_array;
79
80         /* whether we need to update the other nodes with changes to our list
81            of connected clients */
82         bool tcp_update_needed;
83
84         /* a context to hang sending gratious arp events off */
85         TALLOC_CTX *takeover_ctx;
86
87         /* Set to true any time an update to this VNN is in flight.
88            This helps to avoid races. */
89         bool update_in_flight;
90
91         /* If CTDB_CONTROL_DEL_PUBLIC_IP is received for this IP
92          * address then this flag is set.  It will be deleted in the
93          * release IP callback. */
94         bool delete_pending;
95 };
96
97 static const char *iface_string(const struct ctdb_interface *iface)
98 {
99         return (iface != NULL ? iface->name : "__none__");
100 }
101
102 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
103 {
104         return iface_string(vnn->iface);
105 }
106
/* Forward declaration; the definition follows further down the file. */
static struct ctdb_interface *
ctdb_find_iface(struct ctdb_context *ctdb, const char *iface);
109
110 static struct ctdb_interface *
111 ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
112 {
113         struct ctdb_interface *i;
114
115         if (strlen(iface) > CTDB_IFACE_SIZE) {
116                 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
117                 return NULL;
118         }
119
120         /* Verify that we don't have an entry for this ip yet */
121         i = ctdb_find_iface(ctdb, iface);
122         if (i != NULL) {
123                 return i;
124         }
125
126         /* create a new structure for this interface */
127         i = talloc_zero(ctdb, struct ctdb_interface);
128         if (i == NULL) {
129                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
130                 return NULL;
131         }
132         i->name = talloc_strdup(i, iface);
133         if (i->name == NULL) {
134                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
135                 talloc_free(i);
136                 return NULL;
137         }
138
139         i->link_up = true;
140
141         DLIST_ADD(ctdb->ifaces, i);
142
143         return i;
144 }
145
146 static bool vnn_has_interface(struct ctdb_vnn *vnn,
147                               const struct ctdb_interface *iface)
148 {
149         struct vnn_interface *i;
150
151         for (i = vnn->ifaces; i != NULL; i = i->next) {
152                 if (iface == i->iface) {
153                         return true;
154                 }
155         }
156
157         return false;
158 }
159
160 /* If any interfaces now have no possible IPs then delete them.  This
161  * implementation is naive (i.e. simple) rather than clever
162  * (i.e. complex).  Given that this is run on delip and that operation
163  * is rare, this doesn't need to be efficient - it needs to be
164  * foolproof.  One alternative is reference counting, where the logic
165  * is distributed and can, therefore, be broken in multiple places.
166  * Another alternative is to build a red-black tree of interfaces that
167  * can have addresses (by walking ctdb->vnn once) and then walking
168  * ctdb->ifaces once and deleting those not in the tree.  Let's go to
169  * one of those if the naive implementation causes problems...  :-)
170  */
171 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
172                                         struct ctdb_vnn *vnn)
173 {
174         struct ctdb_interface *i, *next;
175
176         /* For each interface, check if there's an IP using it. */
177         for (i = ctdb->ifaces; i != NULL; i = next) {
178                 struct ctdb_vnn *tv;
179                 bool found;
180                 next = i->next;
181
182                 /* Only consider interfaces named in the given VNN. */
183                 if (!vnn_has_interface(vnn, i)) {
184                         continue;
185                 }
186
187                 /* Search for a vnn with this interface. */
188                 found = false;
189                 for (tv=ctdb->vnn; tv; tv=tv->next) {
190                         if (vnn_has_interface(tv, i)) {
191                                 found = true;
192                                 break;
193                         }
194                 }
195
196                 if (!found) {
197                         /* None of the VNNs are using this interface. */
198                         DLIST_REMOVE(ctdb->ifaces, i);
199                         talloc_free(i);
200                 }
201         }
202 }
203
204
205 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
206                                               const char *iface)
207 {
208         struct ctdb_interface *i;
209
210         for (i=ctdb->ifaces;i;i=i->next) {
211                 if (strcmp(i->name, iface) == 0) {
212                         return i;
213                 }
214         }
215
216         return NULL;
217 }
218
219 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
220                                                   struct ctdb_vnn *vnn)
221 {
222         struct vnn_interface *i;
223         struct ctdb_interface *cur = NULL;
224         struct ctdb_interface *best = NULL;
225
226         for (i = vnn->ifaces; i != NULL; i = i->next) {
227
228                 cur = i->iface;
229
230                 if (!cur->link_up) {
231                         continue;
232                 }
233
234                 if (best == NULL) {
235                         best = cur;
236                         continue;
237                 }
238
239                 if (cur->references < best->references) {
240                         best = cur;
241                         continue;
242                 }
243         }
244
245         return best;
246 }
247
248 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
249                                      struct ctdb_vnn *vnn)
250 {
251         struct ctdb_interface *best = NULL;
252
253         if (vnn->iface) {
254                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
255                                    "still assigned to iface '%s'\n",
256                                    ctdb_addr_to_str(&vnn->public_address),
257                                    ctdb_vnn_iface_string(vnn)));
258                 return 0;
259         }
260
261         best = ctdb_vnn_best_iface(ctdb, vnn);
262         if (best == NULL) {
263                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
264                                   "cannot assign to iface any iface\n",
265                                   ctdb_addr_to_str(&vnn->public_address)));
266                 return -1;
267         }
268
269         vnn->iface = best;
270         best->references++;
271         vnn->pnn = ctdb->pnn;
272
273         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
274                            "now assigned to iface '%s' refs[%d]\n",
275                            ctdb_addr_to_str(&vnn->public_address),
276                            ctdb_vnn_iface_string(vnn),
277                            best->references));
278         return 0;
279 }
280
281 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
282                                     struct ctdb_vnn *vnn)
283 {
284         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
285                            "now unassigned (old iface '%s' refs[%d])\n",
286                            ctdb_addr_to_str(&vnn->public_address),
287                            ctdb_vnn_iface_string(vnn),
288                            vnn->iface?vnn->iface->references:0));
289         if (vnn->iface) {
290                 vnn->iface->references--;
291         }
292         vnn->iface = NULL;
293         if (vnn->pnn == ctdb->pnn) {
294                 vnn->pnn = -1;
295         }
296 }
297
298 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
299                                struct ctdb_vnn *vnn)
300 {
301         struct vnn_interface *i;
302
303         /* Nodes that are not RUNNING can not host IPs */
304         if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
305                 return false;
306         }
307
308         if (vnn->delete_pending) {
309                 return false;
310         }
311
312         if (vnn->iface && vnn->iface->link_up) {
313                 return true;
314         }
315
316         for (i = vnn->ifaces; i != NULL; i = i->next) {
317                 if (i->iface->link_up) {
318                         return true;
319                 }
320         }
321
322         return false;
323 }
324
325 struct ctdb_takeover_arp {
326         struct ctdb_context *ctdb;
327         uint32_t count;
328         ctdb_sock_addr addr;
329         struct ctdb_tcp_array *tcparray;
330         struct ctdb_vnn *vnn;
331 };
332
333
334 /*
335   lists of tcp endpoints
336  */
337 struct ctdb_tcp_list {
338         struct ctdb_tcp_list *prev, *next;
339         struct ctdb_connection connection;
340 };
341
342 /*
343   list of clients to kill on IP release
344  */
345 struct ctdb_client_ip {
346         struct ctdb_client_ip *prev, *next;
347         struct ctdb_context *ctdb;
348         ctdb_sock_addr addr;
349         uint32_t client_id;
350 };
351
352
353 /*
354   send a gratuitous arp
355  */
356 static void ctdb_control_send_arp(struct tevent_context *ev,
357                                   struct tevent_timer *te,
358                                   struct timeval t, void *private_data)
359 {
360         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
361                                                         struct ctdb_takeover_arp);
362         int i, ret;
363         struct ctdb_tcp_array *tcparray;
364         const char *iface = ctdb_vnn_iface_string(arp->vnn);
365
366         ret = ctdb_sys_send_arp(&arp->addr, iface);
367         if (ret != 0) {
368                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
369                                   iface, strerror(errno)));
370         }
371
372         tcparray = arp->tcparray;
373         if (tcparray) {
374                 for (i=0;i<tcparray->num;i++) {
375                         struct ctdb_connection *tcon;
376
377                         tcon = &tcparray->connections[i];
378                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
379                                 (unsigned)ntohs(tcon->dst.ip.sin_port),
380                                 ctdb_addr_to_str(&tcon->src),
381                                 (unsigned)ntohs(tcon->src.ip.sin_port)));
382                         ret = ctdb_sys_send_tcp(
383                                 &tcon->src,
384                                 &tcon->dst,
385                                 0, 0, 0);
386                         if (ret != 0) {
387                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
388                                         ctdb_addr_to_str(&tcon->src)));
389                         }
390                 }
391         }
392
393         arp->count++;
394
395         if (arp->count == CTDB_ARP_REPEAT) {
396                 talloc_free(arp);
397                 return;
398         }
399
400         tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
401                          timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
402                          ctdb_control_send_arp, arp);
403 }
404
405 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
406                                        struct ctdb_vnn *vnn)
407 {
408         struct ctdb_takeover_arp *arp;
409         struct ctdb_tcp_array *tcparray;
410
411         if (!vnn->takeover_ctx) {
412                 vnn->takeover_ctx = talloc_new(vnn);
413                 if (!vnn->takeover_ctx) {
414                         return -1;
415                 }
416         }
417
418         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
419         if (!arp) {
420                 return -1;
421         }
422
423         arp->ctdb = ctdb;
424         arp->addr = vnn->public_address;
425         arp->vnn  = vnn;
426
427         tcparray = vnn->tcp_array;
428         if (tcparray) {
429                 /* add all of the known tcp connections for this IP to the
430                    list of tcp connections to send tickle acks for */
431                 arp->tcparray = talloc_steal(arp, tcparray);
432
433                 vnn->tcp_array = NULL;
434                 vnn->tcp_update_needed = true;
435         }
436
437         tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
438                          timeval_zero(), ctdb_control_send_arp, arp);
439
440         return 0;
441 }
442
/* State carried from ctdb_do_takeip() to ctdb_do_takeip_callback() */
struct ctdb_do_takeip_state {
        /* Control request to reply to once the event has finished */
        struct ctdb_req_control_old *c;
        /* Public address being taken over */
        struct ctdb_vnn *vnn;
};
447
448 /*
449   called when takeip event finishes
450  */
451 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
452                                     void *private_data)
453 {
454         struct ctdb_do_takeip_state *state =
455                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
456         int32_t ret;
457         TDB_DATA data;
458
459         if (status != 0) {
460                 if (status == -ETIME) {
461                         ctdb_ban_self(ctdb);
462                 }
463                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
464                                  ctdb_addr_to_str(&state->vnn->public_address),
465                                  ctdb_vnn_iface_string(state->vnn)));
466                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
467
468                 talloc_free(state);
469                 return;
470         }
471
472         if (ctdb->do_checkpublicip) {
473
474         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
475         if (ret != 0) {
476                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
477                 talloc_free(state);
478                 return;
479         }
480
481         }
482
483         data.dptr  = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
484         data.dsize = strlen((char *)data.dptr) + 1;
485         DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
486
487         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
488
489
490         /* the control succeeded */
491         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
492         talloc_free(state);
493         return;
494 }
495
496 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
497 {
498         state->vnn->update_in_flight = false;
499         return 0;
500 }
501
502 /*
503   take over an ip address
504  */
505 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
506                               struct ctdb_req_control_old *c,
507                               struct ctdb_vnn *vnn)
508 {
509         int ret;
510         struct ctdb_do_takeip_state *state;
511
512         if (vnn->update_in_flight) {
513                 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
514                                     "update for this IP already in flight\n",
515                                     ctdb_addr_to_str(&vnn->public_address),
516                                     vnn->public_netmask_bits));
517                 return -1;
518         }
519
520         ret = ctdb_vnn_assign_iface(ctdb, vnn);
521         if (ret != 0) {
522                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
523                                  "assign a usable interface\n",
524                                  ctdb_addr_to_str(&vnn->public_address),
525                                  vnn->public_netmask_bits));
526                 return -1;
527         }
528
529         state = talloc(vnn, struct ctdb_do_takeip_state);
530         CTDB_NO_MEMORY(ctdb, state);
531
532         state->c = NULL;
533         state->vnn   = vnn;
534
535         vnn->update_in_flight = true;
536         talloc_set_destructor(state, ctdb_takeip_destructor);
537
538         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
539                             ctdb_addr_to_str(&vnn->public_address),
540                             vnn->public_netmask_bits,
541                             ctdb_vnn_iface_string(vnn)));
542
543         ret = ctdb_event_script_callback(ctdb,
544                                          state,
545                                          ctdb_do_takeip_callback,
546                                          state,
547                                          CTDB_EVENT_TAKE_IP,
548                                          "%s %s %u",
549                                          ctdb_vnn_iface_string(vnn),
550                                          ctdb_addr_to_str(&vnn->public_address),
551                                          vnn->public_netmask_bits);
552
553         if (ret != 0) {
554                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
555                         ctdb_addr_to_str(&vnn->public_address),
556                         ctdb_vnn_iface_string(vnn)));
557                 talloc_free(state);
558                 return -1;
559         }
560
561         state->c = talloc_steal(ctdb, c);
562         return 0;
563 }
564
/* State carried from ctdb_do_updateip() to ctdb_do_updateip_callback() */
struct ctdb_do_updateip_state {
        /* Control request to reply to once the event has finished */
        struct ctdb_req_control_old *c;
        /* Interface the address was on before the move; may be NULL */
        struct ctdb_interface *old;
        /* Public address being moved */
        struct ctdb_vnn *vnn;
};
570
571 /*
572   called when updateip event finishes
573  */
574 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
575                                       void *private_data)
576 {
577         struct ctdb_do_updateip_state *state =
578                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
579         int32_t ret;
580
581         if (status != 0) {
582                 if (status == -ETIME) {
583                         ctdb_ban_self(ctdb);
584                 }
585                 DEBUG(DEBUG_ERR,
586                       ("Failed update of IP %s from interface %s to %s\n",
587                        ctdb_addr_to_str(&state->vnn->public_address),
588                        iface_string(state->old),
589                        ctdb_vnn_iface_string(state->vnn)));
590
591                 /*
592                  * All we can do is reset the old interface
593                  * and let the next run fix it
594                  */
595                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
596                 state->vnn->iface = state->old;
597                 state->vnn->iface->references++;
598
599                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
600                 talloc_free(state);
601                 return;
602         }
603
604         if (ctdb->do_checkpublicip) {
605
606         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
607         if (ret != 0) {
608                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
609                 talloc_free(state);
610                 return;
611         }
612
613         }
614
615         /* the control succeeded */
616         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
617         talloc_free(state);
618         return;
619 }
620
621 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
622 {
623         state->vnn->update_in_flight = false;
624         return 0;
625 }
626
627 /*
628   update (move) an ip address
629  */
630 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
631                                 struct ctdb_req_control_old *c,
632                                 struct ctdb_vnn *vnn)
633 {
634         int ret;
635         struct ctdb_do_updateip_state *state;
636         struct ctdb_interface *old = vnn->iface;
637         const char *old_name = iface_string(old);
638         const char *new_name;
639
640         if (vnn->update_in_flight) {
641                 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
642                                     "update for this IP already in flight\n",
643                                     ctdb_addr_to_str(&vnn->public_address),
644                                     vnn->public_netmask_bits));
645                 return -1;
646         }
647
648         ctdb_vnn_unassign_iface(ctdb, vnn);
649         ret = ctdb_vnn_assign_iface(ctdb, vnn);
650         if (ret != 0) {
651                 DEBUG(DEBUG_ERR,("Update of IP %s/%u failed to "
652                                  "assign a usable interface (old iface '%s')\n",
653                                  ctdb_addr_to_str(&vnn->public_address),
654                                  vnn->public_netmask_bits,
655                                  old_name));
656                 return -1;
657         }
658
659         if (old == vnn->iface) {
660                 /* A benign update from one interface onto itself.
661                  * no need to run the eventscripts in this case, just return
662                  * success.
663                  */
664                 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
665                 return 0;
666         }
667
668         state = talloc(vnn, struct ctdb_do_updateip_state);
669         CTDB_NO_MEMORY(ctdb, state);
670
671         state->c = NULL;
672         state->old = old;
673         state->vnn = vnn;
674
675         vnn->update_in_flight = true;
676         talloc_set_destructor(state, ctdb_updateip_destructor);
677
678         new_name = ctdb_vnn_iface_string(vnn);
679         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
680                             "interface %s to %s\n",
681                             ctdb_addr_to_str(&vnn->public_address),
682                             vnn->public_netmask_bits,
683                             old_name,
684                             new_name));
685
686         ret = ctdb_event_script_callback(ctdb,
687                                          state,
688                                          ctdb_do_updateip_callback,
689                                          state,
690                                          CTDB_EVENT_UPDATE_IP,
691                                          "%s %s %s %u",
692                                          old_name,
693                                          new_name,
694                                          ctdb_addr_to_str(&vnn->public_address),
695                                          vnn->public_netmask_bits);
696         if (ret != 0) {
697                 DEBUG(DEBUG_ERR,
698                       ("Failed update IP %s from interface %s to %s\n",
699                        ctdb_addr_to_str(&vnn->public_address),
700                        old_name, new_name));
701                 talloc_free(state);
702                 return -1;
703         }
704
705         state->c = talloc_steal(ctdb, c);
706         return 0;
707 }
708
709 /*
710   Find the vnn of the node that has a public ip address
711   returns -1 if the address is not known as a public address
712  */
713 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
714 {
715         struct ctdb_vnn *vnn;
716
717         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
718                 if (ctdb_same_ip(&vnn->public_address, addr)) {
719                         return vnn;
720                 }
721         }
722
723         return NULL;
724 }
725
726 /*
727   take over an ip address
728  */
729 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
730                                  struct ctdb_req_control_old *c,
731                                  TDB_DATA indata,
732                                  bool *async_reply)
733 {
734         int ret;
735         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
736         struct ctdb_vnn *vnn;
737         bool have_ip = false;
738         bool do_updateip = false;
739         bool do_takeip = false;
740         struct ctdb_interface *best_iface = NULL;
741
742         if (pip->pnn != ctdb->pnn) {
743                 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
744                                  "with pnn %d, but we're node %d\n",
745                                  ctdb_addr_to_str(&pip->addr),
746                                  pip->pnn, ctdb->pnn));
747                 return -1;
748         }
749
750         /* update out vnn list */
751         vnn = find_public_ip_vnn(ctdb, &pip->addr);
752         if (vnn == NULL) {
753                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
754                         ctdb_addr_to_str(&pip->addr)));
755                 return 0;
756         }
757
758         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
759                 have_ip = ctdb_sys_have_ip(&pip->addr);
760         }
761         best_iface = ctdb_vnn_best_iface(ctdb, vnn);
762         if (best_iface == NULL) {
763                 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
764                                  "a usable interface (old %s, have_ip %d)\n",
765                                  ctdb_addr_to_str(&vnn->public_address),
766                                  vnn->public_netmask_bits,
767                                  ctdb_vnn_iface_string(vnn),
768                                  have_ip));
769                 return -1;
770         }
771
772         if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
773                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
774                                   "and we have it on iface[%s], but it was assigned to node %d"
775                                   "and we are node %d, banning ourself\n",
776                                  ctdb_addr_to_str(&vnn->public_address),
777                                  ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
778                 ctdb_ban_self(ctdb);
779                 return -1;
780         }
781
782         if (vnn->pnn == -1 && have_ip) {
783                 /* This will cause connections to be reset and
784                  * reestablished.  However, this is a very unusual
785                  * situation and doing this will completely repair the
786                  * inconsistency in the VNN.
787                  */
788                 DEBUG(DEBUG_WARNING,
789                       (__location__
790                        " Doing updateip for IP %s already on an interface\n",
791                        ctdb_addr_to_str(&vnn->public_address)));
792                 do_updateip = true;
793         }
794
795         if (vnn->iface) {
796                 if (vnn->iface != best_iface) {
797                         if (!vnn->iface->link_up) {
798                                 do_updateip = true;
799                         } else if (vnn->iface->references > (best_iface->references + 1)) {
800                                 /* only move when the rebalance gains something */
801                                         do_updateip = true;
802                         }
803                 }
804         }
805
806         if (!have_ip) {
807                 if (do_updateip) {
808                         ctdb_vnn_unassign_iface(ctdb, vnn);
809                         do_updateip = false;
810                 }
811                 do_takeip = true;
812         }
813
814         if (do_takeip) {
815                 ret = ctdb_do_takeip(ctdb, c, vnn);
816                 if (ret != 0) {
817                         return -1;
818                 }
819         } else if (do_updateip) {
820                 ret = ctdb_do_updateip(ctdb, c, vnn);
821                 if (ret != 0) {
822                         return -1;
823                 }
824         } else {
825                 /*
826                  * The interface is up and the kernel known the ip
827                  * => do nothing
828                  */
829                 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
830                         ctdb_addr_to_str(&pip->addr),
831                         vnn->public_netmask_bits,
832                         ctdb_vnn_iface_string(vnn)));
833                 return 0;
834         }
835
836         /* tell ctdb_control.c that we will be replying asynchronously */
837         *async_reply = true;
838
839         return 0;
840 }
841
842 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
843 {
844         DLIST_REMOVE(ctdb->vnn, vnn);
845         ctdb_vnn_unassign_iface(ctdb, vnn);
846         ctdb_remove_orphaned_ifaces(ctdb, vnn);
847         talloc_free(vnn);
848 }
849
850 static struct ctdb_vnn *release_ip_post(struct ctdb_context *ctdb,
851                                         struct ctdb_vnn *vnn,
852                                         ctdb_sock_addr *addr)
853 {
854         TDB_DATA data;
855
856         /* Send a message to all clients of this node telling them
857          * that the cluster has been reconfigured and they should
858          * close any connections on this IP address
859          */
860         data.dptr = (uint8_t *)ctdb_addr_to_str(addr);
861         data.dsize = strlen((char *)data.dptr)+1;
862         DEBUG(DEBUG_INFO, ("Sending RELEASE_IP message for %s\n", data.dptr));
863         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
864
865         ctdb_vnn_unassign_iface(ctdb, vnn);
866
867         /* Process the IP if it has been marked for deletion */
868         if (vnn->delete_pending) {
869                 do_delete_ip(ctdb, vnn);
870                 return NULL;
871         }
872
873         return vnn;
874 }
875
/*
 * State carried from ctdb_control_release_ip() to release_ip_callback()
 * while the "releaseip" event script is running.
 */
struct release_ip_callback_state {
        /* control request to reply to once the event has finished */
        struct ctdb_req_control_old *c;
        /* copy of the address being released */
        ctdb_sock_addr *addr;
        /* vnn being released; set to NULL if the vnn gets deleted */
        struct ctdb_vnn *vnn;
        /* PNN stored into vnn->pnn when the release succeeds */
        uint32_t target_pnn;
};
882
883 /*
884   called when releaseip event finishes
885  */
886 static void release_ip_callback(struct ctdb_context *ctdb, int status,
887                                 void *private_data)
888 {
889         struct release_ip_callback_state *state =
890                 talloc_get_type(private_data, struct release_ip_callback_state);
891
892         if (status == -ETIME) {
893                 ctdb_ban_self(ctdb);
894         }
895
896         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
897                 if  (ctdb_sys_have_ip(state->addr)) {
898                         DEBUG(DEBUG_ERR,
899                               ("IP %s still hosted during release IP callback, failing\n",
900                                ctdb_addr_to_str(state->addr)));
901                         ctdb_request_control_reply(ctdb, state->c,
902                                                    NULL, -1, NULL);
903                         talloc_free(state);
904                         return;
905                 }
906         }
907
908         state->vnn->pnn = state->target_pnn;
909         state->vnn = release_ip_post(ctdb, state->vnn, state->addr);
910
911         /* the control succeeded */
912         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
913         talloc_free(state);
914 }
915
916 static int ctdb_releaseip_destructor(struct release_ip_callback_state *state)
917 {
918         if (state->vnn != NULL) {
919                 state->vnn->update_in_flight = false;
920         }
921         return 0;
922 }
923
/*
  release an ip address

  indata carries a struct ctdb_public_ip: the address to release and
  the PNN the address is moving to.

  Returns 0 without touching *async_reply when nothing has to be run
  (address is not a public address, or the release is redundant).
  Returns -1 if another update for the address is already in flight,
  on out of memory, or if the "releaseip" event could not be started.
  Otherwise the event is started, *async_reply is set to true, 0 is
  returned and the reply is sent from release_ip_callback().
 */
int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
                                struct ctdb_req_control_old *c,
                                TDB_DATA indata, 
                                bool *async_reply)
{
        int ret;
        struct release_ip_callback_state *state;
        struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
        struct ctdb_vnn *vnn;
        const char *iface;

        /* update our vnn list */
        vnn = find_public_ip_vnn(ctdb, &pip->addr);
        if (vnn == NULL) {
                DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
                        ctdb_addr_to_str(&pip->addr)));
                return 0;
        }

        /* stop any previous arps */
        talloc_free(vnn->takeover_ctx);
        vnn->takeover_ctx = NULL;

        /* RELEASE_IP controls are sent to all nodes that should not
         * be hosting a particular IP.  This serves 2 purposes.  The
         * first is to help resolve any inconsistencies.  If a node
         * does unexpectedly host an IP then it will be released.  The
         * 2nd is to use a "redundant release" to tell non-takeover
         * nodes where an IP is moving to.  This is how "ctdb ip" can
         * report the (likely) location of an IP by only asking the
         * local node.  Redundant releases need to update the PNN but
         * are otherwise ignored.
         */
        if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
                /* IP checking is active: ask the system whether the
                 * address is really configured here */
                if (!ctdb_sys_have_ip(&pip->addr)) {
                        DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
                                ctdb_addr_to_str(&pip->addr),
                                vnn->public_netmask_bits,
                                ctdb_vnn_iface_string(vnn)));
                        vnn->pnn = pip->pnn;
                        ctdb_vnn_unassign_iface(ctdb, vnn);
                        return 0;
                }
        } else {
                /* No system check: fall back to our own bookkeeping, an
                 * address without an assigned interface is not held */
                if (vnn->iface == NULL) {
                        DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
                                           ctdb_addr_to_str(&pip->addr),
                                           vnn->public_netmask_bits));
                        vnn->pnn = pip->pnn;
                        return 0;
                }
        }

        /* There is a potential race between take_ip and us because we
         * update the VNN via a callback that runs when the
         * eventscripts have been run.  Avoid the race by allowing one
         * update to be in flight at a time.
         */
        if (vnn->update_in_flight) {
                DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
                                    "update for this IP already in flight\n",
                                    ctdb_addr_to_str(&vnn->public_address),
                                    vnn->public_netmask_bits));
                return -1;
        }

        iface = ctdb_vnn_iface_string(vnn);

        DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
                ctdb_addr_to_str(&pip->addr),
                vnn->public_netmask_bits,
                iface,
                pip->pnn));

        state = talloc(ctdb, struct release_ip_callback_state);
        if (state == NULL) {
                ctdb_set_error(ctdb, "Out of memory at %s:%d",
                               __FILE__, __LINE__);
                return -1;
        }

        /* c is only attached to the state once the event has been
         * started successfully (see the end of this function) */
        state->c = NULL;
        state->addr = talloc(state, ctdb_sock_addr);
        if (state->addr == NULL) {
                ctdb_set_error(ctdb, "Out of memory at %s:%d",
                               __FILE__, __LINE__);
                talloc_free(state);
                return -1;
        }
        *state->addr = pip->addr;
        state->target_pnn = pip->pnn;
        state->vnn   = vnn;

        /* From here on freeing the state clears update_in_flight again
         * via ctdb_releaseip_destructor() */
        vnn->update_in_flight = true;
        talloc_set_destructor(state, ctdb_releaseip_destructor);

        ret = ctdb_event_script_callback(ctdb, 
                                         state, release_ip_callback, state,
                                         CTDB_EVENT_RELEASE_IP,
                                         "%s %s %u",
                                         iface,
                                         ctdb_addr_to_str(&pip->addr),
                                         vnn->public_netmask_bits);
        if (ret != 0) {
                DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
                        ctdb_addr_to_str(&pip->addr),
                        ctdb_vnn_iface_string(vnn)));
                talloc_free(state);
                return -1;
        }

        /* tell the control that we will be replying asynchronously */
        *async_reply = true;
        /* the request now belongs to the state so that it stays
         * available to release_ip_callback() */
        state->c = talloc_steal(state, c);
        return 0;
}
1043
1044 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1045                                    ctdb_sock_addr *addr,
1046                                    unsigned mask, const char *ifaces,
1047                                    bool check_address)
1048 {
1049         struct ctdb_vnn      *vnn;
1050         char *tmp;
1051         const char *iface;
1052
1053         /* Verify that we don't have an entry for this IP yet */
1054         for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
1055                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1056                         DEBUG(DEBUG_ERR,
1057                               ("Duplicate public IP address '%s'\n",
1058                                ctdb_addr_to_str(addr)));
1059                         return -1;
1060                 }
1061         }
1062
1063         /* Create a new VNN structure for this IP address */
1064         vnn = talloc_zero(ctdb, struct ctdb_vnn);
1065         if (vnn == NULL) {
1066                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1067                 return -1;
1068         }
1069         tmp = talloc_strdup(vnn, ifaces);
1070         if (tmp == NULL) {
1071                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1072                 talloc_free(vnn);
1073                 return -1;
1074         }
1075         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1076                 struct vnn_interface *vnn_iface;
1077                 struct ctdb_interface *i;
1078                 if (!ctdb_sys_check_iface_exists(iface)) {
1079                         DEBUG(DEBUG_ERR,
1080                               ("Unknown interface %s for public address %s\n",
1081                                iface, ctdb_addr_to_str(addr)));
1082                         talloc_free(vnn);
1083                         return -1;
1084                 }
1085
1086                 i = ctdb_add_local_iface(ctdb, iface);
1087                 if (i == NULL) {
1088                         DEBUG(DEBUG_ERR,
1089                               ("Failed to add interface '%s' "
1090                                "for public address %s\n",
1091                                iface, ctdb_addr_to_str(addr)));
1092                         talloc_free(vnn);
1093                         return -1;
1094                 }
1095
1096                 vnn_iface = talloc_zero(vnn, struct vnn_interface);
1097                 if (vnn_iface == NULL) {
1098                         DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1099                         talloc_free(vnn);
1100                         return -1;
1101                 }
1102
1103                 vnn_iface->iface = i;
1104                 DLIST_ADD_END(vnn->ifaces, vnn_iface);
1105         }
1106         talloc_free(tmp);
1107         vnn->public_address      = *addr;
1108         vnn->public_netmask_bits = mask;
1109         vnn->pnn                 = -1;
1110
1111         DLIST_ADD(ctdb->vnn, vnn);
1112
1113         return 0;
1114 }
1115
1116 /*
1117   setup the public address lists from a file
1118 */
1119 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1120 {
1121         char **lines;
1122         int nlines;
1123         int i;
1124
1125         lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1126         if (lines == NULL) {
1127                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1128                 return -1;
1129         }
1130         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1131                 nlines--;
1132         }
1133
1134         for (i=0;i<nlines;i++) {
1135                 unsigned mask;
1136                 ctdb_sock_addr addr;
1137                 const char *addrstr;
1138                 const char *ifaces;
1139                 char *tok, *line;
1140
1141                 line = lines[i];
1142                 while ((*line == ' ') || (*line == '\t')) {
1143                         line++;
1144                 }
1145                 if (*line == '#') {
1146                         continue;
1147                 }
1148                 if (strcmp(line, "") == 0) {
1149                         continue;
1150                 }
1151                 tok = strtok(line, " \t");
1152                 addrstr = tok;
1153                 tok = strtok(NULL, " \t");
1154                 if (tok == NULL) {
1155                         if (NULL == ctdb->default_public_interface) {
1156                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1157                                          i+1));
1158                                 talloc_free(lines);
1159                                 return -1;
1160                         }
1161                         ifaces = ctdb->default_public_interface;
1162                 } else {
1163                         ifaces = tok;
1164                 }
1165
1166                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1167                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1168                         talloc_free(lines);
1169                         return -1;
1170                 }
1171                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1172                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1173                         talloc_free(lines);
1174                         return -1;
1175                 }
1176         }
1177
1178
1179         talloc_free(lines);
1180         return 0;
1181 }
1182
1183 /*
1184   destroy a ctdb_client_ip structure
1185  */
1186 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1187 {
1188         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1189                 ctdb_addr_to_str(&ip->addr),
1190                 ntohs(ip->addr.ip.sin_port),
1191                 ip->client_id));
1192
1193         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1194         return 0;
1195 }
1196
1197 /*
1198   called by a client to inform us of a TCP connection that it is managing
1199   that should tickled with an ACK when IP takeover is done
1200  */
1201 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1202                                 TDB_DATA indata)
1203 {
1204         struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1205         struct ctdb_connection *tcp_sock = NULL;
1206         struct ctdb_tcp_list *tcp;
1207         struct ctdb_connection t;
1208         int ret;
1209         TDB_DATA data;
1210         struct ctdb_client_ip *ip;
1211         struct ctdb_vnn *vnn;
1212         ctdb_sock_addr addr;
1213
1214         /* If we don't have public IPs, tickles are useless */
1215         if (ctdb->vnn == NULL) {
1216                 return 0;
1217         }
1218
1219         tcp_sock = (struct ctdb_connection *)indata.dptr;
1220
1221         addr = tcp_sock->src;
1222         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1223         addr = tcp_sock->dst;
1224         ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
1225
1226         ZERO_STRUCT(addr);
1227         memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1228         vnn = find_public_ip_vnn(ctdb, &addr);
1229         if (vnn == NULL) {
1230                 switch (addr.sa.sa_family) {
1231                 case AF_INET:
1232                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1233                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1234                                         ctdb_addr_to_str(&addr)));
1235                         }
1236                         break;
1237                 case AF_INET6:
1238                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1239                                 ctdb_addr_to_str(&addr)));
1240                         break;
1241                 default:
1242                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1243                 }
1244
1245                 return 0;
1246         }
1247
1248         if (vnn->pnn != ctdb->pnn) {
1249                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1250                         ctdb_addr_to_str(&addr),
1251                         client_id, client->pid));
1252                 /* failing this call will tell smbd to die */
1253                 return -1;
1254         }
1255
1256         ip = talloc(client, struct ctdb_client_ip);
1257         CTDB_NO_MEMORY(ctdb, ip);
1258
1259         ip->ctdb      = ctdb;
1260         ip->addr      = addr;
1261         ip->client_id = client_id;
1262         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1263         DLIST_ADD(ctdb->client_ip_list, ip);
1264
1265         tcp = talloc(client, struct ctdb_tcp_list);
1266         CTDB_NO_MEMORY(ctdb, tcp);
1267
1268         tcp->connection.src = tcp_sock->src;
1269         tcp->connection.dst = tcp_sock->dst;
1270
1271         DLIST_ADD(client->tcp_list, tcp);
1272
1273         t.src = tcp_sock->src;
1274         t.dst = tcp_sock->dst;
1275
1276         data.dptr = (uint8_t *)&t;
1277         data.dsize = sizeof(t);
1278
1279         switch (addr.sa.sa_family) {
1280         case AF_INET:
1281                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1282                         (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1283                         ctdb_addr_to_str(&tcp_sock->src),
1284                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1285                 break;
1286         case AF_INET6:
1287                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1288                         (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1289                         ctdb_addr_to_str(&tcp_sock->src),
1290                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1291                 break;
1292         default:
1293                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1294         }
1295
1296
1297         /* tell all nodes about this tcp connection */
1298         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1299                                        CTDB_CONTROL_TCP_ADD,
1300                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1301         if (ret != 0) {
1302                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1303                 return -1;
1304         }
1305
1306         return 0;
1307 }
1308
1309 /*
1310   find a tcp address on a list
1311  */
1312 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1313                                            struct ctdb_connection *tcp)
1314 {
1315         int i;
1316
1317         if (array == NULL) {
1318                 return NULL;
1319         }
1320
1321         for (i=0;i<array->num;i++) {
1322                 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
1323                     ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
1324                         return &array->connections[i];
1325                 }
1326         }
1327         return NULL;
1328 }
1329
1330
1331
1332 /*
1333   called by a daemon to inform us of a TCP connection that one of its
1334   clients managing that should tickled with an ACK when IP takeover is
1335   done
1336  */
1337 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1338 {
1339         struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
1340         struct ctdb_tcp_array *tcparray;
1341         struct ctdb_connection tcp;
1342         struct ctdb_vnn *vnn;
1343
1344         /* If we don't have public IPs, tickles are useless */
1345         if (ctdb->vnn == NULL) {
1346                 return 0;
1347         }
1348
1349         vnn = find_public_ip_vnn(ctdb, &p->dst);
1350         if (vnn == NULL) {
1351                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1352                         ctdb_addr_to_str(&p->dst)));
1353
1354                 return -1;
1355         }
1356
1357
1358         tcparray = vnn->tcp_array;
1359
1360         /* If this is the first tickle */
1361         if (tcparray == NULL) {
1362                 tcparray = talloc(vnn, struct ctdb_tcp_array);
1363                 CTDB_NO_MEMORY(ctdb, tcparray);
1364                 vnn->tcp_array = tcparray;
1365
1366                 tcparray->num = 0;
1367                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
1368                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1369
1370                 tcparray->connections[tcparray->num].src = p->src;
1371                 tcparray->connections[tcparray->num].dst = p->dst;
1372                 tcparray->num++;
1373
1374                 if (tcp_update_needed) {
1375                         vnn->tcp_update_needed = true;
1376                 }
1377                 return 0;
1378         }
1379
1380
1381         /* Do we already have this tickle ?*/
1382         tcp.src = p->src;
1383         tcp.dst = p->dst;
1384         if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
1385                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1386                         ctdb_addr_to_str(&tcp.dst),
1387                         ntohs(tcp.dst.ip.sin_port),
1388                         vnn->pnn));
1389                 return 0;
1390         }
1391
1392         /* A new tickle, we must add it to the array */
1393         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1394                                         struct ctdb_connection,
1395                                         tcparray->num+1);
1396         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1397
1398         tcparray->connections[tcparray->num].src = p->src;
1399         tcparray->connections[tcparray->num].dst = p->dst;
1400         tcparray->num++;
1401
1402         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1403                 ctdb_addr_to_str(&tcp.dst),
1404                 ntohs(tcp.dst.ip.sin_port),
1405                 vnn->pnn));
1406
1407         if (tcp_update_needed) {
1408                 vnn->tcp_update_needed = true;
1409         }
1410
1411         return 0;
1412 }
1413
1414
1415 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
1416 {
1417         struct ctdb_connection *tcpp;
1418
1419         if (vnn == NULL) {
1420                 return;
1421         }
1422
1423         /* if the array is empty we cant remove it
1424            and we don't need to do anything
1425          */
1426         if (vnn->tcp_array == NULL) {
1427                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1428                         ctdb_addr_to_str(&conn->dst),
1429                         ntohs(conn->dst.ip.sin_port)));
1430                 return;
1431         }
1432
1433
1434         /* See if we know this connection
1435            if we don't know this connection  then we dont need to do anything
1436          */
1437         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1438         if (tcpp == NULL) {
1439                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1440                         ctdb_addr_to_str(&conn->dst),
1441                         ntohs(conn->dst.ip.sin_port)));
1442                 return;
1443         }
1444
1445
1446         /* We need to remove this entry from the array.
1447            Instead of allocating a new array and copying data to it
1448            we cheat and just copy the last entry in the existing array
1449            to the entry that is to be removed and just shring the 
1450            ->num field
1451          */
1452         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1453         vnn->tcp_array->num--;
1454
1455         /* If we deleted the last entry we also need to remove the entire array
1456          */
1457         if (vnn->tcp_array->num == 0) {
1458                 talloc_free(vnn->tcp_array);
1459                 vnn->tcp_array = NULL;
1460         }               
1461
1462         vnn->tcp_update_needed = true;
1463
1464         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1465                 ctdb_addr_to_str(&conn->src),
1466                 ntohs(conn->src.ip.sin_port)));
1467 }
1468
1469
1470 /*
1471   called by a daemon to inform us of a TCP connection that one of its
1472   clients used are no longer needed in the tickle database
1473  */
1474 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1475 {
1476         struct ctdb_vnn *vnn;
1477         struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
1478
1479         /* If we don't have public IPs, tickles are useless */
1480         if (ctdb->vnn == NULL) {
1481                 return 0;
1482         }
1483
1484         vnn = find_public_ip_vnn(ctdb, &conn->dst);
1485         if (vnn == NULL) {
1486                 DEBUG(DEBUG_ERR,
1487                       (__location__ " unable to find public address %s\n",
1488                        ctdb_addr_to_str(&conn->dst)));
1489                 return 0;
1490         }
1491
1492         ctdb_remove_connection(vnn, conn);
1493
1494         return 0;
1495 }
1496
1497
1498 /*
1499   Called when another daemon starts - causes all tickles for all
1500   public addresses we are serving to be sent to the new node on the
1501   next check.  This actually causes the next scheduled call to
1502   tdb_update_tcp_tickles() to update all nodes.  This is simple and
1503   doesn't require careful error handling.
1504  */
1505 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
1506 {
1507         struct ctdb_vnn *vnn;
1508
1509         DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
1510                            (unsigned long) pnn));
1511
1512         for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
1513                 vnn->tcp_update_needed = true;
1514         }
1515
1516         return 0;
1517 }
1518
1519
1520 /*
1521   called when a client structure goes away - hook to remove
1522   elements from the tcp_list in all daemons
1523  */
1524 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1525 {
1526         while (client->tcp_list) {
1527                 struct ctdb_vnn *vnn;
1528                 struct ctdb_tcp_list *tcp = client->tcp_list;
1529                 struct ctdb_connection *conn = &tcp->connection;
1530
1531                 DLIST_REMOVE(client->tcp_list, tcp);
1532
1533                 vnn = find_public_ip_vnn(client->ctdb,
1534                                          &conn->dst);
1535                 if (vnn == NULL) {
1536                         DEBUG(DEBUG_ERR,
1537                               (__location__ " unable to find public address %s\n",
1538                                ctdb_addr_to_str(&conn->dst)));
1539                         continue;
1540                 }
1541
1542                 /* If the IP address is hosted on this node then
1543                  * remove the connection. */
1544                 if (vnn->pnn == client->ctdb->pnn) {
1545                         ctdb_remove_connection(vnn, conn);
1546                 }
1547
1548                 /* Otherwise this function has been called because the
1549                  * server IP address has been released to another node
1550                  * and the client has exited.  This means that we
1551                  * should not delete the connection information.  The
1552                  * takeover node processes connections too. */
1553         }
1554 }
1555
1556
/*
 * Release every public IP that is currently configured on this node.
 *
 * Does nothing when the disable_ip_failover tunable is 1.  For each
 * address still present on the system the "releaseip" event is run
 * and, if the address is gone afterwards, the common post-release
 * processing is done.  Addresses with an update already in flight are
 * skipped.  The number of released addresses is logged at the end.
 */
void ctdb_release_all_ips(struct ctdb_context *ctdb)
{
        struct ctdb_vnn *vnn, *next;
        int count = 0;

        if (ctdb->tunable.disable_ip_failover == 1) {
                return;
        }

        for (vnn = ctdb->vnn; vnn != NULL; vnn = next) {
                /* vnn can be freed below in release_ip_post() */
                next = vnn->next;

                /* Address not on the system: only the interface
                 * assignment needs to be dropped */
                if (!ctdb_sys_have_ip(&vnn->public_address)) {
                        ctdb_vnn_unassign_iface(ctdb, vnn);
                        continue;
                }

                /* Don't allow multiple releases at once.  Some code,
                 * particularly ctdb_tickle_sentenced_connections() is
                 * not re-entrant */
                if (vnn->update_in_flight) {
                        DEBUG(DEBUG_WARNING,
                              (__location__
                               " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
                                    ctdb_addr_to_str(&vnn->public_address),
                                    vnn->public_netmask_bits,
                                    ctdb_vnn_iface_string(vnn)));
                        continue;
                }
                vnn->update_in_flight = true;

                DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
                                    ctdb_addr_to_str(&vnn->public_address),
                                    vnn->public_netmask_bits,
                                    ctdb_vnn_iface_string(vnn)));

                /* The return value is not used, see the check below */
                ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
                                       ctdb_vnn_iface_string(vnn),
                                       ctdb_addr_to_str(&vnn->public_address),
                                       vnn->public_netmask_bits);
                /* releaseip timeouts are converted to success, so to
                 * detect failures just check if the IP address is
                 * still there...
                 */
                if (ctdb_sys_have_ip(&vnn->public_address)) {
                        DEBUG(DEBUG_ERR,
                              (__location__
                               " IP address %s not released\n",
                               ctdb_addr_to_str(&vnn->public_address)));
                        vnn->update_in_flight = false;
                        continue;
                }

                /* release_ip_post() returns NULL if it deleted the vnn,
                 * in which case there is no flag left to clear */
                vnn = release_ip_post(ctdb, vnn, &vnn->public_address);
                if (vnn != NULL) {
                        vnn->update_in_flight = false;
                }
                count++;
        }

        DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
}
1620
1621
1622 /*
1623   get list of public IPs
1624  */
1625 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
1626                                     struct ctdb_req_control_old *c, TDB_DATA *outdata)
1627 {
1628         int i, num, len;
1629         struct ctdb_public_ip_list_old *ips;
1630         struct ctdb_vnn *vnn;
1631         bool only_available = false;
1632
1633         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1634                 only_available = true;
1635         }
1636
1637         /* count how many public ip structures we have */
1638         num = 0;
1639         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1640                 num++;
1641         }
1642
1643         len = offsetof(struct ctdb_public_ip_list_old, ips) +
1644                 num*sizeof(struct ctdb_public_ip);
1645         ips = talloc_zero_size(outdata, len);
1646         CTDB_NO_MEMORY(ctdb, ips);
1647
1648         i = 0;
1649         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1650                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1651                         continue;
1652                 }
1653                 ips->ips[i].pnn  = vnn->pnn;
1654                 ips->ips[i].addr = vnn->public_address;
1655                 i++;
1656         }
1657         ips->num = i;
1658         len = offsetof(struct ctdb_public_ip_list_old, ips) +
1659                 i*sizeof(struct ctdb_public_ip);
1660
1661         outdata->dsize = len;
1662         outdata->dptr  = (uint8_t *)ips;
1663
1664         return 0;
1665 }
1666
1667
1668 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
1669                                         struct ctdb_req_control_old *c,
1670                                         TDB_DATA indata,
1671                                         TDB_DATA *outdata)
1672 {
1673         int i, num, len;
1674         ctdb_sock_addr *addr;
1675         struct ctdb_public_ip_info_old *info;
1676         struct ctdb_vnn *vnn;
1677         struct vnn_interface *iface;
1678
1679         addr = (ctdb_sock_addr *)indata.dptr;
1680
1681         vnn = find_public_ip_vnn(ctdb, addr);
1682         if (vnn == NULL) {
1683                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
1684                                  "'%s'not a public address\n",
1685                                  ctdb_addr_to_str(addr)));
1686                 return -1;
1687         }
1688
1689         /* count how many public ip structures we have */
1690         num = 0;
1691         for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
1692                 num++;
1693         }
1694
1695         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
1696                 num*sizeof(struct ctdb_iface);
1697         info = talloc_zero_size(outdata, len);
1698         CTDB_NO_MEMORY(ctdb, info);
1699
1700         info->ip.addr = vnn->public_address;
1701         info->ip.pnn = vnn->pnn;
1702         info->active_idx = 0xFFFFFFFF;
1703
1704         i = 0;
1705         for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
1706                 struct ctdb_interface *cur;
1707
1708                 cur = iface->iface;
1709                 if (vnn->iface == cur) {
1710                         info->active_idx = i;
1711                 }
1712                 strncpy(info->ifaces[i].name, cur->name,
1713                         sizeof(info->ifaces[i].name));
1714                 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
1715                 info->ifaces[i].link_state = cur->link_up;
1716                 info->ifaces[i].references = cur->references;
1717
1718                 i++;
1719         }
1720         info->num = i;
1721         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
1722                 i*sizeof(struct ctdb_iface);
1723
1724         outdata->dsize = len;
1725         outdata->dptr  = (uint8_t *)info;
1726
1727         return 0;
1728 }
1729
1730 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
1731                                 struct ctdb_req_control_old *c,
1732                                 TDB_DATA *outdata)
1733 {
1734         int i, num, len;
1735         struct ctdb_iface_list_old *ifaces;
1736         struct ctdb_interface *cur;
1737
1738         /* count how many public ip structures we have */
1739         num = 0;
1740         for (cur=ctdb->ifaces;cur;cur=cur->next) {
1741                 num++;
1742         }
1743
1744         len = offsetof(struct ctdb_iface_list_old, ifaces) +
1745                 num*sizeof(struct ctdb_iface);
1746         ifaces = talloc_zero_size(outdata, len);
1747         CTDB_NO_MEMORY(ctdb, ifaces);
1748
1749         i = 0;
1750         for (cur=ctdb->ifaces;cur;cur=cur->next) {
1751                 strncpy(ifaces->ifaces[i].name, cur->name,
1752                         sizeof(ifaces->ifaces[i].name));
1753                 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
1754                 ifaces->ifaces[i].link_state = cur->link_up;
1755                 ifaces->ifaces[i].references = cur->references;
1756                 i++;
1757         }
1758         ifaces->num = i;
1759         len = offsetof(struct ctdb_iface_list_old, ifaces) +
1760                 i*sizeof(struct ctdb_iface);
1761
1762         outdata->dsize = len;
1763         outdata->dptr  = (uint8_t *)ifaces;
1764
1765         return 0;
1766 }
1767
1768 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
1769                                     struct ctdb_req_control_old *c,
1770                                     TDB_DATA indata)
1771 {
1772         struct ctdb_iface *info;
1773         struct ctdb_interface *iface;
1774         bool link_up = false;
1775
1776         info = (struct ctdb_iface *)indata.dptr;
1777
1778         if (info->name[CTDB_IFACE_SIZE] != '\0') {
1779                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
1780                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
1781                                   len, len, info->name));
1782                 return -1;
1783         }
1784
1785         switch (info->link_state) {
1786         case 0:
1787                 link_up = false;
1788                 break;
1789         case 1:
1790                 link_up = true;
1791                 break;
1792         default:
1793                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
1794                                   (unsigned int)info->link_state));
1795                 return -1;
1796         }
1797
1798         if (info->references != 0) {
1799                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
1800                                   (unsigned int)info->references));
1801                 return -1;
1802         }
1803
1804         iface = ctdb_find_iface(ctdb, info->name);
1805         if (iface == NULL) {
1806                 return -1;
1807         }
1808
1809         if (link_up == iface->link_up) {
1810                 return 0;
1811         }
1812
1813         DEBUG(DEBUG_ERR,
1814               ("iface[%s] has changed it's link status %s => %s\n",
1815                iface->name,
1816                iface->link_up?"up":"down",
1817                link_up?"up":"down"));
1818
1819         iface->link_up = link_up;
1820         return 0;
1821 }
1822
1823
1824 /*
1825   called by a daemon to inform us of the entire list of TCP tickles for
1826   a particular public address.
1827   this control should only be sent by the node that is currently serving
1828   that public address.
1829  */
1830 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1831 {
1832         struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
1833         struct ctdb_tcp_array *tcparray;
1834         struct ctdb_vnn *vnn;
1835
1836         /* We must at least have tickles.num or else we cant verify the size
1837            of the received data blob
1838          */
1839         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
1840                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
1841                 return -1;
1842         }
1843
1844         /* verify that the size of data matches what we expect */
1845         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
1846                          + sizeof(struct ctdb_connection) * list->num) {
1847                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
1848                 return -1;
1849         }
1850
1851         DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
1852                            ctdb_addr_to_str(&list->addr)));
1853
1854         vnn = find_public_ip_vnn(ctdb, &list->addr);
1855         if (vnn == NULL) {
1856                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
1857                         ctdb_addr_to_str(&list->addr)));
1858
1859                 return 1;
1860         }
1861
1862         if (vnn->pnn == ctdb->pnn) {
1863                 DEBUG(DEBUG_INFO,
1864                       ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
1865                        ctdb_addr_to_str(&list->addr)));
1866                 return 0;
1867         }
1868
1869         /* remove any old ticklelist we might have */
1870         talloc_free(vnn->tcp_array);
1871         vnn->tcp_array = NULL;
1872
1873         tcparray = talloc(vnn, struct ctdb_tcp_array);
1874         CTDB_NO_MEMORY(ctdb, tcparray);
1875
1876         tcparray->num = list->num;
1877
1878         tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
1879         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1880
1881         memcpy(tcparray->connections, &list->connections[0],
1882                sizeof(struct ctdb_connection)*tcparray->num);
1883
1884         /* We now have a new fresh tickle list array for this vnn */
1885         vnn->tcp_array = tcparray;
1886
1887         return 0;
1888 }
1889
1890 /*
1891   called to return the full list of tickles for the puclic address associated 
1892   with the provided vnn
1893  */
1894 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1895 {
1896         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1897         struct ctdb_tickle_list_old *list;
1898         struct ctdb_tcp_array *tcparray;
1899         int num, i;
1900         struct ctdb_vnn *vnn;
1901         unsigned port;
1902
1903         vnn = find_public_ip_vnn(ctdb, addr);
1904         if (vnn == NULL) {
1905                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
1906                         ctdb_addr_to_str(addr)));
1907
1908                 return 1;
1909         }
1910
1911         port = ctdb_addr_to_port(addr);
1912
1913         tcparray = vnn->tcp_array;
1914         num = 0;
1915         if (tcparray != NULL) {
1916                 if (port == 0) {
1917                         /* All connections */
1918                         num = tcparray->num;
1919                 } else {
1920                         /* Count connections for port */
1921                         for (i = 0; i < tcparray->num; i++) {
1922                                 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
1923                                         num++;
1924                                 }
1925                         }
1926                 }
1927         }
1928
1929         outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
1930                         + sizeof(struct ctdb_connection) * num;
1931
1932         outdata->dptr  = talloc_size(outdata, outdata->dsize);
1933         CTDB_NO_MEMORY(ctdb, outdata->dptr);
1934         list = (struct ctdb_tickle_list_old *)outdata->dptr;
1935
1936         list->addr = *addr;
1937         list->num = num;
1938
1939         if (num == 0) {
1940                 return 0;
1941         }
1942
1943         num = 0;
1944         for (i = 0; i < tcparray->num; i++) {
1945                 if (port == 0 || \
1946                     port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
1947                         list->connections[num] = tcparray->connections[i];
1948                         num++;
1949                 }
1950         }
1951
1952         return 0;
1953 }
1954
1955
1956 /*
1957   set the list of all tcp tickles for a public address
1958  */
1959 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
1960                                             ctdb_sock_addr *addr,
1961                                             struct ctdb_tcp_array *tcparray)
1962 {
1963         int ret, num;
1964         TDB_DATA data;
1965         struct ctdb_tickle_list_old *list;
1966
1967         if (tcparray) {
1968                 num = tcparray->num;
1969         } else {
1970                 num = 0;
1971         }
1972
1973         data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
1974                         sizeof(struct ctdb_connection) * num;
1975         data.dptr = talloc_size(ctdb, data.dsize);
1976         CTDB_NO_MEMORY(ctdb, data.dptr);
1977
1978         list = (struct ctdb_tickle_list_old *)data.dptr;
1979         list->addr = *addr;
1980         list->num = num;
1981         if (tcparray) {
1982                 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
1983         }
1984
1985         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
1986                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
1987                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1988         if (ret != 0) {
1989                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
1990                 return -1;
1991         }
1992
1993         talloc_free(data.dptr);
1994
1995         return ret;
1996 }
1997
1998
1999 /*
2000   perform tickle updates if required
2001  */
2002 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2003                                     struct tevent_timer *te,
2004                                     struct timeval t, void *private_data)
2005 {
2006         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2007         int ret;
2008         struct ctdb_vnn *vnn;
2009
2010         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2011                 /* we only send out updates for public addresses that 
2012                    we have taken over
2013                  */
2014                 if (ctdb->pnn != vnn->pnn) {
2015                         continue;
2016                 }
2017                 /* We only send out the updates if we need to */
2018                 if (!vnn->tcp_update_needed) {
2019                         continue;
2020                 }
2021                 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2022                                                        &vnn->public_address,
2023                                                        vnn->tcp_array);
2024                 if (ret != 0) {
2025                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2026                                 ctdb_addr_to_str(&vnn->public_address)));
2027                 } else {
2028                         DEBUG(DEBUG_INFO,
2029                               ("Sent tickle update for public address %s\n",
2030                                ctdb_addr_to_str(&vnn->public_address)));
2031                         vnn->tcp_update_needed = false;
2032                 }
2033         }
2034
2035         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2036                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2037                          ctdb_update_tcp_tickles, ctdb);
2038 }
2039
2040 /*
2041   start periodic update of tcp tickles
2042  */
2043 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2044 {
2045         ctdb->tickle_update_context = talloc_new(ctdb);
2046
2047         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2048                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2049                          ctdb_update_tcp_tickles, ctdb);
2050 }
2051
2052
2053
2054
/*
  state for one series of gratuitous ARPs: allocated by
  ctdb_control_send_gratious_arp() and passed to the
  send_gratious_arp() timer handler until the series is finished
 */
2055 struct control_gratious_arp {
2056         struct ctdb_context *ctdb; /* daemon context; timers are added on ctdb->ev */
2057         ctdb_sock_addr addr; /* address handed to ctdb_sys_send_arp() */
2058         const char *iface; /* interface name handed to ctdb_sys_send_arp() */
2059         int count; /* sends done so far; the series stops at CTDB_ARP_REPEAT */
2060 };
2061
2062 /*
2063   send a control_gratuitous arp
2064  */
2065 static void send_gratious_arp(struct tevent_context *ev,
2066                               struct tevent_timer *te,
2067                               struct timeval t, void *private_data)
2068 {
2069         int ret;
2070         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2071                                                         struct control_gratious_arp);
2072
2073         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2074         if (ret != 0) {
2075                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2076                                  arp->iface, strerror(errno)));
2077         }
2078
2079
2080         arp->count++;
2081         if (arp->count == CTDB_ARP_REPEAT) {
2082                 talloc_free(arp);
2083                 return;
2084         }
2085
2086         tevent_add_timer(arp->ctdb->ev, arp,
2087                          timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2088                          send_gratious_arp, arp);
2089 }
2090
2091
2092 /*
2093   send a gratious arp 
2094  */
2095 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2096 {
2097         struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2098         struct control_gratious_arp *arp;
2099
2100         /* verify the size of indata */
2101         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2102                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2103                                  (unsigned)indata.dsize, 
2104                                  (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2105                 return -1;
2106         }
2107         if (indata.dsize != 
2108                 ( offsetof(struct ctdb_addr_info_old, iface)
2109                 + gratious_arp->len ) ){
2110
2111                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2112                         "but should be %u bytes\n", 
2113                          (unsigned)indata.dsize, 
2114                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2115                 return -1;
2116         }
2117
2118
2119         arp = talloc(ctdb, struct control_gratious_arp);
2120         CTDB_NO_MEMORY(ctdb, arp);
2121
2122         arp->ctdb  = ctdb;
2123         arp->addr   = gratious_arp->addr;
2124         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2125         CTDB_NO_MEMORY(ctdb, arp->iface);
2126         arp->count = 0;
2127
2128         tevent_add_timer(arp->ctdb->ev, arp,
2129                          timeval_zero(), send_gratious_arp, arp);
2130
2131         return 0;
2132 }
2133
2134 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2135 {
2136         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2137         int ret;
2138
2139         /* verify the size of indata */
2140         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2141                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2142                 return -1;
2143         }
2144         if (indata.dsize != 
2145                 ( offsetof(struct ctdb_addr_info_old, iface)
2146                 + pub->len ) ){
2147
2148                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2149                         "but should be %u bytes\n", 
2150                          (unsigned)indata.dsize, 
2151                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2152                 return -1;
2153         }
2154
2155         DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2156
2157         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2158
2159         if (ret != 0) {
2160                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2161                 return -1;
2162         }
2163
2164         return 0;
2165 }
2166
2167 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2168 {
2169         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2170         struct ctdb_vnn *vnn;
2171
2172         /* verify the size of indata */
2173         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2174                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2175                 return -1;
2176         }
2177         if (indata.dsize != 
2178                 ( offsetof(struct ctdb_addr_info_old, iface)
2179                 + pub->len ) ){
2180
2181                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2182                         "but should be %u bytes\n", 
2183                          (unsigned)indata.dsize, 
2184                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2185                 return -1;
2186         }
2187
2188         DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2189
2190         /* walk over all public addresses until we find a match */
2191         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2192                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2193                         if (vnn->pnn == ctdb->pnn) {
2194                                 /* This IP is currently being hosted.
2195                                  * Defer the deletion until the next
2196                                  * takeover run. "ctdb reloadips" will
2197                                  * always cause a takeover run.  "ctdb
2198                                  * delip" will now need an explicit
2199                                  * "ctdb ipreallocated" afterwards. */
2200                                 vnn->delete_pending = true;
2201                         } else {
2202                                 /* This IP is not hosted on the
2203                                  * current node so just delete it
2204                                  * now. */
2205                                 do_delete_ip(ctdb, vnn);
2206                         }
2207
2208                         return 0;
2209                 }
2210         }
2211
2212         DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2213                          ctdb_addr_to_str(&pub->addr)));
2214         return -1;
2215 }
2216
2217
/*
  state carried from ctdb_control_ipreallocated() to
  ctdb_ipreallocated_callback() while the "ipreallocated" event runs
 */
2218 struct ipreallocated_callback_state {
2219         struct ctdb_req_control_old *c; /* control to reply to once the event has finished */
2220 };
2221
2222 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2223                                         int status, void *p)
2224 {
2225         struct ipreallocated_callback_state *state =
2226                 talloc_get_type(p, struct ipreallocated_callback_state);
2227
2228         if (status != 0) {
2229                 DEBUG(DEBUG_ERR,
2230                       (" \"ipreallocated\" event script failed (status %d)\n",
2231                        status));
2232                 if (status == -ETIME) {
2233                         ctdb_ban_self(ctdb);
2234                 }
2235         }
2236
2237         ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2238         talloc_free(state);
2239 }
2240
2241 /* A control to run the ipreallocated event */
2242 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2243                                    struct ctdb_req_control_old *c,
2244                                    bool *async_reply)
2245 {
2246         int ret;
2247         struct ipreallocated_callback_state *state;
2248
2249         state = talloc(ctdb, struct ipreallocated_callback_state);
2250         CTDB_NO_MEMORY(ctdb, state);
2251
2252         DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
2253
2254         ret = ctdb_event_script_callback(ctdb, state,
2255                                          ctdb_ipreallocated_callback, state,
2256                                          CTDB_EVENT_IPREALLOCATED,
2257                                          "%s", "");
2258
2259         if (ret != 0) {
2260                 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
2261                 talloc_free(state);
2262                 return -1;
2263         }
2264
2265         /* tell the control that we will be reply asynchronously */
2266         state->c    = talloc_steal(state, c);
2267         *async_reply = true;
2268
2269         return 0;
2270 }
2271
2272
/*
  state of one running "reloadips" child process; its destructor
  (ctdb_reloadips_destructor) replies to the control and kills the child
 */
2273 struct ctdb_reloadips_handle {
2274         struct ctdb_context *ctdb; /* daemon context; ctdb->reload_ips may point back at this handle */
2275         struct ctdb_req_control_old *c; /* control to reply to from the destructor, NULL if none */
2276         int status; /* status sent in that reply; set by ctdb_reloadips_child_handler() */
2277         int fd[2]; /* fd[0] is read for the child's one-byte result - presumably a pipe, confirm at the creation site */
2278         pid_t child; /* pid that the destructor sends SIGKILL to */
2279         struct tevent_fd *fde; /* NOTE(review): presumably the fd event watching fd[0]; not used in the code visible here */
2280 };
2281
2282 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
2283 {
2284         if (h == h->ctdb->reload_ips) {
2285                 h->ctdb->reload_ips = NULL;
2286         }
2287         if (h->c != NULL) {
2288                 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
2289                 h->c = NULL;
2290         }
2291         ctdb_kill(h->ctdb, h->child, SIGKILL);
2292         return 0;
2293 }
2294
2295 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
2296                                          struct tevent_timer *te,
2297                                          struct timeval t, void *private_data)
2298 {
2299         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2300
2301         talloc_free(h);
2302 }
2303
2304 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
2305                                          struct tevent_fd *fde,
2306                                          uint16_t flags, void *private_data)
2307 {
2308         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2309
2310         char res;
2311         int ret;
2312
2313         ret = sys_read(h->fd[0], &res, 1);
2314         if (ret < 1 || res != 0) {
2315                 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
2316                 res = 1;
2317         }
2318         h->status = res;
2319
2320         talloc_free(h);
2321 }
2322
2323 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
2324 {
2325         TALLOC_CTX *mem_ctx = talloc_new(NULL);
2326         struct ctdb_public_ip_list_old *ips;
2327         struct ctdb_vnn *vnn;
2328         struct client_async_data *async_data;
2329         struct timeval timeout;
2330         TDB_DATA data;
2331         struct ctdb_client_control_state *state;
2332         bool first_add;
2333         int i, ret;
2334
2335         CTDB_NO_MEMORY(ctdb, mem_ctx);
2336
2337         /* Read IPs from local node */
2338         ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
2339                                        CTDB_CURRENT_NODE, mem_ctx, &ips);
2340         if (ret != 0) {
2341                 DEBUG(DEBUG_ERR,
2342                       ("Unable to fetch public IPs from local node\n"));
2343                 talloc_free(mem_ctx);
2344                 return -1;
2345         }
2346
2347         /* Read IPs file - this is safe since this is a child process */
2348         ctdb->vnn = NULL;
2349         if (ctdb_set_public_addresses(ctdb, false) != 0) {
2350                 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
2351                 talloc_free(mem_ctx);
2352                 return -1;
2353         }
2354
2355         async_data = talloc_zero(mem_ctx, struct client_async_data);
2356         CTDB_NO_MEMORY(ctdb, async_data);
2357
2358         /* Compare IPs between node and file for IPs to be deleted */
2359         for (i = 0; i < ips->num; i++) {
2360                 /* */
2361                 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2362                         if (ctdb_same_ip(&vnn->public_address,
2363                                          &ips->ips[i].addr)) {
2364                                 /* IP is still in file */
2365                                 break;
2366                         }
2367                 }
2368
2369                 if (vnn == NULL) {
2370                         /* Delete IP ips->ips[i] */
2371                         struct ctdb_addr_info_old *pub;
2372
2373                         DEBUG(DEBUG_NOTICE,
2374                               ("IP %s no longer configured, deleting it\n",
2375                                ctdb_addr_to_str(&ips->ips[i].addr)));
2376
2377                         pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
2378                         CTDB_NO_MEMORY(ctdb, pub);
2379
2380                         pub->addr  = ips->ips[i].addr;
2381                         pub->mask  = 0;
2382                         pub->len   = 0;
2383
2384                         timeout = TAKEOVER_TIMEOUT();
2385
2386                         data.dsize = offsetof(struct ctdb_addr_info_old,
2387                                               iface) + pub->len;
2388                         data.dptr = (uint8_t *)pub;
2389
2390                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2391                                                   CTDB_CONTROL_DEL_PUBLIC_IP,
2392                                                   0, data, async_data,
2393                                                   &timeout, NULL);
2394                         if (state == NULL) {
2395                                 DEBUG(DEBUG_ERR,
2396                                       (__location__
2397                                        " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
2398                                 goto failed;
2399                         }
2400
2401                         ctdb_client_async_add(async_data, state);
2402                 }
2403         }
2404
2405         /* Compare IPs between node and file for IPs to be added */
2406         first_add = true;
2407         for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2408                 for (i = 0; i < ips->num; i++) {
2409                         if (ctdb_same_ip(&vnn->public_address,
2410                                          &ips->ips[i].addr)) {
2411                                 /* IP already on node */
2412                                 break;
2413                         }
2414                 }
2415                 if (i == ips->num) {
2416                         /* Add IP ips->ips[i] */
2417                         struct ctdb_addr_info_old *pub;
2418                         const char *ifaces = NULL;
2419                         uint32_t len;
2420                         struct vnn_interface *iface = NULL;
2421
2422                         DEBUG(DEBUG_NOTICE,
2423                               ("New IP %s configured, adding it\n",
2424                                ctdb_addr_to_str(&vnn->public_address)));
2425                         if (first_add) {
2426                                 uint32_t pnn = ctdb_get_pnn(ctdb);
2427
2428                                 data.dsize = sizeof(pnn);
2429                                 data.dptr  = (uint8_t *)&pnn;
2430
2431                                 ret = ctdb_client_send_message(
2432                                         ctdb,
2433                                         CTDB_BROADCAST_CONNECTED,
2434                                         CTDB_SRVID_REBALANCE_NODE,
2435                                         data);
2436                                 if (ret != 0) {
2437                                         DEBUG(DEBUG_WARNING,
2438                                               ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
2439                                 }
2440
2441                                 first_add = false;
2442                         }
2443
2444                         ifaces = vnn->ifaces->iface->name;
2445                         iface = vnn->ifaces->next;
2446                         while (iface != NULL) {
2447                                 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
2448                                                          iface->iface->name);
2449                                 iface = iface->next;
2450                         }
2451
2452                         len   = strlen(ifaces) + 1;
2453                         pub = talloc_zero_size(mem_ctx,
2454                                                offsetof(struct ctdb_addr_info_old, iface) + len);
2455                         CTDB_NO_MEMORY(ctdb, pub);
2456
2457                         pub->addr  = vnn->public_address;
2458                         pub->mask  = vnn->public_netmask_bits;
2459                         pub->len   = len;
2460                         memcpy(&pub->iface[0], ifaces, pub->len);
2461
2462                         timeout = TAKEOVER_TIMEOUT();
2463
2464                         data.dsize = offsetof(struct ctdb_addr_info_old,
2465                                               iface) + pub->len;
2466                         data.dptr = (uint8_t *)pub;
2467
2468                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2469                                                   CTDB_CONTROL_ADD_PUBLIC_IP,
2470                                                   0, data, async_data,
2471                                                   &timeout, NULL);
2472                         if (state == NULL) {
2473                                 DEBUG(DEBUG_ERR,
2474                                       (__location__
2475                                        " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
2476                                 goto failed;
2477                         }
2478
2479                         ctdb_client_async_add(async_data, state);
2480                 }
2481         }
2482
2483         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2484                 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
2485                 goto failed;
2486         }
2487
2488         talloc_free(mem_ctx);
2489         return 0;
2490
2491 failed:
2492         talloc_free(mem_ctx);
2493         return -1;
2494 }
2495
2496 /* This control is sent to force the node to re-read the public addresses file
2497    and drop any addresses we should nnot longer host, and add new addresses
2498    that we are now able to host
2499 */
2500 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
2501 {
2502         struct ctdb_reloadips_handle *h;
2503         pid_t parent = getpid();
2504
2505         if (ctdb->reload_ips != NULL) {
2506                 talloc_free(ctdb->reload_ips);
2507                 ctdb->reload_ips = NULL;
2508         }
2509
2510         h = talloc(ctdb, struct ctdb_reloadips_handle);
2511         CTDB_NO_MEMORY(ctdb, h);
2512         h->ctdb     = ctdb;
2513         h->c        = NULL;
2514         h->status   = -1;
2515         
2516         if (pipe(h->fd) == -1) {
2517                 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
2518                 talloc_free(h);
2519                 return -1;
2520         }
2521
2522         h->child = ctdb_fork(ctdb);
2523         if (h->child == (pid_t)-1) {
2524                 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
2525                 close(h->fd[0]);
2526                 close(h->fd[1]);
2527                 talloc_free(h);
2528                 return -1;
2529         }
2530
2531         /* child process */
2532         if (h->child == 0) {
2533                 signed char res = 0;
2534
2535                 close(h->fd[0]);
2536
2537                 prctl_set_comment("ctdb_reloadips");
2538                 if (switch_from_server_to_client(ctdb) != 0) {
2539                         DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
2540                         res = -1;
2541                 } else {
2542                         res = ctdb_reloadips_child(ctdb);
2543                         if (res != 0) {
2544                                 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
2545                         }
2546                 }
2547
2548                 sys_write(h->fd[1], &res, 1);
2549                 ctdb_wait_for_process_to_exit(parent);
2550                 _exit(0);
2551         }
2552
2553         h->c             = talloc_steal(h, c);
2554
2555         close(h->fd[1]);
2556         set_close_on_exec(h->fd[0]);
2557
2558         talloc_set_destructor(h, ctdb_reloadips_destructor);
2559
2560
2561         h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
2562                                ctdb_reloadips_child_handler, (void *)h);
2563         tevent_fd_set_auto_close(h->fde);
2564
2565         tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
2566                          ctdb_reloadips_timeout_event, h);
2567
2568         /* we reply later */
2569         *async_reply = true;
2570         return 0;
2571 }