ctdb-daemon: Fix signed/unsigned comparisons by using constant
[samba.git] / ctdb / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6    Copyright (C) Martin Schwenke  2011
7
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3 of the License, or
11    (at your option) any later version.
12    
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17    
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, see <http://www.gnu.org/licenses/>.
20 */
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
26
27 #include <talloc.h>
28 #include <tevent.h>
29
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/sys_rw.h"
34 #include "lib/util/util_process.h"
35
36 #include "protocol/protocol_util.h"
37
38 #include "ctdb_private.h"
39 #include "ctdb_client.h"
40
41 #include "common/rb_tree.h"
42 #include "common/reqid.h"
43 #include "common/system.h"
44 #include "common/system_socket.h"
45 #include "common/common.h"
46 #include "common/logging.h"
47
48 #include "server/ctdb_config.h"
49
50 #include "server/ipalloc.h"
51
/*
 * Timeout built from the takeover_timeout tunable.  The expansion
 * refers to a variable called "ctdb", which must be in scope wherever
 * the macro is used.
 */
#define TAKEOVER_TIMEOUT() \
	timeval_current_ofs(ctdb->tunable.takeover_timeout, 0)

/*
 * Gratuitous ARP scheduling: CTDB_ARP_INTERVAL is passed as the first
 * argument of timeval_current_ofs() when re-arming the ARP timer and
 * CTDB_ARP_REPEAT is the send count at which repetition stops.
 */
#define CTDB_ARP_INTERVAL 1
#define CTDB_ARP_REPEAT   3
56
/* A named local interface, kept on the ctdb->ifaces list */
struct ctdb_interface {
	/* list linkage */
	struct ctdb_interface *prev;
	struct ctdb_interface *next;
	/* interface name, used to look the entry up */
	const char *name;
	/* set to true when the entry is created */
	bool link_up;
	/* incremented/decremented as VNNs are assigned/unassigned */
	uint32_t references;
};
63
/* One element of a VNN's list of candidate interfaces (vnn->ifaces) */
struct vnn_interface {
	/* list linkage */
	struct vnn_interface *prev;
	struct vnn_interface *next;
	/* the interface this element refers to */
	struct ctdb_interface *iface;
};
68
69 /* state associated with a public ip address */
70 struct ctdb_vnn {
71         struct ctdb_vnn *prev, *next;
72
73         struct ctdb_interface *iface;
74         struct vnn_interface *ifaces;
75         ctdb_sock_addr public_address;
76         uint8_t public_netmask_bits;
77
78         /*
79          * The node number that is serving this public address - set
80          * to CTDB_UNKNOWN_PNN if node is serving it
81          */
82         uint32_t pnn;
83
84         /* List of clients to tickle for this public address */
85         struct ctdb_tcp_array *tcp_array;
86
87         /* whether we need to update the other nodes with changes to our list
88            of connected clients */
89         bool tcp_update_needed;
90
91         /* a context to hang sending gratious arp events off */
92         TALLOC_CTX *takeover_ctx;
93
94         /* Set to true any time an update to this VNN is in flight.
95            This helps to avoid races. */
96         bool update_in_flight;
97
98         /* If CTDB_CONTROL_DEL_PUBLIC_IP is received for this IP
99          * address then this flag is set.  It will be deleted in the
100          * release IP callback. */
101         bool delete_pending;
102 };
103
104 static const char *iface_string(const struct ctdb_interface *iface)
105 {
106         return (iface != NULL ? iface->name : "__none__");
107 }
108
109 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
110 {
111         return iface_string(vnn->iface);
112 }
113
114 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
115                                               const char *iface);
116
117 static struct ctdb_interface *
118 ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
119 {
120         struct ctdb_interface *i;
121
122         if (strlen(iface) > CTDB_IFACE_SIZE) {
123                 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
124                 return NULL;
125         }
126
127         /* Verify that we don't have an entry for this ip yet */
128         i = ctdb_find_iface(ctdb, iface);
129         if (i != NULL) {
130                 return i;
131         }
132
133         /* create a new structure for this interface */
134         i = talloc_zero(ctdb, struct ctdb_interface);
135         if (i == NULL) {
136                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
137                 return NULL;
138         }
139         i->name = talloc_strdup(i, iface);
140         if (i->name == NULL) {
141                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
142                 talloc_free(i);
143                 return NULL;
144         }
145
146         i->link_up = true;
147
148         DLIST_ADD(ctdb->ifaces, i);
149
150         return i;
151 }
152
153 static bool vnn_has_interface(struct ctdb_vnn *vnn,
154                               const struct ctdb_interface *iface)
155 {
156         struct vnn_interface *i;
157
158         for (i = vnn->ifaces; i != NULL; i = i->next) {
159                 if (iface == i->iface) {
160                         return true;
161                 }
162         }
163
164         return false;
165 }
166
167 /* If any interfaces now have no possible IPs then delete them.  This
168  * implementation is naive (i.e. simple) rather than clever
169  * (i.e. complex).  Given that this is run on delip and that operation
170  * is rare, this doesn't need to be efficient - it needs to be
171  * foolproof.  One alternative is reference counting, where the logic
172  * is distributed and can, therefore, be broken in multiple places.
173  * Another alternative is to build a red-black tree of interfaces that
174  * can have addresses (by walking ctdb->vnn once) and then walking
175  * ctdb->ifaces once and deleting those not in the tree.  Let's go to
176  * one of those if the naive implementation causes problems...  :-)
177  */
178 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
179                                         struct ctdb_vnn *vnn)
180 {
181         struct ctdb_interface *i, *next;
182
183         /* For each interface, check if there's an IP using it. */
184         for (i = ctdb->ifaces; i != NULL; i = next) {
185                 struct ctdb_vnn *tv;
186                 bool found;
187                 next = i->next;
188
189                 /* Only consider interfaces named in the given VNN. */
190                 if (!vnn_has_interface(vnn, i)) {
191                         continue;
192                 }
193
194                 /* Search for a vnn with this interface. */
195                 found = false;
196                 for (tv=ctdb->vnn; tv; tv=tv->next) {
197                         if (vnn_has_interface(tv, i)) {
198                                 found = true;
199                                 break;
200                         }
201                 }
202
203                 if (!found) {
204                         /* None of the VNNs are using this interface. */
205                         DLIST_REMOVE(ctdb->ifaces, i);
206                         talloc_free(i);
207                 }
208         }
209 }
210
211
212 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
213                                               const char *iface)
214 {
215         struct ctdb_interface *i;
216
217         for (i=ctdb->ifaces;i;i=i->next) {
218                 if (strcmp(i->name, iface) == 0) {
219                         return i;
220                 }
221         }
222
223         return NULL;
224 }
225
226 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
227                                                   struct ctdb_vnn *vnn)
228 {
229         struct vnn_interface *i;
230         struct ctdb_interface *cur = NULL;
231         struct ctdb_interface *best = NULL;
232
233         for (i = vnn->ifaces; i != NULL; i = i->next) {
234
235                 cur = i->iface;
236
237                 if (!cur->link_up) {
238                         continue;
239                 }
240
241                 if (best == NULL) {
242                         best = cur;
243                         continue;
244                 }
245
246                 if (cur->references < best->references) {
247                         best = cur;
248                         continue;
249                 }
250         }
251
252         return best;
253 }
254
255 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
256                                      struct ctdb_vnn *vnn)
257 {
258         struct ctdb_interface *best = NULL;
259
260         if (vnn->iface) {
261                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
262                                    "still assigned to iface '%s'\n",
263                                    ctdb_addr_to_str(&vnn->public_address),
264                                    ctdb_vnn_iface_string(vnn)));
265                 return 0;
266         }
267
268         best = ctdb_vnn_best_iface(ctdb, vnn);
269         if (best == NULL) {
270                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
271                                   "cannot assign to iface any iface\n",
272                                   ctdb_addr_to_str(&vnn->public_address)));
273                 return -1;
274         }
275
276         vnn->iface = best;
277         best->references++;
278         vnn->pnn = ctdb->pnn;
279
280         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
281                            "now assigned to iface '%s' refs[%d]\n",
282                            ctdb_addr_to_str(&vnn->public_address),
283                            ctdb_vnn_iface_string(vnn),
284                            best->references));
285         return 0;
286 }
287
288 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
289                                     struct ctdb_vnn *vnn)
290 {
291         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
292                            "now unassigned (old iface '%s' refs[%d])\n",
293                            ctdb_addr_to_str(&vnn->public_address),
294                            ctdb_vnn_iface_string(vnn),
295                            vnn->iface?vnn->iface->references:0));
296         if (vnn->iface) {
297                 vnn->iface->references--;
298         }
299         vnn->iface = NULL;
300         if (vnn->pnn == ctdb->pnn) {
301                 vnn->pnn = CTDB_UNKNOWN_PNN;
302         }
303 }
304
305 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
306                                struct ctdb_vnn *vnn)
307 {
308         uint32_t flags;
309         struct vnn_interface *i;
310
311         /* Nodes that are not RUNNING can not host IPs */
312         if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
313                 return false;
314         }
315
316         flags = ctdb->nodes[ctdb->pnn]->flags;
317         if ((flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED)) != 0) {
318                 return false;
319         }
320
321         if (vnn->delete_pending) {
322                 return false;
323         }
324
325         if (vnn->iface && vnn->iface->link_up) {
326                 return true;
327         }
328
329         for (i = vnn->ifaces; i != NULL; i = i->next) {
330                 if (i->iface->link_up) {
331                         return true;
332                 }
333         }
334
335         return false;
336 }
337
338 struct ctdb_takeover_arp {
339         struct ctdb_context *ctdb;
340         uint32_t count;
341         ctdb_sock_addr addr;
342         struct ctdb_tcp_array *tcparray;
343         struct ctdb_vnn *vnn;
344 };
345
346
347 /*
348   lists of tcp endpoints
349  */
350 struct ctdb_tcp_list {
351         struct ctdb_tcp_list *prev, *next;
352         struct ctdb_connection connection;
353 };
354
355 /*
356   list of clients to kill on IP release
357  */
358 struct ctdb_client_ip {
359         struct ctdb_client_ip *prev, *next;
360         struct ctdb_context *ctdb;
361         ctdb_sock_addr addr;
362         uint32_t client_id;
363 };
364
365
366 /*
367   send a gratuitous arp
368  */
369 static void ctdb_control_send_arp(struct tevent_context *ev,
370                                   struct tevent_timer *te,
371                                   struct timeval t, void *private_data)
372 {
373         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
374                                                         struct ctdb_takeover_arp);
375         int ret;
376         struct ctdb_tcp_array *tcparray;
377         const char *iface = ctdb_vnn_iface_string(arp->vnn);
378
379         ret = ctdb_sys_send_arp(&arp->addr, iface);
380         if (ret != 0) {
381                 DBG_ERR("Failed to send ARP on interface %s: %s\n",
382                         iface, strerror(ret));
383         }
384
385         tcparray = arp->tcparray;
386         if (tcparray) {
387                 unsigned int i;
388
389                 for (i=0;i<tcparray->num;i++) {
390                         struct ctdb_connection *tcon;
391
392                         tcon = &tcparray->connections[i];
393                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
394                                 (unsigned)ntohs(tcon->dst.ip.sin_port),
395                                 ctdb_addr_to_str(&tcon->src),
396                                 (unsigned)ntohs(tcon->src.ip.sin_port)));
397                         ret = ctdb_sys_send_tcp(
398                                 &tcon->src,
399                                 &tcon->dst,
400                                 0, 0, 0);
401                         if (ret != 0) {
402                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
403                                         ctdb_addr_to_str(&tcon->src)));
404                         }
405                 }
406         }
407
408         arp->count++;
409
410         if (arp->count == CTDB_ARP_REPEAT) {
411                 talloc_free(arp);
412                 return;
413         }
414
415         tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
416                          timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
417                          ctdb_control_send_arp, arp);
418 }
419
420 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
421                                        struct ctdb_vnn *vnn)
422 {
423         struct ctdb_takeover_arp *arp;
424         struct ctdb_tcp_array *tcparray;
425
426         if (!vnn->takeover_ctx) {
427                 vnn->takeover_ctx = talloc_new(vnn);
428                 if (!vnn->takeover_ctx) {
429                         return -1;
430                 }
431         }
432
433         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
434         if (!arp) {
435                 return -1;
436         }
437
438         arp->ctdb = ctdb;
439         arp->addr = vnn->public_address;
440         arp->vnn  = vnn;
441
442         tcparray = vnn->tcp_array;
443         if (tcparray) {
444                 /* add all of the known tcp connections for this IP to the
445                    list of tcp connections to send tickle acks for */
446                 arp->tcparray = talloc_steal(arp, tcparray);
447
448                 vnn->tcp_array = NULL;
449                 vnn->tcp_update_needed = true;
450         }
451
452         tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
453                          timeval_zero(), ctdb_control_send_arp, arp);
454
455         return 0;
456 }
457
/* State carried from ctdb_do_takeip() to its event callback */
struct ctdb_do_takeip_state {
	/* the control to reply to when the event finishes */
	struct ctdb_req_control_old *c;
	/* the VNN being taken over */
	struct ctdb_vnn *vnn;
};
462
463 /*
464   called when takeip event finishes
465  */
466 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
467                                     void *private_data)
468 {
469         struct ctdb_do_takeip_state *state =
470                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
471         int32_t ret;
472         TDB_DATA data;
473
474         if (status != 0) {
475                 if (status == -ETIMEDOUT) {
476                         ctdb_ban_self(ctdb);
477                 }
478                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
479                                  ctdb_addr_to_str(&state->vnn->public_address),
480                                  ctdb_vnn_iface_string(state->vnn)));
481                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
482
483                 talloc_free(state);
484                 return;
485         }
486
487         if (ctdb->do_checkpublicip) {
488
489         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
490         if (ret != 0) {
491                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
492                 talloc_free(state);
493                 return;
494         }
495
496         }
497
498         data.dptr  = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
499         data.dsize = strlen((char *)data.dptr) + 1;
500         DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
501
502         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
503
504
505         /* the control succeeded */
506         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
507         talloc_free(state);
508         return;
509 }
510
511 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
512 {
513         state->vnn->update_in_flight = false;
514         return 0;
515 }
516
517 /*
518   take over an ip address
519  */
520 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
521                               struct ctdb_req_control_old *c,
522                               struct ctdb_vnn *vnn)
523 {
524         int ret;
525         struct ctdb_do_takeip_state *state;
526
527         if (vnn->update_in_flight) {
528                 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
529                                     "update for this IP already in flight\n",
530                                     ctdb_addr_to_str(&vnn->public_address),
531                                     vnn->public_netmask_bits));
532                 return -1;
533         }
534
535         ret = ctdb_vnn_assign_iface(ctdb, vnn);
536         if (ret != 0) {
537                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
538                                  "assign a usable interface\n",
539                                  ctdb_addr_to_str(&vnn->public_address),
540                                  vnn->public_netmask_bits));
541                 return -1;
542         }
543
544         state = talloc(vnn, struct ctdb_do_takeip_state);
545         CTDB_NO_MEMORY(ctdb, state);
546
547         state->c = NULL;
548         state->vnn   = vnn;
549
550         vnn->update_in_flight = true;
551         talloc_set_destructor(state, ctdb_takeip_destructor);
552
553         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
554                             ctdb_addr_to_str(&vnn->public_address),
555                             vnn->public_netmask_bits,
556                             ctdb_vnn_iface_string(vnn)));
557
558         ret = ctdb_event_script_callback(ctdb,
559                                          state,
560                                          ctdb_do_takeip_callback,
561                                          state,
562                                          CTDB_EVENT_TAKE_IP,
563                                          "%s %s %u",
564                                          ctdb_vnn_iface_string(vnn),
565                                          ctdb_addr_to_str(&vnn->public_address),
566                                          vnn->public_netmask_bits);
567
568         if (ret != 0) {
569                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
570                         ctdb_addr_to_str(&vnn->public_address),
571                         ctdb_vnn_iface_string(vnn)));
572                 talloc_free(state);
573                 return -1;
574         }
575
576         state->c = talloc_steal(ctdb, c);
577         return 0;
578 }
579
/* State carried from ctdb_do_updateip() to its event callback */
struct ctdb_do_updateip_state {
	/* the control to reply to when the event finishes */
	struct ctdb_req_control_old *c;
	/* the interface assigned before the update, may be NULL */
	struct ctdb_interface *old;
	/* the VNN being moved */
	struct ctdb_vnn *vnn;
};
585
586 /*
587   called when updateip event finishes
588  */
589 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
590                                       void *private_data)
591 {
592         struct ctdb_do_updateip_state *state =
593                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
594
595         if (status != 0) {
596                 if (status == -ETIMEDOUT) {
597                         ctdb_ban_self(ctdb);
598                 }
599                 DEBUG(DEBUG_ERR,
600                       ("Failed update of IP %s from interface %s to %s\n",
601                        ctdb_addr_to_str(&state->vnn->public_address),
602                        iface_string(state->old),
603                        ctdb_vnn_iface_string(state->vnn)));
604
605                 /*
606                  * All we can do is reset the old interface
607                  * and let the next run fix it
608                  */
609                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
610                 state->vnn->iface = state->old;
611                 state->vnn->iface->references++;
612
613                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
614                 talloc_free(state);
615                 return;
616         }
617
618         /* the control succeeded */
619         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
620         talloc_free(state);
621         return;
622 }
623
624 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
625 {
626         state->vnn->update_in_flight = false;
627         return 0;
628 }
629
630 /*
631   update (move) an ip address
632  */
633 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
634                                 struct ctdb_req_control_old *c,
635                                 struct ctdb_vnn *vnn)
636 {
637         int ret;
638         struct ctdb_do_updateip_state *state;
639         struct ctdb_interface *old = vnn->iface;
640         const char *old_name = iface_string(old);
641         const char *new_name;
642
643         if (vnn->update_in_flight) {
644                 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
645                                     "update for this IP already in flight\n",
646                                     ctdb_addr_to_str(&vnn->public_address),
647                                     vnn->public_netmask_bits));
648                 return -1;
649         }
650
651         ctdb_vnn_unassign_iface(ctdb, vnn);
652         ret = ctdb_vnn_assign_iface(ctdb, vnn);
653         if (ret != 0) {
654                 DEBUG(DEBUG_ERR,("Update of IP %s/%u failed to "
655                                  "assign a usable interface (old iface '%s')\n",
656                                  ctdb_addr_to_str(&vnn->public_address),
657                                  vnn->public_netmask_bits,
658                                  old_name));
659                 return -1;
660         }
661
662         if (old == vnn->iface) {
663                 /* A benign update from one interface onto itself.
664                  * no need to run the eventscripts in this case, just return
665                  * success.
666                  */
667                 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
668                 return 0;
669         }
670
671         state = talloc(vnn, struct ctdb_do_updateip_state);
672         CTDB_NO_MEMORY(ctdb, state);
673
674         state->c = NULL;
675         state->old = old;
676         state->vnn = vnn;
677
678         vnn->update_in_flight = true;
679         talloc_set_destructor(state, ctdb_updateip_destructor);
680
681         new_name = ctdb_vnn_iface_string(vnn);
682         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
683                             "interface %s to %s\n",
684                             ctdb_addr_to_str(&vnn->public_address),
685                             vnn->public_netmask_bits,
686                             old_name,
687                             new_name));
688
689         ret = ctdb_event_script_callback(ctdb,
690                                          state,
691                                          ctdb_do_updateip_callback,
692                                          state,
693                                          CTDB_EVENT_UPDATE_IP,
694                                          "%s %s %s %u",
695                                          old_name,
696                                          new_name,
697                                          ctdb_addr_to_str(&vnn->public_address),
698                                          vnn->public_netmask_bits);
699         if (ret != 0) {
700                 DEBUG(DEBUG_ERR,
701                       ("Failed update IP %s from interface %s to %s\n",
702                        ctdb_addr_to_str(&vnn->public_address),
703                        old_name, new_name));
704                 talloc_free(state);
705                 return -1;
706         }
707
708         state->c = talloc_steal(ctdb, c);
709         return 0;
710 }
711
712 /*
713   Find the vnn of the node that has a public ip address
714   returns -1 if the address is not known as a public address
715  */
716 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
717 {
718         struct ctdb_vnn *vnn;
719
720         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
721                 if (ctdb_same_ip(&vnn->public_address, addr)) {
722                         return vnn;
723                 }
724         }
725
726         return NULL;
727 }
728
729 /*
730   take over an ip address
731  */
732 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
733                                  struct ctdb_req_control_old *c,
734                                  TDB_DATA indata,
735                                  bool *async_reply)
736 {
737         int ret;
738         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
739         struct ctdb_vnn *vnn;
740         bool have_ip = false;
741         bool do_updateip = false;
742         bool do_takeip = false;
743         struct ctdb_interface *best_iface = NULL;
744
745         if (pip->pnn != ctdb->pnn) {
746                 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
747                                  "with pnn %d, but we're node %d\n",
748                                  ctdb_addr_to_str(&pip->addr),
749                                  pip->pnn, ctdb->pnn));
750                 return -1;
751         }
752
753         /* update out vnn list */
754         vnn = find_public_ip_vnn(ctdb, &pip->addr);
755         if (vnn == NULL) {
756                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
757                         ctdb_addr_to_str(&pip->addr)));
758                 return 0;
759         }
760
761         if (ctdb_config.failover_disabled == 0 && ctdb->do_checkpublicip) {
762                 have_ip = ctdb_sys_have_ip(&pip->addr);
763         }
764         best_iface = ctdb_vnn_best_iface(ctdb, vnn);
765         if (best_iface == NULL) {
766                 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
767                                  "a usable interface (old %s, have_ip %d)\n",
768                                  ctdb_addr_to_str(&vnn->public_address),
769                                  vnn->public_netmask_bits,
770                                  ctdb_vnn_iface_string(vnn),
771                                  have_ip));
772                 return -1;
773         }
774
775         if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != CTDB_UNKNOWN_PNN) {
776                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
777                                   "and we have it on iface[%s], but it was assigned to node %d"
778                                   "and we are node %d, banning ourself\n",
779                                  ctdb_addr_to_str(&vnn->public_address),
780                                  ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
781                 ctdb_ban_self(ctdb);
782                 return -1;
783         }
784
785         if (vnn->pnn == CTDB_UNKNOWN_PNN && have_ip) {
786                 /* This will cause connections to be reset and
787                  * reestablished.  However, this is a very unusual
788                  * situation and doing this will completely repair the
789                  * inconsistency in the VNN.
790                  */
791                 DEBUG(DEBUG_WARNING,
792                       (__location__
793                        " Doing updateip for IP %s already on an interface\n",
794                        ctdb_addr_to_str(&vnn->public_address)));
795                 do_updateip = true;
796         }
797
798         if (vnn->iface) {
799                 if (vnn->iface != best_iface) {
800                         if (!vnn->iface->link_up) {
801                                 do_updateip = true;
802                         } else if (vnn->iface->references > (best_iface->references + 1)) {
803                                 /* only move when the rebalance gains something */
804                                         do_updateip = true;
805                         }
806                 }
807         }
808
809         if (!have_ip) {
810                 if (do_updateip) {
811                         ctdb_vnn_unassign_iface(ctdb, vnn);
812                         do_updateip = false;
813                 }
814                 do_takeip = true;
815         }
816
817         if (do_takeip) {
818                 ret = ctdb_do_takeip(ctdb, c, vnn);
819                 if (ret != 0) {
820                         return -1;
821                 }
822         } else if (do_updateip) {
823                 ret = ctdb_do_updateip(ctdb, c, vnn);
824                 if (ret != 0) {
825                         return -1;
826                 }
827         } else {
828                 /*
829                  * The interface is up and the kernel known the ip
830                  * => do nothing
831                  */
832                 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
833                         ctdb_addr_to_str(&pip->addr),
834                         vnn->public_netmask_bits,
835                         ctdb_vnn_iface_string(vnn)));
836                 return 0;
837         }
838
839         /* tell ctdb_control.c that we will be replying asynchronously */
840         *async_reply = true;
841
842         return 0;
843 }
844
845 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
846 {
847         DLIST_REMOVE(ctdb->vnn, vnn);
848         ctdb_vnn_unassign_iface(ctdb, vnn);
849         ctdb_remove_orphaned_ifaces(ctdb, vnn);
850         talloc_free(vnn);
851 }
852
853 static struct ctdb_vnn *release_ip_post(struct ctdb_context *ctdb,
854                                         struct ctdb_vnn *vnn,
855                                         ctdb_sock_addr *addr)
856 {
857         TDB_DATA data;
858
859         /* Send a message to all clients of this node telling them
860          * that the cluster has been reconfigured and they should
861          * close any connections on this IP address
862          */
863         data.dptr = (uint8_t *)ctdb_addr_to_str(addr);
864         data.dsize = strlen((char *)data.dptr)+1;
865         DEBUG(DEBUG_INFO, ("Sending RELEASE_IP message for %s\n", data.dptr));
866         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
867
868         ctdb_vnn_unassign_iface(ctdb, vnn);
869
870         /* Process the IP if it has been marked for deletion */
871         if (vnn->delete_pending) {
872                 do_delete_ip(ctdb, vnn);
873                 return NULL;
874         }
875
876         return vnn;
877 }
878
/*
 * State carried from ctdb_control_release_ip() to release_ip_callback()
 * while the "releaseip" event is running.
 */
struct release_ip_callback_state {
        /* Control request to reply to from the callback; NULL until the
         * event has been started successfully */
        struct ctdb_req_control_old *c;
        /* Copy of the address being released (allocated on the state) */
        ctdb_sock_addr *addr;
        /* VNN being released; set to NULL by the callback if the VNN was
         * deleted, so the destructor leaves it alone */
        struct ctdb_vnn *vnn;
        /* PNN stored into vnn->pnn once the release has succeeded */
        uint32_t target_pnn;
};
885
886 /*
887   called when releaseip event finishes
888  */
889 static void release_ip_callback(struct ctdb_context *ctdb, int status,
890                                 void *private_data)
891 {
892         struct release_ip_callback_state *state =
893                 talloc_get_type(private_data, struct release_ip_callback_state);
894
895         if (status == -ETIMEDOUT) {
896                 ctdb_ban_self(ctdb);
897         }
898
899         if (ctdb_config.failover_disabled == 0 && ctdb->do_checkpublicip) {
900                 if  (ctdb_sys_have_ip(state->addr)) {
901                         DEBUG(DEBUG_ERR,
902                               ("IP %s still hosted during release IP callback, failing\n",
903                                ctdb_addr_to_str(state->addr)));
904                         ctdb_request_control_reply(ctdb, state->c,
905                                                    NULL, -1, NULL);
906                         talloc_free(state);
907                         return;
908                 }
909         }
910
911         state->vnn->pnn = state->target_pnn;
912         state->vnn = release_ip_post(ctdb, state->vnn, state->addr);
913
914         /* the control succeeded */
915         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
916         talloc_free(state);
917 }
918
919 static int ctdb_releaseip_destructor(struct release_ip_callback_state *state)
920 {
921         if (state->vnn != NULL) {
922                 state->vnn->update_in_flight = false;
923         }
924         return 0;
925 }
926
927 /*
928   release an ip address
929  */
930 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
931                                 struct ctdb_req_control_old *c,
932                                 TDB_DATA indata, 
933                                 bool *async_reply)
934 {
935         int ret;
936         struct release_ip_callback_state *state;
937         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
938         struct ctdb_vnn *vnn;
939         const char *iface;
940
941         /* update our vnn list */
942         vnn = find_public_ip_vnn(ctdb, &pip->addr);
943         if (vnn == NULL) {
944                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
945                         ctdb_addr_to_str(&pip->addr)));
946                 return 0;
947         }
948
949         /* stop any previous arps */
950         talloc_free(vnn->takeover_ctx);
951         vnn->takeover_ctx = NULL;
952
953         /* RELEASE_IP controls are sent to all nodes that should not
954          * be hosting a particular IP.  This serves 2 purposes.  The
955          * first is to help resolve any inconsistencies.  If a node
956          * does unexpectly host an IP then it will be released.  The
957          * 2nd is to use a "redundant release" to tell non-takeover
958          * nodes where an IP is moving to.  This is how "ctdb ip" can
959          * report the (likely) location of an IP by only asking the
960          * local node.  Redundant releases need to update the PNN but
961          * are otherwise ignored.
962          */
963         if (ctdb_config.failover_disabled == 0 && ctdb->do_checkpublicip) {
964                 if (!ctdb_sys_have_ip(&pip->addr)) {
965                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
966                                 ctdb_addr_to_str(&pip->addr),
967                                 vnn->public_netmask_bits,
968                                 ctdb_vnn_iface_string(vnn)));
969                         vnn->pnn = pip->pnn;
970                         ctdb_vnn_unassign_iface(ctdb, vnn);
971                         return 0;
972                 }
973         } else {
974                 if (vnn->iface == NULL) {
975                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
976                                            ctdb_addr_to_str(&pip->addr),
977                                            vnn->public_netmask_bits));
978                         vnn->pnn = pip->pnn;
979                         return 0;
980                 }
981         }
982
983         /* There is a potential race between take_ip and us because we
984          * update the VNN via a callback that run when the
985          * eventscripts have been run.  Avoid the race by allowing one
986          * update to be in flight at a time.
987          */
988         if (vnn->update_in_flight) {
989                 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
990                                     "update for this IP already in flight\n",
991                                     ctdb_addr_to_str(&vnn->public_address),
992                                     vnn->public_netmask_bits));
993                 return -1;
994         }
995
996         iface = ctdb_vnn_iface_string(vnn);
997
998         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
999                 ctdb_addr_to_str(&pip->addr),
1000                 vnn->public_netmask_bits,
1001                 iface,
1002                 pip->pnn));
1003
1004         state = talloc(ctdb, struct release_ip_callback_state);
1005         if (state == NULL) {
1006                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1007                                __FILE__, __LINE__);
1008                 return -1;
1009         }
1010
1011         state->c = NULL;
1012         state->addr = talloc(state, ctdb_sock_addr);
1013         if (state->addr == NULL) {
1014                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1015                                __FILE__, __LINE__);
1016                 talloc_free(state);
1017                 return -1;
1018         }
1019         *state->addr = pip->addr;
1020         state->target_pnn = pip->pnn;
1021         state->vnn   = vnn;
1022
1023         vnn->update_in_flight = true;
1024         talloc_set_destructor(state, ctdb_releaseip_destructor);
1025
1026         ret = ctdb_event_script_callback(ctdb, 
1027                                          state, release_ip_callback, state,
1028                                          CTDB_EVENT_RELEASE_IP,
1029                                          "%s %s %u",
1030                                          iface,
1031                                          ctdb_addr_to_str(&pip->addr),
1032                                          vnn->public_netmask_bits);
1033         if (ret != 0) {
1034                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
1035                         ctdb_addr_to_str(&pip->addr),
1036                         ctdb_vnn_iface_string(vnn)));
1037                 talloc_free(state);
1038                 return -1;
1039         }
1040
1041         /* tell the control that we will be reply asynchronously */
1042         *async_reply = true;
1043         state->c = talloc_steal(state, c);
1044         return 0;
1045 }
1046
1047 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1048                                    ctdb_sock_addr *addr,
1049                                    unsigned mask, const char *ifaces,
1050                                    bool check_address)
1051 {
1052         struct ctdb_vnn      *vnn;
1053         char *tmp;
1054         const char *iface;
1055
1056         /* Verify that we don't have an entry for this IP yet */
1057         for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
1058                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1059                         DEBUG(DEBUG_ERR,
1060                               ("Duplicate public IP address '%s'\n",
1061                                ctdb_addr_to_str(addr)));
1062                         return -1;
1063                 }
1064         }
1065
1066         /* Create a new VNN structure for this IP address */
1067         vnn = talloc_zero(ctdb, struct ctdb_vnn);
1068         if (vnn == NULL) {
1069                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1070                 return -1;
1071         }
1072         tmp = talloc_strdup(vnn, ifaces);
1073         if (tmp == NULL) {
1074                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1075                 talloc_free(vnn);
1076                 return -1;
1077         }
1078         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1079                 struct vnn_interface *vnn_iface;
1080                 struct ctdb_interface *i;
1081                 if (!ctdb_sys_check_iface_exists(iface)) {
1082                         DEBUG(DEBUG_ERR,
1083                               ("Unknown interface %s for public address %s\n",
1084                                iface, ctdb_addr_to_str(addr)));
1085                         talloc_free(vnn);
1086                         return -1;
1087                 }
1088
1089                 i = ctdb_add_local_iface(ctdb, iface);
1090                 if (i == NULL) {
1091                         DEBUG(DEBUG_ERR,
1092                               ("Failed to add interface '%s' "
1093                                "for public address %s\n",
1094                                iface, ctdb_addr_to_str(addr)));
1095                         talloc_free(vnn);
1096                         return -1;
1097                 }
1098
1099                 vnn_iface = talloc_zero(vnn, struct vnn_interface);
1100                 if (vnn_iface == NULL) {
1101                         DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1102                         talloc_free(vnn);
1103                         return -1;
1104                 }
1105
1106                 vnn_iface->iface = i;
1107                 DLIST_ADD_END(vnn->ifaces, vnn_iface);
1108         }
1109         talloc_free(tmp);
1110         vnn->public_address      = *addr;
1111         vnn->public_netmask_bits = mask;
1112         vnn->pnn                 = -1;
1113
1114         DLIST_ADD(ctdb->vnn, vnn);
1115
1116         return 0;
1117 }
1118
1119 /*
1120   setup the public address lists from a file
1121 */
1122 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1123 {
1124         bool ok;
1125         char **lines;
1126         int nlines;
1127         int i;
1128
1129         /* If no public addresses file given then try the default */
1130         if (ctdb->public_addresses_file == NULL) {
1131                 const char *b = getenv("CTDB_BASE");
1132                 if (b == NULL) {
1133                         DBG_ERR("CTDB_BASE not set\n");
1134                         return -1;
1135                 }
1136                 ctdb->public_addresses_file = talloc_asprintf(
1137                                         ctdb, "%s/%s", b, "public_addresses");
1138                 if (ctdb->public_addresses_file == NULL) {
1139                         DBG_ERR("Out of memory\n");
1140                         return -1;
1141                 }
1142         }
1143
1144         /* If the file doesn't exist then warn and do nothing */
1145         ok = file_exist(ctdb->public_addresses_file);
1146         if (!ok) {
1147                 D_WARNING("Not loading public addresses, no file %s\n",
1148                           ctdb->public_addresses_file);
1149                 return 0;
1150         }
1151
1152         lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1153         if (lines == NULL) {
1154                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1155                 return -1;
1156         }
1157         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1158                 nlines--;
1159         }
1160
1161         for (i=0;i<nlines;i++) {
1162                 unsigned mask;
1163                 ctdb_sock_addr addr;
1164                 const char *addrstr;
1165                 const char *ifaces;
1166                 char *tok, *line;
1167                 int ret;
1168
1169                 line = lines[i];
1170                 while ((*line == ' ') || (*line == '\t')) {
1171                         line++;
1172                 }
1173                 if (*line == '#') {
1174                         continue;
1175                 }
1176                 if (strcmp(line, "") == 0) {
1177                         continue;
1178                 }
1179                 tok = strtok(line, " \t");
1180                 addrstr = tok;
1181
1182                 tok = strtok(NULL, " \t");
1183                 if (tok == NULL) {
1184                         D_ERR("No interface specified at line %u "
1185                               "of public addresses file\n", i+1);
1186                         talloc_free(lines);
1187                         return -1;
1188                 }
1189                 ifaces = tok;
1190
1191                 if (addrstr == NULL) {
1192                         D_ERR("Badly formed line %u in public address list\n",
1193                               i+1);
1194                         talloc_free(lines);
1195                         return -1;
1196                 }
1197
1198                 ret = ctdb_sock_addr_mask_from_string(addrstr, &addr, &mask);
1199                 if (ret != 0) {
1200                         D_ERR("Badly formed line %u in public address list\n",
1201                               i+1);
1202                         talloc_free(lines);
1203                         return -1;
1204                 }
1205
1206                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1207                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1208                         talloc_free(lines);
1209                         return -1;
1210                 }
1211         }
1212
1213
1214         D_NOTICE("Loaded public addresses from %s\n",
1215                  ctdb->public_addresses_file);
1216
1217         talloc_free(lines);
1218         return 0;
1219 }
1220
1221 /*
1222   destroy a ctdb_client_ip structure
1223  */
1224 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1225 {
1226         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1227                 ctdb_addr_to_str(&ip->addr),
1228                 ntohs(ip->addr.ip.sin_port),
1229                 ip->client_id));
1230
1231         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1232         return 0;
1233 }
1234
1235 /*
1236   called by a client to inform us of a TCP connection that it is managing
1237   that should tickled with an ACK when IP takeover is done
1238  */
1239 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1240                                 TDB_DATA indata)
1241 {
1242         struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1243         struct ctdb_connection *tcp_sock = NULL;
1244         struct ctdb_tcp_list *tcp;
1245         struct ctdb_connection t;
1246         int ret;
1247         TDB_DATA data;
1248         struct ctdb_client_ip *ip;
1249         struct ctdb_vnn *vnn;
1250         ctdb_sock_addr src_addr;
1251         ctdb_sock_addr dst_addr;
1252
1253         /* If we don't have public IPs, tickles are useless */
1254         if (ctdb->vnn == NULL) {
1255                 return 0;
1256         }
1257
1258         tcp_sock = (struct ctdb_connection *)indata.dptr;
1259
1260         src_addr = tcp_sock->src;
1261         ctdb_canonicalize_ip(&src_addr,  &tcp_sock->src);
1262         ZERO_STRUCT(src_addr);
1263         memcpy(&src_addr, &tcp_sock->src, sizeof(src_addr));
1264
1265         dst_addr = tcp_sock->dst;
1266         ctdb_canonicalize_ip(&dst_addr, &tcp_sock->dst);
1267         ZERO_STRUCT(dst_addr);
1268         memcpy(&dst_addr, &tcp_sock->dst, sizeof(dst_addr));
1269
1270         vnn = find_public_ip_vnn(ctdb, &dst_addr);
1271         if (vnn == NULL) {
1272                 char *src_addr_str = NULL;
1273                 char *dst_addr_str = NULL;
1274
1275                 switch (dst_addr.sa.sa_family) {
1276                 case AF_INET:
1277                         if (ntohl(dst_addr.ip.sin_addr.s_addr) == INADDR_LOOPBACK) {
1278                                 /* ignore ... */
1279                                 return 0;
1280                         }
1281                         break;
1282                 case AF_INET6:
1283                         break;
1284                 default:
1285                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n",
1286                               dst_addr.sa.sa_family));
1287                         return 0;
1288                 }
1289
1290                 src_addr_str = ctdb_sock_addr_to_string(client, &src_addr, false);
1291                 dst_addr_str = ctdb_sock_addr_to_string(client, &dst_addr, false);
1292                 DEBUG(DEBUG_ERR,(
1293                       "Could not register TCP connection from "
1294                       "%s to %s (not a public address) (port %u) "
1295                       "(client_id %u pid %u).\n",
1296                       src_addr_str,
1297                       dst_addr_str,
1298                       ctdb_sock_addr_port(&dst_addr),
1299                       client_id, client->pid));
1300                 TALLOC_FREE(src_addr_str);
1301                 TALLOC_FREE(dst_addr_str);
1302                 return 0;
1303         }
1304
1305         if (vnn->pnn != ctdb->pnn) {
1306                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1307                         ctdb_addr_to_str(&dst_addr),
1308                         client_id, client->pid));
1309                 /* failing this call will tell smbd to die */
1310                 return -1;
1311         }
1312
1313         ip = talloc(client, struct ctdb_client_ip);
1314         CTDB_NO_MEMORY(ctdb, ip);
1315
1316         ip->ctdb      = ctdb;
1317         ip->addr      = dst_addr;
1318         ip->client_id = client_id;
1319         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1320         DLIST_ADD(ctdb->client_ip_list, ip);
1321
1322         tcp = talloc(client, struct ctdb_tcp_list);
1323         CTDB_NO_MEMORY(ctdb, tcp);
1324
1325         tcp->connection.src = tcp_sock->src;
1326         tcp->connection.dst = tcp_sock->dst;
1327
1328         DLIST_ADD(client->tcp_list, tcp);
1329
1330         t.src = tcp_sock->src;
1331         t.dst = tcp_sock->dst;
1332
1333         data.dptr = (uint8_t *)&t;
1334         data.dsize = sizeof(t);
1335
1336         switch (dst_addr.sa.sa_family) {
1337         case AF_INET:
1338                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1339                         (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1340                         ctdb_addr_to_str(&tcp_sock->src),
1341                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1342                 break;
1343         case AF_INET6:
1344                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1345                         (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1346                         ctdb_addr_to_str(&tcp_sock->src),
1347                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1348                 break;
1349         default:
1350                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n",
1351                       dst_addr.sa.sa_family));
1352         }
1353
1354
1355         /* tell all nodes about this tcp connection */
1356         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1357                                        CTDB_CONTROL_TCP_ADD,
1358                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1359         if (ret != 0) {
1360                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1361                 return -1;
1362         }
1363
1364         return 0;
1365 }
1366
1367 /*
1368   find a tcp address on a list
1369  */
1370 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1371                                            struct ctdb_connection *tcp)
1372 {
1373         unsigned int i;
1374
1375         if (array == NULL) {
1376                 return NULL;
1377         }
1378
1379         for (i=0;i<array->num;i++) {
1380                 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
1381                     ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
1382                         return &array->connections[i];
1383                 }
1384         }
1385         return NULL;
1386 }
1387
1388
1389
1390 /*
1391   called by a daemon to inform us of a TCP connection that one of its
1392   clients managing that should tickled with an ACK when IP takeover is
1393   done
1394  */
1395 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1396 {
1397         struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
1398         struct ctdb_tcp_array *tcparray;
1399         struct ctdb_connection tcp;
1400         struct ctdb_vnn *vnn;
1401
1402         /* If we don't have public IPs, tickles are useless */
1403         if (ctdb->vnn == NULL) {
1404                 return 0;
1405         }
1406
1407         vnn = find_public_ip_vnn(ctdb, &p->dst);
1408         if (vnn == NULL) {
1409                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1410                         ctdb_addr_to_str(&p->dst)));
1411
1412                 return -1;
1413         }
1414
1415
1416         tcparray = vnn->tcp_array;
1417
1418         /* If this is the first tickle */
1419         if (tcparray == NULL) {
1420                 tcparray = talloc(vnn, struct ctdb_tcp_array);
1421                 CTDB_NO_MEMORY(ctdb, tcparray);
1422                 vnn->tcp_array = tcparray;
1423
1424                 tcparray->num = 0;
1425                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
1426                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1427
1428                 tcparray->connections[tcparray->num].src = p->src;
1429                 tcparray->connections[tcparray->num].dst = p->dst;
1430                 tcparray->num++;
1431
1432                 if (tcp_update_needed) {
1433                         vnn->tcp_update_needed = true;
1434                 }
1435                 return 0;
1436         }
1437
1438
1439         /* Do we already have this tickle ?*/
1440         tcp.src = p->src;
1441         tcp.dst = p->dst;
1442         if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
1443                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1444                         ctdb_addr_to_str(&tcp.dst),
1445                         ntohs(tcp.dst.ip.sin_port),
1446                         vnn->pnn));
1447                 return 0;
1448         }
1449
1450         /* A new tickle, we must add it to the array */
1451         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1452                                         struct ctdb_connection,
1453                                         tcparray->num+1);
1454         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1455
1456         tcparray->connections[tcparray->num].src = p->src;
1457         tcparray->connections[tcparray->num].dst = p->dst;
1458         tcparray->num++;
1459
1460         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1461                 ctdb_addr_to_str(&tcp.dst),
1462                 ntohs(tcp.dst.ip.sin_port),
1463                 vnn->pnn));
1464
1465         if (tcp_update_needed) {
1466                 vnn->tcp_update_needed = true;
1467         }
1468
1469         return 0;
1470 }
1471
1472
1473 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
1474 {
1475         struct ctdb_connection *tcpp;
1476
1477         if (vnn == NULL) {
1478                 return;
1479         }
1480
1481         /* if the array is empty we cant remove it
1482            and we don't need to do anything
1483          */
1484         if (vnn->tcp_array == NULL) {
1485                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist (array is empty) %s:%u\n",
1486                         ctdb_addr_to_str(&conn->dst),
1487                         ntohs(conn->dst.ip.sin_port)));
1488                 return;
1489         }
1490
1491
1492         /* See if we know this connection
1493            if we don't know this connection  then we dont need to do anything
1494          */
1495         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1496         if (tcpp == NULL) {
1497                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist %s:%u\n",
1498                         ctdb_addr_to_str(&conn->dst),
1499                         ntohs(conn->dst.ip.sin_port)));
1500                 return;
1501         }
1502
1503
1504         /* We need to remove this entry from the array.
1505            Instead of allocating a new array and copying data to it
1506            we cheat and just copy the last entry in the existing array
1507            to the entry that is to be removed and just shring the 
1508            ->num field
1509          */
1510         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1511         vnn->tcp_array->num--;
1512
1513         /* If we deleted the last entry we also need to remove the entire array
1514          */
1515         if (vnn->tcp_array->num == 0) {
1516                 talloc_free(vnn->tcp_array);
1517                 vnn->tcp_array = NULL;
1518         }               
1519
1520         vnn->tcp_update_needed = true;
1521
1522         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1523                 ctdb_addr_to_str(&conn->src),
1524                 ntohs(conn->src.ip.sin_port)));
1525 }
1526
1527
1528 /*
1529   called by a daemon to inform us of a TCP connection that one of its
1530   clients used are no longer needed in the tickle database
1531  */
1532 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1533 {
1534         struct ctdb_vnn *vnn;
1535         struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
1536
1537         /* If we don't have public IPs, tickles are useless */
1538         if (ctdb->vnn == NULL) {
1539                 return 0;
1540         }
1541
1542         vnn = find_public_ip_vnn(ctdb, &conn->dst);
1543         if (vnn == NULL) {
1544                 DEBUG(DEBUG_ERR,
1545                       (__location__ " unable to find public address %s\n",
1546                        ctdb_addr_to_str(&conn->dst)));
1547                 return 0;
1548         }
1549
1550         ctdb_remove_connection(vnn, conn);
1551
1552         return 0;
1553 }
1554
1555
/* Forward declaration: needed by ctdb_control_startup() below, which
 * calls this function before its definition */
static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb,
                                              bool force);
1558
/*
  Called when another daemon starts.

  Calls ctdb_send_set_tcp_tickles_for_all() with force set to true, so
  the tickles for all public addresses we are serving are sent out
  immediately rather than on the next periodic check.  In case there is
  an error, the periodic timer will send the updates on timer event.
  This is simple and doesn't require careful error handling.

  pnn is the node the startup control came from; it is only used for
  logging.  Always returns 0.
 */
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
{
        DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
                           (unsigned long) pnn));

        ctdb_send_set_tcp_tickles_for_all(ctdb, true);
        return 0;
}
1575
1576
1577 /*
1578   called when a client structure goes away - hook to remove
1579   elements from the tcp_list in all daemons
1580  */
1581 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1582 {
1583         while (client->tcp_list) {
1584                 struct ctdb_vnn *vnn;
1585                 struct ctdb_tcp_list *tcp = client->tcp_list;
1586                 struct ctdb_connection *conn = &tcp->connection;
1587
1588                 DLIST_REMOVE(client->tcp_list, tcp);
1589
1590                 vnn = find_public_ip_vnn(client->ctdb,
1591                                          &conn->dst);
1592                 if (vnn == NULL) {
1593                         DEBUG(DEBUG_ERR,
1594                               (__location__ " unable to find public address %s\n",
1595                                ctdb_addr_to_str(&conn->dst)));
1596                         continue;
1597                 }
1598
1599                 /* If the IP address is hosted on this node then
1600                  * remove the connection. */
1601                 if (vnn->pnn == client->ctdb->pnn) {
1602                         ctdb_remove_connection(vnn, conn);
1603                 }
1604
1605                 /* Otherwise this function has been called because the
1606                  * server IP address has been released to another node
1607                  * and the client has exited.  This means that we
1608                  * should not delete the connection information.  The
1609                  * takeover node processes connections too. */
1610         }
1611 }
1612
1613
1614 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1615 {
1616         struct ctdb_vnn *vnn, *next;
1617         int count = 0;
1618
1619         if (ctdb_config.failover_disabled == 1) {
1620                 return;
1621         }
1622
1623         for (vnn = ctdb->vnn; vnn != NULL; vnn = next) {
1624                 /* vnn can be freed below in release_ip_post() */
1625                 next = vnn->next;
1626
1627                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1628                         ctdb_vnn_unassign_iface(ctdb, vnn);
1629                         continue;
1630                 }
1631
1632                 /* Don't allow multiple releases at once.  Some code,
1633                  * particularly ctdb_tickle_sentenced_connections() is
1634                  * not re-entrant */
1635                 if (vnn->update_in_flight) {
1636                         DEBUG(DEBUG_WARNING,
1637                               (__location__
1638                                " Not releasing IP %s/%u on interface %s, an update is already in progress\n",
1639                                     ctdb_addr_to_str(&vnn->public_address),
1640                                     vnn->public_netmask_bits,
1641                                     ctdb_vnn_iface_string(vnn)));
1642                         continue;
1643                 }
1644                 vnn->update_in_flight = true;
1645
1646                 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
1647                                     ctdb_addr_to_str(&vnn->public_address),
1648                                     vnn->public_netmask_bits,
1649                                     ctdb_vnn_iface_string(vnn)));
1650
1651                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1652                                        ctdb_vnn_iface_string(vnn),
1653                                        ctdb_addr_to_str(&vnn->public_address),
1654                                        vnn->public_netmask_bits);
1655                 /* releaseip timeouts are converted to success, so to
1656                  * detect failures just check if the IP address is
1657                  * still there...
1658                  */
1659                 if (ctdb_sys_have_ip(&vnn->public_address)) {
1660                         DEBUG(DEBUG_ERR,
1661                               (__location__
1662                                " IP address %s not released\n",
1663                                ctdb_addr_to_str(&vnn->public_address)));
1664                         vnn->update_in_flight = false;
1665                         continue;
1666                 }
1667
1668                 vnn = release_ip_post(ctdb, vnn, &vnn->public_address);
1669                 if (vnn != NULL) {
1670                         vnn->update_in_flight = false;
1671                 }
1672                 count++;
1673         }
1674
1675         DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
1676 }
1677
1678
1679 /*
1680   get list of public IPs
1681  */
1682 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
1683                                     struct ctdb_req_control_old *c, TDB_DATA *outdata)
1684 {
1685         int i, num, len;
1686         struct ctdb_public_ip_list_old *ips;
1687         struct ctdb_vnn *vnn;
1688         bool only_available = false;
1689
1690         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1691                 only_available = true;
1692         }
1693
1694         /* count how many public ip structures we have */
1695         num = 0;
1696         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1697                 num++;
1698         }
1699
1700         len = offsetof(struct ctdb_public_ip_list_old, ips) +
1701                 num*sizeof(struct ctdb_public_ip);
1702         ips = talloc_zero_size(outdata, len);
1703         CTDB_NO_MEMORY(ctdb, ips);
1704
1705         i = 0;
1706         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1707                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1708                         continue;
1709                 }
1710                 ips->ips[i].pnn  = vnn->pnn;
1711                 ips->ips[i].addr = vnn->public_address;
1712                 i++;
1713         }
1714         ips->num = i;
1715         len = offsetof(struct ctdb_public_ip_list_old, ips) +
1716                 i*sizeof(struct ctdb_public_ip);
1717
1718         outdata->dsize = len;
1719         outdata->dptr  = (uint8_t *)ips;
1720
1721         return 0;
1722 }
1723
1724
1725 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
1726                                         struct ctdb_req_control_old *c,
1727                                         TDB_DATA indata,
1728                                         TDB_DATA *outdata)
1729 {
1730         int i, num, len;
1731         ctdb_sock_addr *addr;
1732         struct ctdb_public_ip_info_old *info;
1733         struct ctdb_vnn *vnn;
1734         struct vnn_interface *iface;
1735
1736         addr = (ctdb_sock_addr *)indata.dptr;
1737
1738         vnn = find_public_ip_vnn(ctdb, addr);
1739         if (vnn == NULL) {
1740                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
1741                                  "'%s'not a public address\n",
1742                                  ctdb_addr_to_str(addr)));
1743                 return -1;
1744         }
1745
1746         /* count how many public ip structures we have */
1747         num = 0;
1748         for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
1749                 num++;
1750         }
1751
1752         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
1753                 num*sizeof(struct ctdb_iface);
1754         info = talloc_zero_size(outdata, len);
1755         CTDB_NO_MEMORY(ctdb, info);
1756
1757         info->ip.addr = vnn->public_address;
1758         info->ip.pnn = vnn->pnn;
1759         info->active_idx = 0xFFFFFFFF;
1760
1761         i = 0;
1762         for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
1763                 struct ctdb_interface *cur;
1764
1765                 cur = iface->iface;
1766                 if (vnn->iface == cur) {
1767                         info->active_idx = i;
1768                 }
1769                 strncpy(info->ifaces[i].name, cur->name,
1770                         sizeof(info->ifaces[i].name));
1771                 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
1772                 info->ifaces[i].link_state = cur->link_up;
1773                 info->ifaces[i].references = cur->references;
1774
1775                 i++;
1776         }
1777         info->num = i;
1778         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
1779                 i*sizeof(struct ctdb_iface);
1780
1781         outdata->dsize = len;
1782         outdata->dptr  = (uint8_t *)info;
1783
1784         return 0;
1785 }
1786
1787 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
1788                                 struct ctdb_req_control_old *c,
1789                                 TDB_DATA *outdata)
1790 {
1791         int i, num, len;
1792         struct ctdb_iface_list_old *ifaces;
1793         struct ctdb_interface *cur;
1794
1795         /* count how many public ip structures we have */
1796         num = 0;
1797         for (cur=ctdb->ifaces;cur;cur=cur->next) {
1798                 num++;
1799         }
1800
1801         len = offsetof(struct ctdb_iface_list_old, ifaces) +
1802                 num*sizeof(struct ctdb_iface);
1803         ifaces = talloc_zero_size(outdata, len);
1804         CTDB_NO_MEMORY(ctdb, ifaces);
1805
1806         i = 0;
1807         for (cur=ctdb->ifaces;cur;cur=cur->next) {
1808                 strncpy(ifaces->ifaces[i].name, cur->name,
1809                         sizeof(ifaces->ifaces[i].name));
1810                 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
1811                 ifaces->ifaces[i].link_state = cur->link_up;
1812                 ifaces->ifaces[i].references = cur->references;
1813                 i++;
1814         }
1815         ifaces->num = i;
1816         len = offsetof(struct ctdb_iface_list_old, ifaces) +
1817                 i*sizeof(struct ctdb_iface);
1818
1819         outdata->dsize = len;
1820         outdata->dptr  = (uint8_t *)ifaces;
1821
1822         return 0;
1823 }
1824
1825 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
1826                                     struct ctdb_req_control_old *c,
1827                                     TDB_DATA indata)
1828 {
1829         struct ctdb_iface *info;
1830         struct ctdb_interface *iface;
1831         bool link_up = false;
1832
1833         info = (struct ctdb_iface *)indata.dptr;
1834
1835         if (info->name[CTDB_IFACE_SIZE] != '\0') {
1836                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
1837                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
1838                                   len, len, info->name));
1839                 return -1;
1840         }
1841
1842         switch (info->link_state) {
1843         case 0:
1844                 link_up = false;
1845                 break;
1846         case 1:
1847                 link_up = true;
1848                 break;
1849         default:
1850                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
1851                                   (unsigned int)info->link_state));
1852                 return -1;
1853         }
1854
1855         if (info->references != 0) {
1856                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
1857                                   (unsigned int)info->references));
1858                 return -1;
1859         }
1860
1861         iface = ctdb_find_iface(ctdb, info->name);
1862         if (iface == NULL) {
1863                 return -1;
1864         }
1865
1866         if (link_up == iface->link_up) {
1867                 return 0;
1868         }
1869
1870         DEBUG(DEBUG_ERR,
1871               ("iface[%s] has changed it's link status %s => %s\n",
1872                iface->name,
1873                iface->link_up?"up":"down",
1874                link_up?"up":"down"));
1875
1876         iface->link_up = link_up;
1877         return 0;
1878 }
1879
1880
1881 /*
1882   called by a daemon to inform us of the entire list of TCP tickles for
1883   a particular public address.
1884   this control should only be sent by the node that is currently serving
1885   that public address.
1886  */
1887 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1888 {
1889         struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
1890         struct ctdb_tcp_array *tcparray;
1891         struct ctdb_vnn *vnn;
1892
1893         /* We must at least have tickles.num or else we cant verify the size
1894            of the received data blob
1895          */
1896         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
1897                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
1898                 return -1;
1899         }
1900
1901         /* verify that the size of data matches what we expect */
1902         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
1903                          + sizeof(struct ctdb_connection) * list->num) {
1904                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
1905                 return -1;
1906         }
1907
1908         DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
1909                            ctdb_addr_to_str(&list->addr)));
1910
1911         vnn = find_public_ip_vnn(ctdb, &list->addr);
1912         if (vnn == NULL) {
1913                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
1914                         ctdb_addr_to_str(&list->addr)));
1915
1916                 return 1;
1917         }
1918
1919         if (vnn->pnn == ctdb->pnn) {
1920                 DEBUG(DEBUG_INFO,
1921                       ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
1922                        ctdb_addr_to_str(&list->addr)));
1923                 return 0;
1924         }
1925
1926         /* remove any old ticklelist we might have */
1927         talloc_free(vnn->tcp_array);
1928         vnn->tcp_array = NULL;
1929
1930         tcparray = talloc(vnn, struct ctdb_tcp_array);
1931         CTDB_NO_MEMORY(ctdb, tcparray);
1932
1933         tcparray->num = list->num;
1934
1935         tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
1936         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1937
1938         memcpy(tcparray->connections, &list->connections[0],
1939                sizeof(struct ctdb_connection)*tcparray->num);
1940
1941         /* We now have a new fresh tickle list array for this vnn */
1942         vnn->tcp_array = tcparray;
1943
1944         return 0;
1945 }
1946
1947 /*
1948   called to return the full list of tickles for the puclic address associated 
1949   with the provided vnn
1950  */
1951 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1952 {
1953         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1954         struct ctdb_tickle_list_old *list;
1955         struct ctdb_tcp_array *tcparray;
1956         unsigned int num, i;
1957         struct ctdb_vnn *vnn;
1958         unsigned port;
1959
1960         vnn = find_public_ip_vnn(ctdb, addr);
1961         if (vnn == NULL) {
1962                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
1963                         ctdb_addr_to_str(addr)));
1964
1965                 return 1;
1966         }
1967
1968         port = ctdb_addr_to_port(addr);
1969
1970         tcparray = vnn->tcp_array;
1971         num = 0;
1972         if (tcparray != NULL) {
1973                 if (port == 0) {
1974                         /* All connections */
1975                         num = tcparray->num;
1976                 } else {
1977                         /* Count connections for port */
1978                         for (i = 0; i < tcparray->num; i++) {
1979                                 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
1980                                         num++;
1981                                 }
1982                         }
1983                 }
1984         }
1985
1986         outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
1987                         + sizeof(struct ctdb_connection) * num;
1988
1989         outdata->dptr  = talloc_size(outdata, outdata->dsize);
1990         CTDB_NO_MEMORY(ctdb, outdata->dptr);
1991         list = (struct ctdb_tickle_list_old *)outdata->dptr;
1992
1993         list->addr = *addr;
1994         list->num = num;
1995
1996         if (num == 0) {
1997                 return 0;
1998         }
1999
2000         num = 0;
2001         for (i = 0; i < tcparray->num; i++) {
2002                 if (port == 0 || \
2003                     port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2004                         list->connections[num] = tcparray->connections[i];
2005                         num++;
2006                 }
2007         }
2008
2009         return 0;
2010 }
2011
2012
2013 /*
2014   set the list of all tcp tickles for a public address
2015  */
2016 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2017                                             ctdb_sock_addr *addr,
2018                                             struct ctdb_tcp_array *tcparray)
2019 {
2020         int ret, num;
2021         TDB_DATA data;
2022         struct ctdb_tickle_list_old *list;
2023
2024         if (tcparray) {
2025                 num = tcparray->num;
2026         } else {
2027                 num = 0;
2028         }
2029
2030         data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2031                         sizeof(struct ctdb_connection) * num;
2032         data.dptr = talloc_size(ctdb, data.dsize);
2033         CTDB_NO_MEMORY(ctdb, data.dptr);
2034
2035         list = (struct ctdb_tickle_list_old *)data.dptr;
2036         list->addr = *addr;
2037         list->num = num;
2038         if (tcparray) {
2039                 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2040         }
2041
2042         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2043                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2044                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2045         if (ret != 0) {
2046                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2047                 return -1;
2048         }
2049
2050         talloc_free(data.dptr);
2051
2052         return ret;
2053 }
2054
2055 static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb,
2056                                               bool force)
2057 {
2058         struct ctdb_vnn *vnn;
2059         int ret;
2060
2061         for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2062                 /* we only send out updates for public addresses that
2063                    we have taken over
2064                  */
2065                 if (ctdb->pnn != vnn->pnn) {
2066                         continue;
2067                 }
2068
2069                 /* We only send out the updates if we need to */
2070                 if (!force && !vnn->tcp_update_needed) {
2071                         continue;
2072                 }
2073
2074                 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2075                                                        &vnn->public_address,
2076                                                        vnn->tcp_array);
2077                 if (ret != 0) {
2078                         D_ERR("Failed to send the tickle update for ip %s\n",
2079                               ctdb_addr_to_str(&vnn->public_address));
2080                         vnn->tcp_update_needed = true;
2081                 } else {
2082                         D_INFO("Sent tickle update for ip %s\n",
2083                                ctdb_addr_to_str(&vnn->public_address));
2084                         vnn->tcp_update_needed = false;
2085                 }
2086         }
2087
2088 }
2089
2090 /*
2091   perform tickle updates if required
2092  */
2093 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2094                                     struct tevent_timer *te,
2095                                     struct timeval t, void *private_data)
2096 {
2097         struct ctdb_context *ctdb = talloc_get_type(
2098                 private_data, struct ctdb_context);
2099
2100         ctdb_send_set_tcp_tickles_for_all(ctdb, false);
2101
2102         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2103                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2104                          ctdb_update_tcp_tickles, ctdb);
2105 }
2106
2107 /*
2108   start periodic update of tcp tickles
2109  */
2110 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2111 {
2112         ctdb->tickle_update_context = talloc_new(ctdb);
2113
2114         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2115                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2116                          ctdb_update_tcp_tickles, ctdb);
2117 }
2118
2119
2120
2121
2122 struct control_gratious_arp {
2123         struct ctdb_context *ctdb;
2124         ctdb_sock_addr addr;
2125         const char *iface;
2126         int count;
2127 };
2128
2129 /*
2130   send a control_gratuitous arp
2131  */
2132 static void send_gratious_arp(struct tevent_context *ev,
2133                               struct tevent_timer *te,
2134                               struct timeval t, void *private_data)
2135 {
2136         int ret;
2137         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2138                                                         struct control_gratious_arp);
2139
2140         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2141         if (ret != 0) {
2142                 DBG_ERR("Failed to send gratuitous ARP on iface %s: %s\n",
2143                         arp->iface, strerror(ret));
2144         }
2145
2146
2147         arp->count++;
2148         if (arp->count == CTDB_ARP_REPEAT) {
2149                 talloc_free(arp);
2150                 return;
2151         }
2152
2153         tevent_add_timer(arp->ctdb->ev, arp,
2154                          timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2155                          send_gratious_arp, arp);
2156 }
2157
2158
2159 /*
2160   send a gratious arp 
2161  */
2162 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2163 {
2164         struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2165         struct control_gratious_arp *arp;
2166
2167         /* verify the size of indata */
2168         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2169                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2170                                  (unsigned)indata.dsize, 
2171                                  (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2172                 return -1;
2173         }
2174         if (indata.dsize != 
2175                 ( offsetof(struct ctdb_addr_info_old, iface)
2176                 + gratious_arp->len ) ){
2177
2178                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2179                         "but should be %u bytes\n", 
2180                          (unsigned)indata.dsize, 
2181                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2182                 return -1;
2183         }
2184
2185
2186         arp = talloc(ctdb, struct control_gratious_arp);
2187         CTDB_NO_MEMORY(ctdb, arp);
2188
2189         arp->ctdb  = ctdb;
2190         arp->addr   = gratious_arp->addr;
2191         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2192         CTDB_NO_MEMORY(ctdb, arp->iface);
2193         arp->count = 0;
2194
2195         tevent_add_timer(arp->ctdb->ev, arp,
2196                          timeval_zero(), send_gratious_arp, arp);
2197
2198         return 0;
2199 }
2200
2201 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2202 {
2203         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2204         int ret;
2205
2206         /* verify the size of indata */
2207         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2208                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2209                 return -1;
2210         }
2211         if (indata.dsize != 
2212                 ( offsetof(struct ctdb_addr_info_old, iface)
2213                 + pub->len ) ){
2214
2215                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2216                         "but should be %u bytes\n", 
2217                          (unsigned)indata.dsize, 
2218                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2219                 return -1;
2220         }
2221
2222         DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2223
2224         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2225
2226         if (ret != 0) {
2227                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2228                 return -1;
2229         }
2230
2231         return 0;
2232 }
2233
2234 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2235 {
2236         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2237         struct ctdb_vnn *vnn;
2238
2239         /* verify the size of indata */
2240         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2241                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2242                 return -1;
2243         }
2244         if (indata.dsize != 
2245                 ( offsetof(struct ctdb_addr_info_old, iface)
2246                 + pub->len ) ){
2247
2248                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2249                         "but should be %u bytes\n", 
2250                          (unsigned)indata.dsize, 
2251                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2252                 return -1;
2253         }
2254
2255         DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2256
2257         /* walk over all public addresses until we find a match */
2258         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2259                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2260                         if (vnn->pnn == ctdb->pnn) {
2261                                 /* This IP is currently being hosted.
2262                                  * Defer the deletion until the next
2263                                  * takeover run. "ctdb reloadips" will
2264                                  * always cause a takeover run.  "ctdb
2265                                  * delip" will now need an explicit
2266                                  * "ctdb ipreallocated" afterwards. */
2267                                 vnn->delete_pending = true;
2268                         } else {
2269                                 /* This IP is not hosted on the
2270                                  * current node so just delete it
2271                                  * now. */
2272                                 do_delete_ip(ctdb, vnn);
2273                         }
2274
2275                         return 0;
2276                 }
2277         }
2278
2279         DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2280                          ctdb_addr_to_str(&pub->addr)));
2281         return -1;
2282 }
2283
2284
/*
 * State handed to ctdb_ipreallocated_callback().
 */
struct ipreallocated_callback_state {
	/* The control request that gets the reply once the event is done */
	struct ctdb_req_control_old *c;
};
2288
2289 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2290                                         int status, void *p)
2291 {
2292         struct ipreallocated_callback_state *state =
2293                 talloc_get_type(p, struct ipreallocated_callback_state);
2294
2295         if (status != 0) {
2296                 DEBUG(DEBUG_ERR,
2297                       (" \"ipreallocated\" event script failed (status %d)\n",
2298                        status));
2299                 if (status == -ETIMEDOUT) {
2300                         ctdb_ban_self(ctdb);
2301                 }
2302         }
2303
2304         ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2305         talloc_free(state);
2306 }
2307
2308 /* A control to run the ipreallocated event */
2309 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2310                                    struct ctdb_req_control_old *c,
2311                                    bool *async_reply)
2312 {
2313         int ret;
2314         struct ipreallocated_callback_state *state;
2315
2316         state = talloc(ctdb, struct ipreallocated_callback_state);
2317         CTDB_NO_MEMORY(ctdb, state);
2318
2319         DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
2320
2321         ret = ctdb_event_script_callback(ctdb, state,
2322                                          ctdb_ipreallocated_callback, state,
2323                                          CTDB_EVENT_IPREALLOCATED,
2324                                          "%s", "");
2325
2326         if (ret != 0) {
2327                 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
2328                 talloc_free(state);
2329                 return -1;
2330         }
2331
2332         /* tell the control that we will be reply asynchronously */
2333         state->c    = talloc_steal(state, c);
2334         *async_reply = true;
2335
2336         return 0;
2337 }
2338
2339
/*
 * Bookkeeping for one reload of the public IPs through a child process.
 * Freeing the handle runs ctdb_reloadips_destructor().
 */
struct ctdb_reloadips_handle {
	struct ctdb_context *ctdb;	/* owning daemon context */
	struct ctdb_req_control_old *c;	/* pending control, NULL once answered */
	int status;			/* status sent in the control reply */
	int fd[2];			/* fd[0] is read by the child handler */
	pid_t child;			/* process killed by the destructor */
	struct tevent_fd *fde;
};
2348
2349 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
2350 {
2351         if (h == h->ctdb->reload_ips) {
2352                 h->ctdb->reload_ips = NULL;
2353         }
2354         if (h->c != NULL) {
2355                 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
2356                 h->c = NULL;
2357         }
2358         ctdb_kill(h->ctdb, h->child, SIGKILL);
2359         return 0;
2360 }
2361
2362 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
2363                                          struct tevent_timer *te,
2364                                          struct timeval t, void *private_data)
2365 {
2366         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2367
2368         talloc_free(h);
2369 }
2370
2371 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
2372                                          struct tevent_fd *fde,
2373                                          uint16_t flags, void *private_data)
2374 {
2375         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2376
2377         char res;
2378         int ret;
2379
2380         ret = sys_read(h->fd[0], &res, 1);
2381         if (ret < 1 || res != 0) {
2382                 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
2383                 res = 1;
2384         }
2385         h->status = res;
2386
2387         talloc_free(h);
2388 }
2389
2390 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
2391 {
2392         TALLOC_CTX *mem_ctx = talloc_new(NULL);
2393         struct ctdb_public_ip_list_old *ips;
2394         struct ctdb_vnn *vnn;
2395         struct client_async_data *async_data;
2396         struct timeval timeout;
2397         TDB_DATA data;
2398         struct ctdb_client_control_state *state;
2399         bool first_add;
2400         unsigned int i;
2401         int ret;
2402
2403         CTDB_NO_MEMORY(ctdb, mem_ctx);
2404
2405         /* Read IPs from local node */
2406         ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
2407                                        CTDB_CURRENT_NODE, mem_ctx, &ips);
2408         if (ret != 0) {
2409                 DEBUG(DEBUG_ERR,
2410                       ("Unable to fetch public IPs from local node\n"));
2411                 talloc_free(mem_ctx);
2412                 return -1;
2413         }
2414
2415         /* Read IPs file - this is safe since this is a child process */
2416         ctdb->vnn = NULL;
2417         if (ctdb_set_public_addresses(ctdb, false) != 0) {
2418                 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
2419                 talloc_free(mem_ctx);
2420                 return -1;
2421         }
2422
2423         async_data = talloc_zero(mem_ctx, struct client_async_data);
2424         CTDB_NO_MEMORY(ctdb, async_data);
2425
2426         /* Compare IPs between node and file for IPs to be deleted */
2427         for (i = 0; i < ips->num; i++) {
2428                 /* */
2429                 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2430                         if (ctdb_same_ip(&vnn->public_address,
2431                                          &ips->ips[i].addr)) {
2432                                 /* IP is still in file */
2433                                 break;
2434                         }
2435                 }
2436
2437                 if (vnn == NULL) {
2438                         /* Delete IP ips->ips[i] */
2439                         struct ctdb_addr_info_old *pub;
2440
2441                         DEBUG(DEBUG_NOTICE,
2442                               ("IP %s no longer configured, deleting it\n",
2443                                ctdb_addr_to_str(&ips->ips[i].addr)));
2444
2445                         pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
2446                         CTDB_NO_MEMORY(ctdb, pub);
2447
2448                         pub->addr  = ips->ips[i].addr;
2449                         pub->mask  = 0;
2450                         pub->len   = 0;
2451
2452                         timeout = TAKEOVER_TIMEOUT();
2453
2454                         data.dsize = offsetof(struct ctdb_addr_info_old,
2455                                               iface) + pub->len;
2456                         data.dptr = (uint8_t *)pub;
2457
2458                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2459                                                   CTDB_CONTROL_DEL_PUBLIC_IP,
2460                                                   0, data, async_data,
2461                                                   &timeout, NULL);
2462                         if (state == NULL) {
2463                                 DEBUG(DEBUG_ERR,
2464                                       (__location__
2465                                        " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
2466                                 goto failed;
2467                         }
2468
2469                         ctdb_client_async_add(async_data, state);
2470                 }
2471         }
2472
2473         /* Compare IPs between node and file for IPs to be added */
2474         first_add = true;
2475         for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2476                 for (i = 0; i < ips->num; i++) {
2477                         if (ctdb_same_ip(&vnn->public_address,
2478                                          &ips->ips[i].addr)) {
2479                                 /* IP already on node */
2480                                 break;
2481                         }
2482                 }
2483                 if (i == ips->num) {
2484                         /* Add IP ips->ips[i] */
2485                         struct ctdb_addr_info_old *pub;
2486                         const char *ifaces = NULL;
2487                         uint32_t len;
2488                         struct vnn_interface *iface = NULL;
2489
2490                         DEBUG(DEBUG_NOTICE,
2491                               ("New IP %s configured, adding it\n",
2492                                ctdb_addr_to_str(&vnn->public_address)));
2493                         if (first_add) {
2494                                 uint32_t pnn = ctdb_get_pnn(ctdb);
2495
2496                                 data.dsize = sizeof(pnn);
2497                                 data.dptr  = (uint8_t *)&pnn;
2498
2499                                 ret = ctdb_client_send_message(
2500                                         ctdb,
2501                                         CTDB_BROADCAST_CONNECTED,
2502                                         CTDB_SRVID_REBALANCE_NODE,
2503                                         data);
2504                                 if (ret != 0) {
2505                                         DEBUG(DEBUG_WARNING,
2506                                               ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
2507                                 }
2508
2509                                 first_add = false;
2510                         }
2511
2512                         ifaces = vnn->ifaces->iface->name;
2513                         iface = vnn->ifaces->next;
2514                         while (iface != NULL) {
2515                                 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
2516                                                          iface->iface->name);
2517                                 iface = iface->next;
2518                         }
2519
2520                         len   = strlen(ifaces) + 1;
2521                         pub = talloc_zero_size(mem_ctx,
2522                                                offsetof(struct ctdb_addr_info_old, iface) + len);
2523                         CTDB_NO_MEMORY(ctdb, pub);
2524
2525                         pub->addr  = vnn->public_address;
2526                         pub->mask  = vnn->public_netmask_bits;
2527                         pub->len   = len;
2528                         memcpy(&pub->iface[0], ifaces, pub->len);
2529
2530                         timeout = TAKEOVER_TIMEOUT();
2531
2532                         data.dsize = offsetof(struct ctdb_addr_info_old,
2533                                               iface) + pub->len;
2534                         data.dptr = (uint8_t *)pub;
2535
2536                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2537                                                   CTDB_CONTROL_ADD_PUBLIC_IP,
2538                                                   0, data, async_data,
2539                                                   &timeout, NULL);
2540                         if (state == NULL) {
2541                                 DEBUG(DEBUG_ERR,
2542                                       (__location__
2543                                        " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
2544                                 goto failed;
2545                         }
2546
2547                         ctdb_client_async_add(async_data, state);
2548                 }
2549         }
2550
2551         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2552                 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
2553                 goto failed;
2554         }
2555
2556         talloc_free(mem_ctx);
2557         return 0;
2558
2559 failed:
2560         talloc_free(mem_ctx);
2561         return -1;
2562 }
2563
2564 /* This control is sent to force the node to re-read the public addresses file
2565    and drop any addresses we should nnot longer host, and add new addresses
2566    that we are now able to host
2567 */
2568 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
2569 {
2570         struct ctdb_reloadips_handle *h;
2571         pid_t parent = getpid();
2572
2573         if (ctdb->reload_ips != NULL) {
2574                 talloc_free(ctdb->reload_ips);
2575                 ctdb->reload_ips = NULL;
2576         }
2577
2578         h = talloc(ctdb, struct ctdb_reloadips_handle);
2579         CTDB_NO_MEMORY(ctdb, h);
2580         h->ctdb     = ctdb;
2581         h->c        = NULL;
2582         h->status   = -1;
2583         
2584         if (pipe(h->fd) == -1) {
2585                 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
2586                 talloc_free(h);
2587                 return -1;
2588         }
2589
2590         h->child = ctdb_fork(ctdb);
2591         if (h->child == (pid_t)-1) {
2592                 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
2593                 close(h->fd[0]);
2594                 close(h->fd[1]);
2595                 talloc_free(h);
2596                 return -1;
2597         }
2598
2599         /* child process */
2600         if (h->child == 0) {
2601                 signed char res = 0;
2602
2603                 close(h->fd[0]);
2604
2605                 prctl_set_comment("ctdb_reloadips");
2606                 if (switch_from_server_to_client(ctdb) != 0) {
2607                         DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
2608                         res = -1;
2609                 } else {
2610                         res = ctdb_reloadips_child(ctdb);
2611                         if (res != 0) {
2612                                 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
2613                         }
2614                 }
2615
2616                 sys_write(h->fd[1], &res, 1);
2617                 ctdb_wait_for_process_to_exit(parent);
2618                 _exit(0);
2619         }
2620
2621         h->c             = talloc_steal(h, c);
2622
2623         close(h->fd[1]);
2624         set_close_on_exec(h->fd[0]);
2625
2626         talloc_set_destructor(h, ctdb_reloadips_destructor);
2627
2628
2629         h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
2630                                ctdb_reloadips_child_handler, (void *)h);
2631         tevent_fd_set_auto_close(h->fde);
2632
2633         tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
2634                          ctdb_reloadips_timeout_event, h);
2635
2636         /* we reply later */
2637         *async_reply = true;
2638         return 0;
2639 }