ctdb-daemon: Stop inactive/disabled nodes from reporting available IPs
[samba.git] / ctdb / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6    Copyright (C) Martin Schwenke  2011
7
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3 of the License, or
11    (at your option) any later version.
12    
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17    
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, see <http://www.gnu.org/licenses/>.
20 */
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
26
27 #include <talloc.h>
28 #include <tevent.h>
29
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/sys_rw.h"
34 #include "lib/util/util_process.h"
35
36 #include "ctdb_private.h"
37 #include "ctdb_client.h"
38
39 #include "common/rb_tree.h"
40 #include "common/reqid.h"
41 #include "common/system.h"
42 #include "common/system_socket.h"
43 #include "common/common.h"
44 #include "common/logging.h"
45
46 #include "server/ipalloc.h"
47
/* Timeout value derived from the takeover_timeout tunable via
 * timeval_current_ofs() (presumably "now + takeover_timeout seconds" -
 * confirm against timeval_current_ofs()).  The expansion refers to a
 * variable named `ctdb` that must exist in the calling scope. */
#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)

/* First argument handed to timeval_current_ofs() when the gratuitous ARP
 * timer is re-armed (presumably seconds - TODO confirm) */
#define CTDB_ARP_INTERVAL 1
/* Number of times the gratuitous ARP callback runs before it frees its
 * state and stops rescheduling itself */
#define CTDB_ARP_REPEAT   3
52
/* A local network interface known to the takeover code.  Entries are
 * chained into a doubly-linked list. */
struct ctdb_interface {
        /* list linkage */
        struct ctdb_interface *prev;
        struct ctdb_interface *next;

        /* interface name */
        const char *name;

        /* true while the link is considered up */
        bool link_up;

        /* number of public addresses currently assigned to this interface */
        uint32_t references;
};
59
/* List node recording one interface that a public address may use */
struct vnn_interface {
        /* list linkage */
        struct vnn_interface *prev;
        struct vnn_interface *next;

        /* the interface this node refers to */
        struct ctdb_interface *iface;
};
64
65 /* state associated with a public ip address */
66 struct ctdb_vnn {
67         struct ctdb_vnn *prev, *next;
68
69         struct ctdb_interface *iface;
70         struct vnn_interface *ifaces;
71         ctdb_sock_addr public_address;
72         uint8_t public_netmask_bits;
73
74         /* the node number that is serving this public address, if any.
75            If no node serves this ip it is set to -1 */
76         int32_t pnn;
77
78         /* List of clients to tickle for this public address */
79         struct ctdb_tcp_array *tcp_array;
80
81         /* whether we need to update the other nodes with changes to our list
82            of connected clients */
83         bool tcp_update_needed;
84
85         /* a context to hang sending gratious arp events off */
86         TALLOC_CTX *takeover_ctx;
87
88         /* Set to true any time an update to this VNN is in flight.
89            This helps to avoid races. */
90         bool update_in_flight;
91
92         /* If CTDB_CONTROL_DEL_PUBLIC_IP is received for this IP
93          * address then this flag is set.  It will be deleted in the
94          * release IP callback. */
95         bool delete_pending;
96 };
97
98 static const char *iface_string(const struct ctdb_interface *iface)
99 {
100         return (iface != NULL ? iface->name : "__none__");
101 }
102
103 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
104 {
105         return iface_string(vnn->iface);
106 }
107
/* Forward declaration: look up an interface by name (defined further down) */
static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
                                              const char *iface);
110
111 static struct ctdb_interface *
112 ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
113 {
114         struct ctdb_interface *i;
115
116         if (strlen(iface) > CTDB_IFACE_SIZE) {
117                 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
118                 return NULL;
119         }
120
121         /* Verify that we don't have an entry for this ip yet */
122         i = ctdb_find_iface(ctdb, iface);
123         if (i != NULL) {
124                 return i;
125         }
126
127         /* create a new structure for this interface */
128         i = talloc_zero(ctdb, struct ctdb_interface);
129         if (i == NULL) {
130                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
131                 return NULL;
132         }
133         i->name = talloc_strdup(i, iface);
134         if (i->name == NULL) {
135                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
136                 talloc_free(i);
137                 return NULL;
138         }
139
140         i->link_up = true;
141
142         DLIST_ADD(ctdb->ifaces, i);
143
144         return i;
145 }
146
147 static bool vnn_has_interface(struct ctdb_vnn *vnn,
148                               const struct ctdb_interface *iface)
149 {
150         struct vnn_interface *i;
151
152         for (i = vnn->ifaces; i != NULL; i = i->next) {
153                 if (iface == i->iface) {
154                         return true;
155                 }
156         }
157
158         return false;
159 }
160
161 /* If any interfaces now have no possible IPs then delete them.  This
162  * implementation is naive (i.e. simple) rather than clever
163  * (i.e. complex).  Given that this is run on delip and that operation
164  * is rare, this doesn't need to be efficient - it needs to be
165  * foolproof.  One alternative is reference counting, where the logic
166  * is distributed and can, therefore, be broken in multiple places.
167  * Another alternative is to build a red-black tree of interfaces that
168  * can have addresses (by walking ctdb->vnn once) and then walking
169  * ctdb->ifaces once and deleting those not in the tree.  Let's go to
170  * one of those if the naive implementation causes problems...  :-)
171  */
172 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
173                                         struct ctdb_vnn *vnn)
174 {
175         struct ctdb_interface *i, *next;
176
177         /* For each interface, check if there's an IP using it. */
178         for (i = ctdb->ifaces; i != NULL; i = next) {
179                 struct ctdb_vnn *tv;
180                 bool found;
181                 next = i->next;
182
183                 /* Only consider interfaces named in the given VNN. */
184                 if (!vnn_has_interface(vnn, i)) {
185                         continue;
186                 }
187
188                 /* Search for a vnn with this interface. */
189                 found = false;
190                 for (tv=ctdb->vnn; tv; tv=tv->next) {
191                         if (vnn_has_interface(tv, i)) {
192                                 found = true;
193                                 break;
194                         }
195                 }
196
197                 if (!found) {
198                         /* None of the VNNs are using this interface. */
199                         DLIST_REMOVE(ctdb->ifaces, i);
200                         talloc_free(i);
201                 }
202         }
203 }
204
205
206 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
207                                               const char *iface)
208 {
209         struct ctdb_interface *i;
210
211         for (i=ctdb->ifaces;i;i=i->next) {
212                 if (strcmp(i->name, iface) == 0) {
213                         return i;
214                 }
215         }
216
217         return NULL;
218 }
219
220 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
221                                                   struct ctdb_vnn *vnn)
222 {
223         struct vnn_interface *i;
224         struct ctdb_interface *cur = NULL;
225         struct ctdb_interface *best = NULL;
226
227         for (i = vnn->ifaces; i != NULL; i = i->next) {
228
229                 cur = i->iface;
230
231                 if (!cur->link_up) {
232                         continue;
233                 }
234
235                 if (best == NULL) {
236                         best = cur;
237                         continue;
238                 }
239
240                 if (cur->references < best->references) {
241                         best = cur;
242                         continue;
243                 }
244         }
245
246         return best;
247 }
248
249 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
250                                      struct ctdb_vnn *vnn)
251 {
252         struct ctdb_interface *best = NULL;
253
254         if (vnn->iface) {
255                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
256                                    "still assigned to iface '%s'\n",
257                                    ctdb_addr_to_str(&vnn->public_address),
258                                    ctdb_vnn_iface_string(vnn)));
259                 return 0;
260         }
261
262         best = ctdb_vnn_best_iface(ctdb, vnn);
263         if (best == NULL) {
264                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
265                                   "cannot assign to iface any iface\n",
266                                   ctdb_addr_to_str(&vnn->public_address)));
267                 return -1;
268         }
269
270         vnn->iface = best;
271         best->references++;
272         vnn->pnn = ctdb->pnn;
273
274         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
275                            "now assigned to iface '%s' refs[%d]\n",
276                            ctdb_addr_to_str(&vnn->public_address),
277                            ctdb_vnn_iface_string(vnn),
278                            best->references));
279         return 0;
280 }
281
282 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
283                                     struct ctdb_vnn *vnn)
284 {
285         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
286                            "now unassigned (old iface '%s' refs[%d])\n",
287                            ctdb_addr_to_str(&vnn->public_address),
288                            ctdb_vnn_iface_string(vnn),
289                            vnn->iface?vnn->iface->references:0));
290         if (vnn->iface) {
291                 vnn->iface->references--;
292         }
293         vnn->iface = NULL;
294         if (vnn->pnn == ctdb->pnn) {
295                 vnn->pnn = -1;
296         }
297 }
298
299 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
300                                struct ctdb_vnn *vnn)
301 {
302         uint32_t flags;
303         struct vnn_interface *i;
304
305         /* Nodes that are not RUNNING can not host IPs */
306         if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
307                 return false;
308         }
309
310         flags = ctdb->nodes[ctdb->pnn]->flags;
311         if ((flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED)) != 0) {
312                 return false;
313         }
314
315         if (vnn->delete_pending) {
316                 return false;
317         }
318
319         if (vnn->iface && vnn->iface->link_up) {
320                 return true;
321         }
322
323         for (i = vnn->ifaces; i != NULL; i = i->next) {
324                 if (i->iface->link_up) {
325                         return true;
326                 }
327         }
328
329         return false;
330 }
331
/* State carried by the gratuitous ARP timer callback for one public address */
struct ctdb_takeover_arp {
        struct ctdb_context *ctdb;
        /* number of times the callback has run so far */
        uint32_t count;
        /* address announced via ctdb_sys_send_arp() */
        ctdb_sock_addr addr;
        /* connections to send tickle ACKs for; may be NULL */
        struct ctdb_tcp_array *tcparray;
        /* public address state this announcement belongs to */
        struct ctdb_vnn *vnn;
};
339
340
341 /*
342   lists of tcp endpoints
343  */
344 struct ctdb_tcp_list {
345         struct ctdb_tcp_list *prev, *next;
346         struct ctdb_connection connection;
347 };
348
349 /*
350   list of clients to kill on IP release
351  */
352 struct ctdb_client_ip {
353         struct ctdb_client_ip *prev, *next;
354         struct ctdb_context *ctdb;
355         ctdb_sock_addr addr;
356         uint32_t client_id;
357 };
358
359
360 /*
361   send a gratuitous arp
362  */
363 static void ctdb_control_send_arp(struct tevent_context *ev,
364                                   struct tevent_timer *te,
365                                   struct timeval t, void *private_data)
366 {
367         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
368                                                         struct ctdb_takeover_arp);
369         int i, ret;
370         struct ctdb_tcp_array *tcparray;
371         const char *iface = ctdb_vnn_iface_string(arp->vnn);
372
373         ret = ctdb_sys_send_arp(&arp->addr, iface);
374         if (ret != 0) {
375                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
376                                   iface, strerror(errno)));
377         }
378
379         tcparray = arp->tcparray;
380         if (tcparray) {
381                 for (i=0;i<tcparray->num;i++) {
382                         struct ctdb_connection *tcon;
383
384                         tcon = &tcparray->connections[i];
385                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
386                                 (unsigned)ntohs(tcon->dst.ip.sin_port),
387                                 ctdb_addr_to_str(&tcon->src),
388                                 (unsigned)ntohs(tcon->src.ip.sin_port)));
389                         ret = ctdb_sys_send_tcp(
390                                 &tcon->src,
391                                 &tcon->dst,
392                                 0, 0, 0);
393                         if (ret != 0) {
394                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
395                                         ctdb_addr_to_str(&tcon->src)));
396                         }
397                 }
398         }
399
400         arp->count++;
401
402         if (arp->count == CTDB_ARP_REPEAT) {
403                 talloc_free(arp);
404                 return;
405         }
406
407         tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
408                          timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
409                          ctdb_control_send_arp, arp);
410 }
411
412 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
413                                        struct ctdb_vnn *vnn)
414 {
415         struct ctdb_takeover_arp *arp;
416         struct ctdb_tcp_array *tcparray;
417
418         if (!vnn->takeover_ctx) {
419                 vnn->takeover_ctx = talloc_new(vnn);
420                 if (!vnn->takeover_ctx) {
421                         return -1;
422                 }
423         }
424
425         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
426         if (!arp) {
427                 return -1;
428         }
429
430         arp->ctdb = ctdb;
431         arp->addr = vnn->public_address;
432         arp->vnn  = vnn;
433
434         tcparray = vnn->tcp_array;
435         if (tcparray) {
436                 /* add all of the known tcp connections for this IP to the
437                    list of tcp connections to send tickle acks for */
438                 arp->tcparray = talloc_steal(arp, tcparray);
439
440                 vnn->tcp_array = NULL;
441                 vnn->tcp_update_needed = true;
442         }
443
444         tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
445                          timeval_zero(), ctdb_control_send_arp, arp);
446
447         return 0;
448 }
449
/* State passed to the takeip event script callback */
struct ctdb_do_takeip_state {
        /* control request to reply to once the event has finished */
        struct ctdb_req_control_old *c;
        /* public address being taken over */
        struct ctdb_vnn *vnn;
};
454
455 /*
456   called when takeip event finishes
457  */
458 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
459                                     void *private_data)
460 {
461         struct ctdb_do_takeip_state *state =
462                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
463         int32_t ret;
464         TDB_DATA data;
465
466         if (status != 0) {
467                 if (status == -ETIME) {
468                         ctdb_ban_self(ctdb);
469                 }
470                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
471                                  ctdb_addr_to_str(&state->vnn->public_address),
472                                  ctdb_vnn_iface_string(state->vnn)));
473                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
474
475                 talloc_free(state);
476                 return;
477         }
478
479         if (ctdb->do_checkpublicip) {
480
481         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
482         if (ret != 0) {
483                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
484                 talloc_free(state);
485                 return;
486         }
487
488         }
489
490         data.dptr  = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
491         data.dsize = strlen((char *)data.dptr) + 1;
492         DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
493
494         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
495
496
497         /* the control succeeded */
498         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
499         talloc_free(state);
500         return;
501 }
502
503 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
504 {
505         state->vnn->update_in_flight = false;
506         return 0;
507 }
508
509 /*
510   take over an ip address
511  */
512 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
513                               struct ctdb_req_control_old *c,
514                               struct ctdb_vnn *vnn)
515 {
516         int ret;
517         struct ctdb_do_takeip_state *state;
518
519         if (vnn->update_in_flight) {
520                 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
521                                     "update for this IP already in flight\n",
522                                     ctdb_addr_to_str(&vnn->public_address),
523                                     vnn->public_netmask_bits));
524                 return -1;
525         }
526
527         ret = ctdb_vnn_assign_iface(ctdb, vnn);
528         if (ret != 0) {
529                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
530                                  "assign a usable interface\n",
531                                  ctdb_addr_to_str(&vnn->public_address),
532                                  vnn->public_netmask_bits));
533                 return -1;
534         }
535
536         state = talloc(vnn, struct ctdb_do_takeip_state);
537         CTDB_NO_MEMORY(ctdb, state);
538
539         state->c = NULL;
540         state->vnn   = vnn;
541
542         vnn->update_in_flight = true;
543         talloc_set_destructor(state, ctdb_takeip_destructor);
544
545         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
546                             ctdb_addr_to_str(&vnn->public_address),
547                             vnn->public_netmask_bits,
548                             ctdb_vnn_iface_string(vnn)));
549
550         ret = ctdb_event_script_callback(ctdb,
551                                          state,
552                                          ctdb_do_takeip_callback,
553                                          state,
554                                          CTDB_EVENT_TAKE_IP,
555                                          "%s %s %u",
556                                          ctdb_vnn_iface_string(vnn),
557                                          ctdb_addr_to_str(&vnn->public_address),
558                                          vnn->public_netmask_bits);
559
560         if (ret != 0) {
561                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
562                         ctdb_addr_to_str(&vnn->public_address),
563                         ctdb_vnn_iface_string(vnn)));
564                 talloc_free(state);
565                 return -1;
566         }
567
568         state->c = talloc_steal(ctdb, c);
569         return 0;
570 }
571
/* State passed to the updateip event script callback */
struct ctdb_do_updateip_state {
        /* control request to reply to once the event has finished */
        struct ctdb_req_control_old *c;
        /* interface the address was on before the update; may be NULL */
        struct ctdb_interface *old;
        /* public address being moved */
        struct ctdb_vnn *vnn;
};
577
578 /*
579   called when updateip event finishes
580  */
581 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
582                                       void *private_data)
583 {
584         struct ctdb_do_updateip_state *state =
585                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
586
587         if (status != 0) {
588                 if (status == -ETIME) {
589                         ctdb_ban_self(ctdb);
590                 }
591                 DEBUG(DEBUG_ERR,
592                       ("Failed update of IP %s from interface %s to %s\n",
593                        ctdb_addr_to_str(&state->vnn->public_address),
594                        iface_string(state->old),
595                        ctdb_vnn_iface_string(state->vnn)));
596
597                 /*
598                  * All we can do is reset the old interface
599                  * and let the next run fix it
600                  */
601                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
602                 state->vnn->iface = state->old;
603                 state->vnn->iface->references++;
604
605                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
606                 talloc_free(state);
607                 return;
608         }
609
610         /* the control succeeded */
611         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
612         talloc_free(state);
613         return;
614 }
615
616 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
617 {
618         state->vnn->update_in_flight = false;
619         return 0;
620 }
621
622 /*
623   update (move) an ip address
624  */
625 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
626                                 struct ctdb_req_control_old *c,
627                                 struct ctdb_vnn *vnn)
628 {
629         int ret;
630         struct ctdb_do_updateip_state *state;
631         struct ctdb_interface *old = vnn->iface;
632         const char *old_name = iface_string(old);
633         const char *new_name;
634
635         if (vnn->update_in_flight) {
636                 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
637                                     "update for this IP already in flight\n",
638                                     ctdb_addr_to_str(&vnn->public_address),
639                                     vnn->public_netmask_bits));
640                 return -1;
641         }
642
643         ctdb_vnn_unassign_iface(ctdb, vnn);
644         ret = ctdb_vnn_assign_iface(ctdb, vnn);
645         if (ret != 0) {
646                 DEBUG(DEBUG_ERR,("Update of IP %s/%u failed to "
647                                  "assign a usable interface (old iface '%s')\n",
648                                  ctdb_addr_to_str(&vnn->public_address),
649                                  vnn->public_netmask_bits,
650                                  old_name));
651                 return -1;
652         }
653
654         if (old == vnn->iface) {
655                 /* A benign update from one interface onto itself.
656                  * no need to run the eventscripts in this case, just return
657                  * success.
658                  */
659                 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
660                 return 0;
661         }
662
663         state = talloc(vnn, struct ctdb_do_updateip_state);
664         CTDB_NO_MEMORY(ctdb, state);
665
666         state->c = NULL;
667         state->old = old;
668         state->vnn = vnn;
669
670         vnn->update_in_flight = true;
671         talloc_set_destructor(state, ctdb_updateip_destructor);
672
673         new_name = ctdb_vnn_iface_string(vnn);
674         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
675                             "interface %s to %s\n",
676                             ctdb_addr_to_str(&vnn->public_address),
677                             vnn->public_netmask_bits,
678                             old_name,
679                             new_name));
680
681         ret = ctdb_event_script_callback(ctdb,
682                                          state,
683                                          ctdb_do_updateip_callback,
684                                          state,
685                                          CTDB_EVENT_UPDATE_IP,
686                                          "%s %s %s %u",
687                                          old_name,
688                                          new_name,
689                                          ctdb_addr_to_str(&vnn->public_address),
690                                          vnn->public_netmask_bits);
691         if (ret != 0) {
692                 DEBUG(DEBUG_ERR,
693                       ("Failed update IP %s from interface %s to %s\n",
694                        ctdb_addr_to_str(&vnn->public_address),
695                        old_name, new_name));
696                 talloc_free(state);
697                 return -1;
698         }
699
700         state->c = talloc_steal(ctdb, c);
701         return 0;
702 }
703
704 /*
705   Find the vnn of the node that has a public ip address
706   returns -1 if the address is not known as a public address
707  */
708 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
709 {
710         struct ctdb_vnn *vnn;
711
712         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
713                 if (ctdb_same_ip(&vnn->public_address, addr)) {
714                         return vnn;
715                 }
716         }
717
718         return NULL;
719 }
720
721 /*
722   take over an ip address
723  */
724 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
725                                  struct ctdb_req_control_old *c,
726                                  TDB_DATA indata,
727                                  bool *async_reply)
728 {
729         int ret;
730         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
731         struct ctdb_vnn *vnn;
732         bool have_ip = false;
733         bool do_updateip = false;
734         bool do_takeip = false;
735         struct ctdb_interface *best_iface = NULL;
736
737         if (pip->pnn != ctdb->pnn) {
738                 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
739                                  "with pnn %d, but we're node %d\n",
740                                  ctdb_addr_to_str(&pip->addr),
741                                  pip->pnn, ctdb->pnn));
742                 return -1;
743         }
744
745         /* update out vnn list */
746         vnn = find_public_ip_vnn(ctdb, &pip->addr);
747         if (vnn == NULL) {
748                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
749                         ctdb_addr_to_str(&pip->addr)));
750                 return 0;
751         }
752
753         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
754                 have_ip = ctdb_sys_have_ip(&pip->addr);
755         }
756         best_iface = ctdb_vnn_best_iface(ctdb, vnn);
757         if (best_iface == NULL) {
758                 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
759                                  "a usable interface (old %s, have_ip %d)\n",
760                                  ctdb_addr_to_str(&vnn->public_address),
761                                  vnn->public_netmask_bits,
762                                  ctdb_vnn_iface_string(vnn),
763                                  have_ip));
764                 return -1;
765         }
766
767         if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
768                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
769                                   "and we have it on iface[%s], but it was assigned to node %d"
770                                   "and we are node %d, banning ourself\n",
771                                  ctdb_addr_to_str(&vnn->public_address),
772                                  ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
773                 ctdb_ban_self(ctdb);
774                 return -1;
775         }
776
777         if (vnn->pnn == -1 && have_ip) {
778                 /* This will cause connections to be reset and
779                  * reestablished.  However, this is a very unusual
780                  * situation and doing this will completely repair the
781                  * inconsistency in the VNN.
782                  */
783                 DEBUG(DEBUG_WARNING,
784                       (__location__
785                        " Doing updateip for IP %s already on an interface\n",
786                        ctdb_addr_to_str(&vnn->public_address)));
787                 do_updateip = true;
788         }
789
790         if (vnn->iface) {
791                 if (vnn->iface != best_iface) {
792                         if (!vnn->iface->link_up) {
793                                 do_updateip = true;
794                         } else if (vnn->iface->references > (best_iface->references + 1)) {
795                                 /* only move when the rebalance gains something */
796                                         do_updateip = true;
797                         }
798                 }
799         }
800
801         if (!have_ip) {
802                 if (do_updateip) {
803                         ctdb_vnn_unassign_iface(ctdb, vnn);
804                         do_updateip = false;
805                 }
806                 do_takeip = true;
807         }
808
809         if (do_takeip) {
810                 ret = ctdb_do_takeip(ctdb, c, vnn);
811                 if (ret != 0) {
812                         return -1;
813                 }
814         } else if (do_updateip) {
815                 ret = ctdb_do_updateip(ctdb, c, vnn);
816                 if (ret != 0) {
817                         return -1;
818                 }
819         } else {
820                 /*
821                  * The interface is up and the kernel known the ip
822                  * => do nothing
823                  */
824                 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
825                         ctdb_addr_to_str(&pip->addr),
826                         vnn->public_netmask_bits,
827                         ctdb_vnn_iface_string(vnn)));
828                 return 0;
829         }
830
831         /* tell ctdb_control.c that we will be replying asynchronously */
832         *async_reply = true;
833
834         return 0;
835 }
836
/*
 * Remove a public address from this node and free it.
 *
 * The order of the steps matters: the VNN is taken off ctdb->vnn first,
 * so that the orphan scan (which walks ctdb->vnn) does not count the
 * VNN being deleted as a user of its interfaces, and it is freed last
 * because the preceding calls still read it.
 */
static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
{
        DLIST_REMOVE(ctdb->vnn, vnn);
        /* drop the reference held on the assigned interface, if any */
        ctdb_vnn_unassign_iface(ctdb, vnn);
        /* delete interfaces that only this VNN was listing */
        ctdb_remove_orphaned_ifaces(ctdb, vnn);
        talloc_free(vnn);
}
844
845 static struct ctdb_vnn *release_ip_post(struct ctdb_context *ctdb,
846                                         struct ctdb_vnn *vnn,
847                                         ctdb_sock_addr *addr)
848 {
849         TDB_DATA data;
850
851         /* Send a message to all clients of this node telling them
852          * that the cluster has been reconfigured and they should
853          * close any connections on this IP address
854          */
855         data.dptr = (uint8_t *)ctdb_addr_to_str(addr);
856         data.dsize = strlen((char *)data.dptr)+1;
857         DEBUG(DEBUG_INFO, ("Sending RELEASE_IP message for %s\n", data.dptr));
858         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
859
860         ctdb_vnn_unassign_iface(ctdb, vnn);
861
862         /* Process the IP if it has been marked for deletion */
863         if (vnn->delete_pending) {
864                 do_delete_ip(ctdb, vnn);
865                 return NULL;
866         }
867
868         return vnn;
869 }
870
/*
 * State handed from ctdb_control_release_ip() to release_ip_callback()
 * while the "releaseip" event script is running.
 */
struct release_ip_callback_state {
        /* control to reply to; set once the request has been stolen */
        struct ctdb_req_control_old *c;
        /* private copy of the address being released */
        ctdb_sock_addr *addr;
        /* VNN for addr; becomes NULL if release_ip_post() deletes it */
        struct ctdb_vnn *vnn;
        /* PNN stored into vnn->pnn once the release has succeeded */
        uint32_t target_pnn;
};
877
878 /*
879   called when releaseip event finishes
880  */
881 static void release_ip_callback(struct ctdb_context *ctdb, int status,
882                                 void *private_data)
883 {
884         struct release_ip_callback_state *state =
885                 talloc_get_type(private_data, struct release_ip_callback_state);
886
887         if (status == -ETIME) {
888                 ctdb_ban_self(ctdb);
889         }
890
891         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
892                 if  (ctdb_sys_have_ip(state->addr)) {
893                         DEBUG(DEBUG_ERR,
894                               ("IP %s still hosted during release IP callback, failing\n",
895                                ctdb_addr_to_str(state->addr)));
896                         ctdb_request_control_reply(ctdb, state->c,
897                                                    NULL, -1, NULL);
898                         talloc_free(state);
899                         return;
900                 }
901         }
902
903         state->vnn->pnn = state->target_pnn;
904         state->vnn = release_ip_post(ctdb, state->vnn, state->addr);
905
906         /* the control succeeded */
907         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
908         talloc_free(state);
909 }
910
911 static int ctdb_releaseip_destructor(struct release_ip_callback_state *state)
912 {
913         if (state->vnn != NULL) {
914                 state->vnn->update_in_flight = false;
915         }
916         return 0;
917 }
918
919 /*
920   release an ip address
921  */
922 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
923                                 struct ctdb_req_control_old *c,
924                                 TDB_DATA indata, 
925                                 bool *async_reply)
926 {
927         int ret;
928         struct release_ip_callback_state *state;
929         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
930         struct ctdb_vnn *vnn;
931         const char *iface;
932
933         /* update our vnn list */
934         vnn = find_public_ip_vnn(ctdb, &pip->addr);
935         if (vnn == NULL) {
936                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
937                         ctdb_addr_to_str(&pip->addr)));
938                 return 0;
939         }
940
941         /* stop any previous arps */
942         talloc_free(vnn->takeover_ctx);
943         vnn->takeover_ctx = NULL;
944
945         /* RELEASE_IP controls are sent to all nodes that should not
946          * be hosting a particular IP.  This serves 2 purposes.  The
947          * first is to help resolve any inconsistencies.  If a node
948          * does unexpectly host an IP then it will be released.  The
949          * 2nd is to use a "redundant release" to tell non-takeover
950          * nodes where an IP is moving to.  This is how "ctdb ip" can
951          * report the (likely) location of an IP by only asking the
952          * local node.  Redundant releases need to update the PNN but
953          * are otherwise ignored.
954          */
955         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
956                 if (!ctdb_sys_have_ip(&pip->addr)) {
957                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
958                                 ctdb_addr_to_str(&pip->addr),
959                                 vnn->public_netmask_bits,
960                                 ctdb_vnn_iface_string(vnn)));
961                         vnn->pnn = pip->pnn;
962                         ctdb_vnn_unassign_iface(ctdb, vnn);
963                         return 0;
964                 }
965         } else {
966                 if (vnn->iface == NULL) {
967                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
968                                            ctdb_addr_to_str(&pip->addr),
969                                            vnn->public_netmask_bits));
970                         vnn->pnn = pip->pnn;
971                         return 0;
972                 }
973         }
974
975         /* There is a potential race between take_ip and us because we
976          * update the VNN via a callback that run when the
977          * eventscripts have been run.  Avoid the race by allowing one
978          * update to be in flight at a time.
979          */
980         if (vnn->update_in_flight) {
981                 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
982                                     "update for this IP already in flight\n",
983                                     ctdb_addr_to_str(&vnn->public_address),
984                                     vnn->public_netmask_bits));
985                 return -1;
986         }
987
988         iface = ctdb_vnn_iface_string(vnn);
989
990         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
991                 ctdb_addr_to_str(&pip->addr),
992                 vnn->public_netmask_bits,
993                 iface,
994                 pip->pnn));
995
996         state = talloc(ctdb, struct release_ip_callback_state);
997         if (state == NULL) {
998                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
999                                __FILE__, __LINE__);
1000                 return -1;
1001         }
1002
1003         state->c = NULL;
1004         state->addr = talloc(state, ctdb_sock_addr);
1005         if (state->addr == NULL) {
1006                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1007                                __FILE__, __LINE__);
1008                 talloc_free(state);
1009                 return -1;
1010         }
1011         *state->addr = pip->addr;
1012         state->target_pnn = pip->pnn;
1013         state->vnn   = vnn;
1014
1015         vnn->update_in_flight = true;
1016         talloc_set_destructor(state, ctdb_releaseip_destructor);
1017
1018         ret = ctdb_event_script_callback(ctdb, 
1019                                          state, release_ip_callback, state,
1020                                          CTDB_EVENT_RELEASE_IP,
1021                                          "%s %s %u",
1022                                          iface,
1023                                          ctdb_addr_to_str(&pip->addr),
1024                                          vnn->public_netmask_bits);
1025         if (ret != 0) {
1026                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
1027                         ctdb_addr_to_str(&pip->addr),
1028                         ctdb_vnn_iface_string(vnn)));
1029                 talloc_free(state);
1030                 return -1;
1031         }
1032
1033         /* tell the control that we will be reply asynchronously */
1034         *async_reply = true;
1035         state->c = talloc_steal(state, c);
1036         return 0;
1037 }
1038
1039 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1040                                    ctdb_sock_addr *addr,
1041                                    unsigned mask, const char *ifaces,
1042                                    bool check_address)
1043 {
1044         struct ctdb_vnn      *vnn;
1045         char *tmp;
1046         const char *iface;
1047
1048         /* Verify that we don't have an entry for this IP yet */
1049         for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
1050                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1051                         DEBUG(DEBUG_ERR,
1052                               ("Duplicate public IP address '%s'\n",
1053                                ctdb_addr_to_str(addr)));
1054                         return -1;
1055                 }
1056         }
1057
1058         /* Create a new VNN structure for this IP address */
1059         vnn = talloc_zero(ctdb, struct ctdb_vnn);
1060         if (vnn == NULL) {
1061                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1062                 return -1;
1063         }
1064         tmp = talloc_strdup(vnn, ifaces);
1065         if (tmp == NULL) {
1066                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1067                 talloc_free(vnn);
1068                 return -1;
1069         }
1070         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1071                 struct vnn_interface *vnn_iface;
1072                 struct ctdb_interface *i;
1073                 if (!ctdb_sys_check_iface_exists(iface)) {
1074                         DEBUG(DEBUG_ERR,
1075                               ("Unknown interface %s for public address %s\n",
1076                                iface, ctdb_addr_to_str(addr)));
1077                         talloc_free(vnn);
1078                         return -1;
1079                 }
1080
1081                 i = ctdb_add_local_iface(ctdb, iface);
1082                 if (i == NULL) {
1083                         DEBUG(DEBUG_ERR,
1084                               ("Failed to add interface '%s' "
1085                                "for public address %s\n",
1086                                iface, ctdb_addr_to_str(addr)));
1087                         talloc_free(vnn);
1088                         return -1;
1089                 }
1090
1091                 vnn_iface = talloc_zero(vnn, struct vnn_interface);
1092                 if (vnn_iface == NULL) {
1093                         DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1094                         talloc_free(vnn);
1095                         return -1;
1096                 }
1097
1098                 vnn_iface->iface = i;
1099                 DLIST_ADD_END(vnn->ifaces, vnn_iface);
1100         }
1101         talloc_free(tmp);
1102         vnn->public_address      = *addr;
1103         vnn->public_netmask_bits = mask;
1104         vnn->pnn                 = -1;
1105
1106         DLIST_ADD(ctdb->vnn, vnn);
1107
1108         return 0;
1109 }
1110
1111 /*
1112   setup the public address lists from a file
1113 */
1114 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1115 {
1116         bool ok;
1117         char **lines;
1118         int nlines;
1119         int i;
1120
1121         /* If no public addresses file given then try the default */
1122         if (ctdb->public_addresses_file == NULL) {
1123                 const char *b = getenv("CTDB_BASE");
1124                 if (b == NULL) {
1125                         DBG_ERR("CTDB_BASE not set\n");
1126                         return -1;
1127                 }
1128                 ctdb->public_addresses_file = talloc_asprintf(
1129                                         ctdb, "%s/%s", b, "public_addresses");
1130                 if (ctdb->public_addresses_file == NULL) {
1131                         DBG_ERR("Out of memory\n");
1132                         return -1;
1133                 }
1134         }
1135
1136         /* If the file doesn't exist then warn and do nothing */
1137         ok = file_exist(ctdb->public_addresses_file);
1138         if (!ok) {
1139                 D_WARNING("Not loading public addresses, no file %s\n",
1140                           ctdb->public_addresses_file);
1141                 return 0;
1142         }
1143
1144         lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1145         if (lines == NULL) {
1146                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1147                 return -1;
1148         }
1149         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1150                 nlines--;
1151         }
1152
1153         for (i=0;i<nlines;i++) {
1154                 unsigned mask;
1155                 ctdb_sock_addr addr;
1156                 const char *addrstr;
1157                 const char *ifaces;
1158                 char *tok, *line;
1159
1160                 line = lines[i];
1161                 while ((*line == ' ') || (*line == '\t')) {
1162                         line++;
1163                 }
1164                 if (*line == '#') {
1165                         continue;
1166                 }
1167                 if (strcmp(line, "") == 0) {
1168                         continue;
1169                 }
1170                 tok = strtok(line, " \t");
1171                 addrstr = tok;
1172
1173                 tok = strtok(NULL, " \t");
1174                 if (tok == NULL) {
1175                         D_ERR("No interface specified at line %u "
1176                               "of public addresses file\n", i+1);
1177                         talloc_free(lines);
1178                         return -1;
1179                 }
1180                 ifaces = tok;
1181
1182                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1183                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1184                         talloc_free(lines);
1185                         return -1;
1186                 }
1187                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1188                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1189                         talloc_free(lines);
1190                         return -1;
1191                 }
1192         }
1193
1194
1195         D_NOTICE("Loaded public addresses from %s\n",
1196                  ctdb->public_addresses_file);
1197
1198         talloc_free(lines);
1199         return 0;
1200 }
1201
1202 /*
1203   destroy a ctdb_client_ip structure
1204  */
1205 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1206 {
1207         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1208                 ctdb_addr_to_str(&ip->addr),
1209                 ntohs(ip->addr.ip.sin_port),
1210                 ip->client_id));
1211
1212         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1213         return 0;
1214 }
1215
1216 /*
1217   called by a client to inform us of a TCP connection that it is managing
1218   that should tickled with an ACK when IP takeover is done
1219  */
1220 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1221                                 TDB_DATA indata)
1222 {
1223         struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1224         struct ctdb_connection *tcp_sock = NULL;
1225         struct ctdb_tcp_list *tcp;
1226         struct ctdb_connection t;
1227         int ret;
1228         TDB_DATA data;
1229         struct ctdb_client_ip *ip;
1230         struct ctdb_vnn *vnn;
1231         ctdb_sock_addr addr;
1232
1233         /* If we don't have public IPs, tickles are useless */
1234         if (ctdb->vnn == NULL) {
1235                 return 0;
1236         }
1237
1238         tcp_sock = (struct ctdb_connection *)indata.dptr;
1239
1240         addr = tcp_sock->src;
1241         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1242         addr = tcp_sock->dst;
1243         ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
1244
1245         ZERO_STRUCT(addr);
1246         memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1247         vnn = find_public_ip_vnn(ctdb, &addr);
1248         if (vnn == NULL) {
1249                 switch (addr.sa.sa_family) {
1250                 case AF_INET:
1251                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1252                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1253                                         ctdb_addr_to_str(&addr)));
1254                         }
1255                         break;
1256                 case AF_INET6:
1257                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1258                                 ctdb_addr_to_str(&addr)));
1259                         break;
1260                 default:
1261                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1262                 }
1263
1264                 return 0;
1265         }
1266
1267         if (vnn->pnn != ctdb->pnn) {
1268                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1269                         ctdb_addr_to_str(&addr),
1270                         client_id, client->pid));
1271                 /* failing this call will tell smbd to die */
1272                 return -1;
1273         }
1274
1275         ip = talloc(client, struct ctdb_client_ip);
1276         CTDB_NO_MEMORY(ctdb, ip);
1277
1278         ip->ctdb      = ctdb;
1279         ip->addr      = addr;
1280         ip->client_id = client_id;
1281         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1282         DLIST_ADD(ctdb->client_ip_list, ip);
1283
1284         tcp = talloc(client, struct ctdb_tcp_list);
1285         CTDB_NO_MEMORY(ctdb, tcp);
1286
1287         tcp->connection.src = tcp_sock->src;
1288         tcp->connection.dst = tcp_sock->dst;
1289
1290         DLIST_ADD(client->tcp_list, tcp);
1291
1292         t.src = tcp_sock->src;
1293         t.dst = tcp_sock->dst;
1294
1295         data.dptr = (uint8_t *)&t;
1296         data.dsize = sizeof(t);
1297
1298         switch (addr.sa.sa_family) {
1299         case AF_INET:
1300                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1301                         (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1302                         ctdb_addr_to_str(&tcp_sock->src),
1303                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1304                 break;
1305         case AF_INET6:
1306                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1307                         (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1308                         ctdb_addr_to_str(&tcp_sock->src),
1309                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1310                 break;
1311         default:
1312                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1313         }
1314
1315
1316         /* tell all nodes about this tcp connection */
1317         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1318                                        CTDB_CONTROL_TCP_ADD,
1319                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1320         if (ret != 0) {
1321                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1322                 return -1;
1323         }
1324
1325         return 0;
1326 }
1327
1328 /*
1329   find a tcp address on a list
1330  */
1331 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1332                                            struct ctdb_connection *tcp)
1333 {
1334         int i;
1335
1336         if (array == NULL) {
1337                 return NULL;
1338         }
1339
1340         for (i=0;i<array->num;i++) {
1341                 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
1342                     ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
1343                         return &array->connections[i];
1344                 }
1345         }
1346         return NULL;
1347 }
1348
1349
1350
1351 /*
1352   called by a daemon to inform us of a TCP connection that one of its
1353   clients managing that should tickled with an ACK when IP takeover is
1354   done
1355  */
1356 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1357 {
1358         struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
1359         struct ctdb_tcp_array *tcparray;
1360         struct ctdb_connection tcp;
1361         struct ctdb_vnn *vnn;
1362
1363         /* If we don't have public IPs, tickles are useless */
1364         if (ctdb->vnn == NULL) {
1365                 return 0;
1366         }
1367
1368         vnn = find_public_ip_vnn(ctdb, &p->dst);
1369         if (vnn == NULL) {
1370                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1371                         ctdb_addr_to_str(&p->dst)));
1372
1373                 return -1;
1374         }
1375
1376
1377         tcparray = vnn->tcp_array;
1378
1379         /* If this is the first tickle */
1380         if (tcparray == NULL) {
1381                 tcparray = talloc(vnn, struct ctdb_tcp_array);
1382                 CTDB_NO_MEMORY(ctdb, tcparray);
1383                 vnn->tcp_array = tcparray;
1384
1385                 tcparray->num = 0;
1386                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
1387                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1388
1389                 tcparray->connections[tcparray->num].src = p->src;
1390                 tcparray->connections[tcparray->num].dst = p->dst;
1391                 tcparray->num++;
1392
1393                 if (tcp_update_needed) {
1394                         vnn->tcp_update_needed = true;
1395                 }
1396                 return 0;
1397         }
1398
1399
1400         /* Do we already have this tickle ?*/
1401         tcp.src = p->src;
1402         tcp.dst = p->dst;
1403         if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
1404                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1405                         ctdb_addr_to_str(&tcp.dst),
1406                         ntohs(tcp.dst.ip.sin_port),
1407                         vnn->pnn));
1408                 return 0;
1409         }
1410
1411         /* A new tickle, we must add it to the array */
1412         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1413                                         struct ctdb_connection,
1414                                         tcparray->num+1);
1415         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1416
1417         tcparray->connections[tcparray->num].src = p->src;
1418         tcparray->connections[tcparray->num].dst = p->dst;
1419         tcparray->num++;
1420
1421         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1422                 ctdb_addr_to_str(&tcp.dst),
1423                 ntohs(tcp.dst.ip.sin_port),
1424                 vnn->pnn));
1425
1426         if (tcp_update_needed) {
1427                 vnn->tcp_update_needed = true;
1428         }
1429
1430         return 0;
1431 }
1432
1433
1434 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
1435 {
1436         struct ctdb_connection *tcpp;
1437
1438         if (vnn == NULL) {
1439                 return;
1440         }
1441
1442         /* if the array is empty we cant remove it
1443            and we don't need to do anything
1444          */
1445         if (vnn->tcp_array == NULL) {
1446                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist (array is empty) %s:%u\n",
1447                         ctdb_addr_to_str(&conn->dst),
1448                         ntohs(conn->dst.ip.sin_port)));
1449                 return;
1450         }
1451
1452
1453         /* See if we know this connection
1454            if we don't know this connection  then we dont need to do anything
1455          */
1456         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1457         if (tcpp == NULL) {
1458                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist %s:%u\n",
1459                         ctdb_addr_to_str(&conn->dst),
1460                         ntohs(conn->dst.ip.sin_port)));
1461                 return;
1462         }
1463
1464
1465         /* We need to remove this entry from the array.
1466            Instead of allocating a new array and copying data to it
1467            we cheat and just copy the last entry in the existing array
1468            to the entry that is to be removed and just shring the 
1469            ->num field
1470          */
1471         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1472         vnn->tcp_array->num--;
1473
1474         /* If we deleted the last entry we also need to remove the entire array
1475          */
1476         if (vnn->tcp_array->num == 0) {
1477                 talloc_free(vnn->tcp_array);
1478                 vnn->tcp_array = NULL;
1479         }               
1480
1481         vnn->tcp_update_needed = true;
1482
1483         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1484                 ctdb_addr_to_str(&conn->src),
1485                 ntohs(conn->src.ip.sin_port)));
1486 }
1487
1488
1489 /*
1490   called by a daemon to inform us of a TCP connection that one of its
1491   clients used are no longer needed in the tickle database
1492  */
1493 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1494 {
1495         struct ctdb_vnn *vnn;
1496         struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
1497
1498         /* If we don't have public IPs, tickles are useless */
1499         if (ctdb->vnn == NULL) {
1500                 return 0;
1501         }
1502
1503         vnn = find_public_ip_vnn(ctdb, &conn->dst);
1504         if (vnn == NULL) {
1505                 DEBUG(DEBUG_ERR,
1506                       (__location__ " unable to find public address %s\n",
1507                        ctdb_addr_to_str(&conn->dst)));
1508                 return 0;
1509         }
1510
1511         ctdb_remove_connection(vnn, conn);
1512
1513         return 0;
1514 }
1515
1516
1517 static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb,
1518                                               bool force);
1519
1520 /*
1521   Called when another daemon starts - causes all tickles for all
1522   public addresses we are serving to be sent to the new node on the
1523   next check.  This actually causes the tickles to be sent to the
1524   other node immediately.  In case there is an error, the periodic
1525   timer will send the updates on timer event.  This is simple and
1526   doesn't require careful error handling.
1527  */
1528 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
1529 {
1530         DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
1531                            (unsigned long) pnn));
1532
1533         ctdb_send_set_tcp_tickles_for_all(ctdb, true);
1534         return 0;
1535 }
1536
1537
1538 /*
1539   called when a client structure goes away - hook to remove
1540   elements from the tcp_list in all daemons
1541  */
1542 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1543 {
1544         while (client->tcp_list) {
1545                 struct ctdb_vnn *vnn;
1546                 struct ctdb_tcp_list *tcp = client->tcp_list;
1547                 struct ctdb_connection *conn = &tcp->connection;
1548
1549                 DLIST_REMOVE(client->tcp_list, tcp);
1550
1551                 vnn = find_public_ip_vnn(client->ctdb,
1552                                          &conn->dst);
1553                 if (vnn == NULL) {
1554                         DEBUG(DEBUG_ERR,
1555                               (__location__ " unable to find public address %s\n",
1556                                ctdb_addr_to_str(&conn->dst)));
1557                         continue;
1558                 }
1559
1560                 /* If the IP address is hosted on this node then
1561                  * remove the connection. */
1562                 if (vnn->pnn == client->ctdb->pnn) {
1563                         ctdb_remove_connection(vnn, conn);
1564                 }
1565
1566                 /* Otherwise this function has been called because the
1567                  * server IP address has been released to another node
1568                  * and the client has exited.  This means that we
1569                  * should not delete the connection information.  The
1570                  * takeover node processes connections too. */
1571         }
1572 }
1573
1574
1575 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1576 {
1577         struct ctdb_vnn *vnn, *next;
1578         int count = 0;
1579
1580         if (ctdb->tunable.disable_ip_failover == 1) {
1581                 return;
1582         }
1583
1584         for (vnn = ctdb->vnn; vnn != NULL; vnn = next) {
1585                 /* vnn can be freed below in release_ip_post() */
1586                 next = vnn->next;
1587
1588                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1589                         ctdb_vnn_unassign_iface(ctdb, vnn);
1590                         continue;
1591                 }
1592
1593                 /* Don't allow multiple releases at once.  Some code,
1594                  * particularly ctdb_tickle_sentenced_connections() is
1595                  * not re-entrant */
1596                 if (vnn->update_in_flight) {
1597                         DEBUG(DEBUG_WARNING,
1598                               (__location__
1599                                " Not releasing IP %s/%u on interface %s, an update is already in progress\n",
1600                                     ctdb_addr_to_str(&vnn->public_address),
1601                                     vnn->public_netmask_bits,
1602                                     ctdb_vnn_iface_string(vnn)));
1603                         continue;
1604                 }
1605                 vnn->update_in_flight = true;
1606
1607                 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
1608                                     ctdb_addr_to_str(&vnn->public_address),
1609                                     vnn->public_netmask_bits,
1610                                     ctdb_vnn_iface_string(vnn)));
1611
1612                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1613                                        ctdb_vnn_iface_string(vnn),
1614                                        ctdb_addr_to_str(&vnn->public_address),
1615                                        vnn->public_netmask_bits);
1616                 /* releaseip timeouts are converted to success, so to
1617                  * detect failures just check if the IP address is
1618                  * still there...
1619                  */
1620                 if (ctdb_sys_have_ip(&vnn->public_address)) {
1621                         DEBUG(DEBUG_ERR,
1622                               (__location__
1623                                " IP address %s not released\n",
1624                                ctdb_addr_to_str(&vnn->public_address)));
1625                         vnn->update_in_flight = false;
1626                         continue;
1627                 }
1628
1629                 vnn = release_ip_post(ctdb, vnn, &vnn->public_address);
1630                 if (vnn != NULL) {
1631                         vnn->update_in_flight = false;
1632                 }
1633                 count++;
1634         }
1635
1636         DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
1637 }
1638
1639
1640 /*
1641   get list of public IPs
1642  */
1643 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
1644                                     struct ctdb_req_control_old *c, TDB_DATA *outdata)
1645 {
1646         int i, num, len;
1647         struct ctdb_public_ip_list_old *ips;
1648         struct ctdb_vnn *vnn;
1649         bool only_available = false;
1650
1651         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1652                 only_available = true;
1653         }
1654
1655         /* count how many public ip structures we have */
1656         num = 0;
1657         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1658                 num++;
1659         }
1660
1661         len = offsetof(struct ctdb_public_ip_list_old, ips) +
1662                 num*sizeof(struct ctdb_public_ip);
1663         ips = talloc_zero_size(outdata, len);
1664         CTDB_NO_MEMORY(ctdb, ips);
1665
1666         i = 0;
1667         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1668                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1669                         continue;
1670                 }
1671                 ips->ips[i].pnn  = vnn->pnn;
1672                 ips->ips[i].addr = vnn->public_address;
1673                 i++;
1674         }
1675         ips->num = i;
1676         len = offsetof(struct ctdb_public_ip_list_old, ips) +
1677                 i*sizeof(struct ctdb_public_ip);
1678
1679         outdata->dsize = len;
1680         outdata->dptr  = (uint8_t *)ips;
1681
1682         return 0;
1683 }
1684
1685
1686 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
1687                                         struct ctdb_req_control_old *c,
1688                                         TDB_DATA indata,
1689                                         TDB_DATA *outdata)
1690 {
1691         int i, num, len;
1692         ctdb_sock_addr *addr;
1693         struct ctdb_public_ip_info_old *info;
1694         struct ctdb_vnn *vnn;
1695         struct vnn_interface *iface;
1696
1697         addr = (ctdb_sock_addr *)indata.dptr;
1698
1699         vnn = find_public_ip_vnn(ctdb, addr);
1700         if (vnn == NULL) {
1701                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
1702                                  "'%s'not a public address\n",
1703                                  ctdb_addr_to_str(addr)));
1704                 return -1;
1705         }
1706
1707         /* count how many public ip structures we have */
1708         num = 0;
1709         for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
1710                 num++;
1711         }
1712
1713         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
1714                 num*sizeof(struct ctdb_iface);
1715         info = talloc_zero_size(outdata, len);
1716         CTDB_NO_MEMORY(ctdb, info);
1717
1718         info->ip.addr = vnn->public_address;
1719         info->ip.pnn = vnn->pnn;
1720         info->active_idx = 0xFFFFFFFF;
1721
1722         i = 0;
1723         for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
1724                 struct ctdb_interface *cur;
1725
1726                 cur = iface->iface;
1727                 if (vnn->iface == cur) {
1728                         info->active_idx = i;
1729                 }
1730                 strncpy(info->ifaces[i].name, cur->name,
1731                         sizeof(info->ifaces[i].name));
1732                 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
1733                 info->ifaces[i].link_state = cur->link_up;
1734                 info->ifaces[i].references = cur->references;
1735
1736                 i++;
1737         }
1738         info->num = i;
1739         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
1740                 i*sizeof(struct ctdb_iface);
1741
1742         outdata->dsize = len;
1743         outdata->dptr  = (uint8_t *)info;
1744
1745         return 0;
1746 }
1747
1748 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
1749                                 struct ctdb_req_control_old *c,
1750                                 TDB_DATA *outdata)
1751 {
1752         int i, num, len;
1753         struct ctdb_iface_list_old *ifaces;
1754         struct ctdb_interface *cur;
1755
1756         /* count how many public ip structures we have */
1757         num = 0;
1758         for (cur=ctdb->ifaces;cur;cur=cur->next) {
1759                 num++;
1760         }
1761
1762         len = offsetof(struct ctdb_iface_list_old, ifaces) +
1763                 num*sizeof(struct ctdb_iface);
1764         ifaces = talloc_zero_size(outdata, len);
1765         CTDB_NO_MEMORY(ctdb, ifaces);
1766
1767         i = 0;
1768         for (cur=ctdb->ifaces;cur;cur=cur->next) {
1769                 strncpy(ifaces->ifaces[i].name, cur->name,
1770                         sizeof(ifaces->ifaces[i].name));
1771                 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
1772                 ifaces->ifaces[i].link_state = cur->link_up;
1773                 ifaces->ifaces[i].references = cur->references;
1774                 i++;
1775         }
1776         ifaces->num = i;
1777         len = offsetof(struct ctdb_iface_list_old, ifaces) +
1778                 i*sizeof(struct ctdb_iface);
1779
1780         outdata->dsize = len;
1781         outdata->dptr  = (uint8_t *)ifaces;
1782
1783         return 0;
1784 }
1785
1786 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
1787                                     struct ctdb_req_control_old *c,
1788                                     TDB_DATA indata)
1789 {
1790         struct ctdb_iface *info;
1791         struct ctdb_interface *iface;
1792         bool link_up = false;
1793
1794         info = (struct ctdb_iface *)indata.dptr;
1795
1796         if (info->name[CTDB_IFACE_SIZE] != '\0') {
1797                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
1798                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
1799                                   len, len, info->name));
1800                 return -1;
1801         }
1802
1803         switch (info->link_state) {
1804         case 0:
1805                 link_up = false;
1806                 break;
1807         case 1:
1808                 link_up = true;
1809                 break;
1810         default:
1811                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
1812                                   (unsigned int)info->link_state));
1813                 return -1;
1814         }
1815
1816         if (info->references != 0) {
1817                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
1818                                   (unsigned int)info->references));
1819                 return -1;
1820         }
1821
1822         iface = ctdb_find_iface(ctdb, info->name);
1823         if (iface == NULL) {
1824                 return -1;
1825         }
1826
1827         if (link_up == iface->link_up) {
1828                 return 0;
1829         }
1830
1831         DEBUG(DEBUG_ERR,
1832               ("iface[%s] has changed it's link status %s => %s\n",
1833                iface->name,
1834                iface->link_up?"up":"down",
1835                link_up?"up":"down"));
1836
1837         iface->link_up = link_up;
1838         return 0;
1839 }
1840
1841
1842 /*
1843   called by a daemon to inform us of the entire list of TCP tickles for
1844   a particular public address.
1845   this control should only be sent by the node that is currently serving
1846   that public address.
1847  */
1848 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1849 {
1850         struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
1851         struct ctdb_tcp_array *tcparray;
1852         struct ctdb_vnn *vnn;
1853
1854         /* We must at least have tickles.num or else we cant verify the size
1855            of the received data blob
1856          */
1857         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
1858                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
1859                 return -1;
1860         }
1861
1862         /* verify that the size of data matches what we expect */
1863         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
1864                          + sizeof(struct ctdb_connection) * list->num) {
1865                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
1866                 return -1;
1867         }
1868
1869         DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
1870                            ctdb_addr_to_str(&list->addr)));
1871
1872         vnn = find_public_ip_vnn(ctdb, &list->addr);
1873         if (vnn == NULL) {
1874                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
1875                         ctdb_addr_to_str(&list->addr)));
1876
1877                 return 1;
1878         }
1879
1880         if (vnn->pnn == ctdb->pnn) {
1881                 DEBUG(DEBUG_INFO,
1882                       ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
1883                        ctdb_addr_to_str(&list->addr)));
1884                 return 0;
1885         }
1886
1887         /* remove any old ticklelist we might have */
1888         talloc_free(vnn->tcp_array);
1889         vnn->tcp_array = NULL;
1890
1891         tcparray = talloc(vnn, struct ctdb_tcp_array);
1892         CTDB_NO_MEMORY(ctdb, tcparray);
1893
1894         tcparray->num = list->num;
1895
1896         tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
1897         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1898
1899         memcpy(tcparray->connections, &list->connections[0],
1900                sizeof(struct ctdb_connection)*tcparray->num);
1901
1902         /* We now have a new fresh tickle list array for this vnn */
1903         vnn->tcp_array = tcparray;
1904
1905         return 0;
1906 }
1907
1908 /*
1909   called to return the full list of tickles for the puclic address associated 
1910   with the provided vnn
1911  */
1912 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1913 {
1914         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1915         struct ctdb_tickle_list_old *list;
1916         struct ctdb_tcp_array *tcparray;
1917         int num, i;
1918         struct ctdb_vnn *vnn;
1919         unsigned port;
1920
1921         vnn = find_public_ip_vnn(ctdb, addr);
1922         if (vnn == NULL) {
1923                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
1924                         ctdb_addr_to_str(addr)));
1925
1926                 return 1;
1927         }
1928
1929         port = ctdb_addr_to_port(addr);
1930
1931         tcparray = vnn->tcp_array;
1932         num = 0;
1933         if (tcparray != NULL) {
1934                 if (port == 0) {
1935                         /* All connections */
1936                         num = tcparray->num;
1937                 } else {
1938                         /* Count connections for port */
1939                         for (i = 0; i < tcparray->num; i++) {
1940                                 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
1941                                         num++;
1942                                 }
1943                         }
1944                 }
1945         }
1946
1947         outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
1948                         + sizeof(struct ctdb_connection) * num;
1949
1950         outdata->dptr  = talloc_size(outdata, outdata->dsize);
1951         CTDB_NO_MEMORY(ctdb, outdata->dptr);
1952         list = (struct ctdb_tickle_list_old *)outdata->dptr;
1953
1954         list->addr = *addr;
1955         list->num = num;
1956
1957         if (num == 0) {
1958                 return 0;
1959         }
1960
1961         num = 0;
1962         for (i = 0; i < tcparray->num; i++) {
1963                 if (port == 0 || \
1964                     port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
1965                         list->connections[num] = tcparray->connections[i];
1966                         num++;
1967                 }
1968         }
1969
1970         return 0;
1971 }
1972
1973
1974 /*
1975   set the list of all tcp tickles for a public address
1976  */
1977 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
1978                                             ctdb_sock_addr *addr,
1979                                             struct ctdb_tcp_array *tcparray)
1980 {
1981         int ret, num;
1982         TDB_DATA data;
1983         struct ctdb_tickle_list_old *list;
1984
1985         if (tcparray) {
1986                 num = tcparray->num;
1987         } else {
1988                 num = 0;
1989         }
1990
1991         data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
1992                         sizeof(struct ctdb_connection) * num;
1993         data.dptr = talloc_size(ctdb, data.dsize);
1994         CTDB_NO_MEMORY(ctdb, data.dptr);
1995
1996         list = (struct ctdb_tickle_list_old *)data.dptr;
1997         list->addr = *addr;
1998         list->num = num;
1999         if (tcparray) {
2000                 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2001         }
2002
2003         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2004                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2005                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2006         if (ret != 0) {
2007                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2008                 return -1;
2009         }
2010
2011         talloc_free(data.dptr);
2012
2013         return ret;
2014 }
2015
2016 static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb,
2017                                               bool force)
2018 {
2019         struct ctdb_vnn *vnn;
2020         int ret;
2021
2022         for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2023                 /* we only send out updates for public addresses that
2024                    we have taken over
2025                  */
2026                 if (ctdb->pnn != vnn->pnn) {
2027                         continue;
2028                 }
2029
2030                 /* We only send out the updates if we need to */
2031                 if (!force && !vnn->tcp_update_needed) {
2032                         continue;
2033                 }
2034
2035                 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2036                                                        &vnn->public_address,
2037                                                        vnn->tcp_array);
2038                 if (ret != 0) {
2039                         D_ERR("Failed to send the tickle update for ip %s\n",
2040                               ctdb_addr_to_str(&vnn->public_address));
2041                         vnn->tcp_update_needed = true;
2042                 } else {
2043                         D_INFO("Sent tickle update for ip %s\n",
2044                                ctdb_addr_to_str(&vnn->public_address));
2045                         vnn->tcp_update_needed = false;
2046                 }
2047         }
2048
2049 }
2050
2051 /*
2052   perform tickle updates if required
2053  */
2054 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2055                                     struct tevent_timer *te,
2056                                     struct timeval t, void *private_data)
2057 {
2058         struct ctdb_context *ctdb = talloc_get_type(
2059                 private_data, struct ctdb_context);
2060
2061         ctdb_send_set_tcp_tickles_for_all(ctdb, false);
2062
2063         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2064                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2065                          ctdb_update_tcp_tickles, ctdb);
2066 }
2067
2068 /*
2069   start periodic update of tcp tickles
2070  */
2071 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2072 {
2073         ctdb->tickle_update_context = talloc_new(ctdb);
2074
2075         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2076                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2077                          ctdb_update_tcp_tickles, ctdb);
2078 }
2079
2080
2081
2082
2083 struct control_gratious_arp {
2084         struct ctdb_context *ctdb;
2085         ctdb_sock_addr addr;
2086         const char *iface;
2087         int count;
2088 };
2089
2090 /*
2091   send a control_gratuitous arp
2092  */
2093 static void send_gratious_arp(struct tevent_context *ev,
2094                               struct tevent_timer *te,
2095                               struct timeval t, void *private_data)
2096 {
2097         int ret;
2098         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2099                                                         struct control_gratious_arp);
2100
2101         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2102         if (ret != 0) {
2103                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2104                                  arp->iface, strerror(errno)));
2105         }
2106
2107
2108         arp->count++;
2109         if (arp->count == CTDB_ARP_REPEAT) {
2110                 talloc_free(arp);
2111                 return;
2112         }
2113
2114         tevent_add_timer(arp->ctdb->ev, arp,
2115                          timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2116                          send_gratious_arp, arp);
2117 }
2118
2119
2120 /*
2121   send a gratious arp 
2122  */
2123 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2124 {
2125         struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2126         struct control_gratious_arp *arp;
2127
2128         /* verify the size of indata */
2129         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2130                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2131                                  (unsigned)indata.dsize, 
2132                                  (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2133                 return -1;
2134         }
2135         if (indata.dsize != 
2136                 ( offsetof(struct ctdb_addr_info_old, iface)
2137                 + gratious_arp->len ) ){
2138
2139                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2140                         "but should be %u bytes\n", 
2141                          (unsigned)indata.dsize, 
2142                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2143                 return -1;
2144         }
2145
2146
2147         arp = talloc(ctdb, struct control_gratious_arp);
2148         CTDB_NO_MEMORY(ctdb, arp);
2149
2150         arp->ctdb  = ctdb;
2151         arp->addr   = gratious_arp->addr;
2152         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2153         CTDB_NO_MEMORY(ctdb, arp->iface);
2154         arp->count = 0;
2155
2156         tevent_add_timer(arp->ctdb->ev, arp,
2157                          timeval_zero(), send_gratious_arp, arp);
2158
2159         return 0;
2160 }
2161
2162 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2163 {
2164         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2165         int ret;
2166
2167         /* verify the size of indata */
2168         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2169                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2170                 return -1;
2171         }
2172         if (indata.dsize != 
2173                 ( offsetof(struct ctdb_addr_info_old, iface)
2174                 + pub->len ) ){
2175
2176                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2177                         "but should be %u bytes\n", 
2178                          (unsigned)indata.dsize, 
2179                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2180                 return -1;
2181         }
2182
2183         DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2184
2185         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2186
2187         if (ret != 0) {
2188                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2189                 return -1;
2190         }
2191
2192         return 0;
2193 }
2194
2195 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2196 {
2197         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2198         struct ctdb_vnn *vnn;
2199
2200         /* verify the size of indata */
2201         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2202                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2203                 return -1;
2204         }
2205         if (indata.dsize != 
2206                 ( offsetof(struct ctdb_addr_info_old, iface)
2207                 + pub->len ) ){
2208
2209                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2210                         "but should be %u bytes\n", 
2211                          (unsigned)indata.dsize, 
2212                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2213                 return -1;
2214         }
2215
2216         DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2217
2218         /* walk over all public addresses until we find a match */
2219         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2220                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2221                         if (vnn->pnn == ctdb->pnn) {
2222                                 /* This IP is currently being hosted.
2223                                  * Defer the deletion until the next
2224                                  * takeover run. "ctdb reloadips" will
2225                                  * always cause a takeover run.  "ctdb
2226                                  * delip" will now need an explicit
2227                                  * "ctdb ipreallocated" afterwards. */
2228                                 vnn->delete_pending = true;
2229                         } else {
2230                                 /* This IP is not hosted on the
2231                                  * current node so just delete it
2232                                  * now. */
2233                                 do_delete_ip(ctdb, vnn);
2234                         }
2235
2236                         return 0;
2237                 }
2238         }
2239
2240         DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2241                          ctdb_addr_to_str(&pub->addr)));
2242         return -1;
2243 }
2244
2245
/*
  State handed to ctdb_ipreallocated_callback().
 */
struct ipreallocated_callback_state {
	/* the control request that is answered when the event finishes */
	struct ctdb_req_control_old *c;
};
2249
2250 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2251                                         int status, void *p)
2252 {
2253         struct ipreallocated_callback_state *state =
2254                 talloc_get_type(p, struct ipreallocated_callback_state);
2255
2256         if (status != 0) {
2257                 DEBUG(DEBUG_ERR,
2258                       (" \"ipreallocated\" event script failed (status %d)\n",
2259                        status));
2260                 if (status == -ETIME) {
2261                         ctdb_ban_self(ctdb);
2262                 }
2263         }
2264
2265         ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2266         talloc_free(state);
2267 }
2268
2269 /* A control to run the ipreallocated event */
2270 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2271                                    struct ctdb_req_control_old *c,
2272                                    bool *async_reply)
2273 {
2274         int ret;
2275         struct ipreallocated_callback_state *state;
2276
2277         state = talloc(ctdb, struct ipreallocated_callback_state);
2278         CTDB_NO_MEMORY(ctdb, state);
2279
2280         DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
2281
2282         ret = ctdb_event_script_callback(ctdb, state,
2283                                          ctdb_ipreallocated_callback, state,
2284                                          CTDB_EVENT_IPREALLOCATED,
2285                                          "%s", "");
2286
2287         if (ret != 0) {
2288                 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
2289                 talloc_free(state);
2290                 return -1;
2291         }
2292
2293         /* tell the control that we will be reply asynchronously */
2294         state->c    = talloc_steal(state, c);
2295         *async_reply = true;
2296
2297         return 0;
2298 }
2299
2300
/*
  State of one "reload IPs" operation carried out by a child process.
 */
struct ctdb_reloadips_handle {
	/* daemon context */
	struct ctdb_context *ctdb;
	/* control request answered from the destructor; NULL once done */
	struct ctdb_req_control_old *c;
	/* status sent back in the reply */
	int status;
	/* descriptor pair; the result byte is read from fd[0] */
	int fd[2];
	/* pid of the child, killed by the destructor */
	pid_t child;
	/* fd event - NOTE(review): presumably watches fd[0]; confirm at
	 * the place where the handle is set up */
	struct tevent_fd *fde;
};
2309
2310 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
2311 {
2312         if (h == h->ctdb->reload_ips) {
2313                 h->ctdb->reload_ips = NULL;
2314         }
2315         if (h->c != NULL) {
2316                 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
2317                 h->c = NULL;
2318         }
2319         ctdb_kill(h->ctdb, h->child, SIGKILL);
2320         return 0;
2321 }
2322
2323 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
2324                                          struct tevent_timer *te,
2325                                          struct timeval t, void *private_data)
2326 {
2327         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2328
2329         talloc_free(h);
2330 }
2331
2332 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
2333                                          struct tevent_fd *fde,
2334                                          uint16_t flags, void *private_data)
2335 {
2336         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2337
2338         char res;
2339         int ret;
2340
2341         ret = sys_read(h->fd[0], &res, 1);
2342         if (ret < 1 || res != 0) {
2343                 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
2344                 res = 1;
2345         }
2346         h->status = res;
2347
2348         talloc_free(h);
2349 }
2350
2351 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
2352 {
2353         TALLOC_CTX *mem_ctx = talloc_new(NULL);
2354         struct ctdb_public_ip_list_old *ips;
2355         struct ctdb_vnn *vnn;
2356         struct client_async_data *async_data;
2357         struct timeval timeout;
2358         TDB_DATA data;
2359         struct ctdb_client_control_state *state;
2360         bool first_add;
2361         int i, ret;
2362
2363         CTDB_NO_MEMORY(ctdb, mem_ctx);
2364
2365         /* Read IPs from local node */
2366         ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
2367                                        CTDB_CURRENT_NODE, mem_ctx, &ips);
2368         if (ret != 0) {
2369                 DEBUG(DEBUG_ERR,
2370                       ("Unable to fetch public IPs from local node\n"));
2371                 talloc_free(mem_ctx);
2372                 return -1;
2373         }
2374
2375         /* Read IPs file - this is safe since this is a child process */
2376         ctdb->vnn = NULL;
2377         if (ctdb_set_public_addresses(ctdb, false) != 0) {
2378                 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
2379                 talloc_free(mem_ctx);
2380                 return -1;
2381         }
2382
2383         async_data = talloc_zero(mem_ctx, struct client_async_data);
2384         CTDB_NO_MEMORY(ctdb, async_data);
2385
2386         /* Compare IPs between node and file for IPs to be deleted */
2387         for (i = 0; i < ips->num; i++) {
2388                 /* */
2389                 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2390                         if (ctdb_same_ip(&vnn->public_address,
2391                                          &ips->ips[i].addr)) {
2392                                 /* IP is still in file */
2393                                 break;
2394                         }
2395                 }
2396
2397                 if (vnn == NULL) {
2398                         /* Delete IP ips->ips[i] */
2399                         struct ctdb_addr_info_old *pub;
2400
2401                         DEBUG(DEBUG_NOTICE,
2402                               ("IP %s no longer configured, deleting it\n",
2403                                ctdb_addr_to_str(&ips->ips[i].addr)));
2404
2405                         pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
2406                         CTDB_NO_MEMORY(ctdb, pub);
2407
2408                         pub->addr  = ips->ips[i].addr;
2409                         pub->mask  = 0;
2410                         pub->len   = 0;
2411
2412                         timeout = TAKEOVER_TIMEOUT();
2413
2414                         data.dsize = offsetof(struct ctdb_addr_info_old,
2415                                               iface) + pub->len;
2416                         data.dptr = (uint8_t *)pub;
2417
2418                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2419                                                   CTDB_CONTROL_DEL_PUBLIC_IP,
2420                                                   0, data, async_data,
2421                                                   &timeout, NULL);
2422                         if (state == NULL) {
2423                                 DEBUG(DEBUG_ERR,
2424                                       (__location__
2425                                        " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
2426                                 goto failed;
2427                         }
2428
2429                         ctdb_client_async_add(async_data, state);
2430                 }
2431         }
2432
2433         /* Compare IPs between node and file for IPs to be added */
2434         first_add = true;
2435         for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2436                 for (i = 0; i < ips->num; i++) {
2437                         if (ctdb_same_ip(&vnn->public_address,
2438                                          &ips->ips[i].addr)) {
2439                                 /* IP already on node */
2440                                 break;
2441                         }
2442                 }
2443                 if (i == ips->num) {
2444                         /* Add IP ips->ips[i] */
2445                         struct ctdb_addr_info_old *pub;
2446                         const char *ifaces = NULL;
2447                         uint32_t len;
2448                         struct vnn_interface *iface = NULL;
2449
2450                         DEBUG(DEBUG_NOTICE,
2451                               ("New IP %s configured, adding it\n",
2452                                ctdb_addr_to_str(&vnn->public_address)));
2453                         if (first_add) {
2454                                 uint32_t pnn = ctdb_get_pnn(ctdb);
2455
2456                                 data.dsize = sizeof(pnn);
2457                                 data.dptr  = (uint8_t *)&pnn;
2458
2459                                 ret = ctdb_client_send_message(
2460                                         ctdb,
2461                                         CTDB_BROADCAST_CONNECTED,
2462                                         CTDB_SRVID_REBALANCE_NODE,
2463                                         data);
2464                                 if (ret != 0) {
2465                                         DEBUG(DEBUG_WARNING,
2466                                               ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
2467                                 }
2468
2469                                 first_add = false;
2470                         }
2471
2472                         ifaces = vnn->ifaces->iface->name;
2473                         iface = vnn->ifaces->next;
2474                         while (iface != NULL) {
2475                                 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
2476                                                          iface->iface->name);
2477                                 iface = iface->next;
2478                         }
2479
2480                         len   = strlen(ifaces) + 1;
2481                         pub = talloc_zero_size(mem_ctx,
2482                                                offsetof(struct ctdb_addr_info_old, iface) + len);
2483                         CTDB_NO_MEMORY(ctdb, pub);
2484
2485                         pub->addr  = vnn->public_address;
2486                         pub->mask  = vnn->public_netmask_bits;
2487                         pub->len   = len;
2488                         memcpy(&pub->iface[0], ifaces, pub->len);
2489
2490                         timeout = TAKEOVER_TIMEOUT();
2491
2492                         data.dsize = offsetof(struct ctdb_addr_info_old,
2493                                               iface) + pub->len;
2494                         data.dptr = (uint8_t *)pub;
2495
2496                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2497                                                   CTDB_CONTROL_ADD_PUBLIC_IP,
2498                                                   0, data, async_data,
2499                                                   &timeout, NULL);
2500                         if (state == NULL) {
2501                                 DEBUG(DEBUG_ERR,
2502                                       (__location__
2503                                        " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
2504                                 goto failed;
2505                         }
2506
2507                         ctdb_client_async_add(async_data, state);
2508                 }
2509         }
2510
2511         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2512                 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
2513                 goto failed;
2514         }
2515
2516         talloc_free(mem_ctx);
2517         return 0;
2518
2519 failed:
2520         talloc_free(mem_ctx);
2521         return -1;
2522 }
2523
2524 /* This control is sent to force the node to re-read the public addresses file
2525    and drop any addresses we should nnot longer host, and add new addresses
2526    that we are now able to host
2527 */
2528 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
2529 {
2530         struct ctdb_reloadips_handle *h;
2531         pid_t parent = getpid();
2532
2533         if (ctdb->reload_ips != NULL) {
2534                 talloc_free(ctdb->reload_ips);
2535                 ctdb->reload_ips = NULL;
2536         }
2537
2538         h = talloc(ctdb, struct ctdb_reloadips_handle);
2539         CTDB_NO_MEMORY(ctdb, h);
2540         h->ctdb     = ctdb;
2541         h->c        = NULL;
2542         h->status   = -1;
2543         
2544         if (pipe(h->fd) == -1) {
2545                 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
2546                 talloc_free(h);
2547                 return -1;
2548         }
2549
2550         h->child = ctdb_fork(ctdb);
2551         if (h->child == (pid_t)-1) {
2552                 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
2553                 close(h->fd[0]);
2554                 close(h->fd[1]);
2555                 talloc_free(h);
2556                 return -1;
2557         }
2558
2559         /* child process */
2560         if (h->child == 0) {
2561                 signed char res = 0;
2562
2563                 close(h->fd[0]);
2564
2565                 prctl_set_comment("ctdb_reloadips");
2566                 if (switch_from_server_to_client(ctdb) != 0) {
2567                         DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
2568                         res = -1;
2569                 } else {
2570                         res = ctdb_reloadips_child(ctdb);
2571                         if (res != 0) {
2572                                 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
2573                         }
2574                 }
2575
2576                 sys_write(h->fd[1], &res, 1);
2577                 ctdb_wait_for_process_to_exit(parent);
2578                 _exit(0);
2579         }
2580
2581         h->c             = talloc_steal(h, c);
2582
2583         close(h->fd[1]);
2584         set_close_on_exec(h->fd[0]);
2585
2586         talloc_set_destructor(h, ctdb_reloadips_destructor);
2587
2588
2589         h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
2590                                ctdb_reloadips_child_handler, (void *)h);
2591         tevent_fd_set_auto_close(h->fde);
2592
2593         tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
2594                          ctdb_reloadips_timeout_event, h);
2595
2596         /* we reply later */
2597         *async_reply = true;
2598         return 0;
2599 }