ctdb-ipalloc: Clean up reloading of remote public IPs
[sharpe/samba-autobuild/.git] / ctdb / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6    Copyright (C) Martin Schwenke  2011
7
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3 of the License, or
11    (at your option) any later version.
12    
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17    
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, see <http://www.gnu.org/licenses/>.
20 */
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
26
27 #include <talloc.h>
28 #include <tevent.h>
29
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_process.h"
34
35 #include "ctdb_private.h"
36 #include "ctdb_client.h"
37
38 #include "common/rb_tree.h"
39 #include "common/reqid.h"
40 #include "common/system.h"
41 #include "common/common.h"
42 #include "common/logging.h"
43
44 #include "server/ipalloc.h"
45
/*
 * Expands to timeval_current_ofs() with the takeover_timeout tunable
 * as its first argument and 0 as its second.  A variable named "ctdb"
 * must be in scope wherever this is used.
 */
#define TAKEOVER_TIMEOUT() \
        timeval_current_ofs(ctdb->tunable.takeover_timeout, 0)

/* First argument of the timeval_current_ofs() delay between ARP rounds */
#define CTDB_ARP_INTERVAL 1
/* Number of ARP rounds sent before the announcement state is freed */
#define CTDB_ARP_REPEAT   3
50
/*
  a local network interface that public addresses can be assigned to
 */
struct ctdb_interface {
        /* links for the ctdb->ifaces list */
        struct ctdb_interface *prev;
        struct ctdb_interface *next;
        /* interface name */
        const char *name;
        /* link state; newly added interfaces start with this set */
        bool link_up;
        /* number of VNNs currently assigned to this interface */
        uint32_t references;
};
57
58 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
59 {
60         if (vnn->iface) {
61                 return vnn->iface->name;
62         }
63
64         return "__none__";
65 }
66
67 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
68 {
69         struct ctdb_interface *i;
70
71         if (strlen(iface) > CTDB_IFACE_SIZE) {
72                 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
73                 return -1;
74         }
75
76         /* Verify that we don't have an entry for this ip yet */
77         for (i=ctdb->ifaces;i;i=i->next) {
78                 if (strcmp(i->name, iface) == 0) {
79                         return 0;
80                 }
81         }
82
83         /* create a new structure for this interface */
84         i = talloc_zero(ctdb, struct ctdb_interface);
85         CTDB_NO_MEMORY_FATAL(ctdb, i);
86         i->name = talloc_strdup(i, iface);
87         CTDB_NO_MEMORY(ctdb, i->name);
88
89         i->link_up = true;
90
91         DLIST_ADD(ctdb->ifaces, i);
92
93         return 0;
94 }
95
96 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
97                                         const char *name)
98 {
99         int n;
100
101         for (n = 0; vnn->ifaces[n] != NULL; n++) {
102                 if (strcmp(name, vnn->ifaces[n]) == 0) {
103                         return true;
104                 }
105         }
106
107         return false;
108 }
109
110 /* If any interfaces now have no possible IPs then delete them.  This
111  * implementation is naive (i.e. simple) rather than clever
112  * (i.e. complex).  Given that this is run on delip and that operation
113  * is rare, this doesn't need to be efficient - it needs to be
114  * foolproof.  One alternative is reference counting, where the logic
115  * is distributed and can, therefore, be broken in multiple places.
116  * Another alternative is to build a red-black tree of interfaces that
117  * can have addresses (by walking ctdb->vnn once) and then walking
118  * ctdb->ifaces once and deleting those not in the tree.  Let's go to
119  * one of those if the naive implementation causes problems...  :-)
120  */
121 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
122                                         struct ctdb_vnn *vnn)
123 {
124         struct ctdb_interface *i, *next;
125
126         /* For each interface, check if there's an IP using it. */
127         for (i = ctdb->ifaces; i != NULL; i = next) {
128                 struct ctdb_vnn *tv;
129                 bool found;
130                 next = i->next;
131
132                 /* Only consider interfaces named in the given VNN. */
133                 if (!vnn_has_interface_with_name(vnn, i->name)) {
134                         continue;
135                 }
136
137                 /* Search for a vnn with this interface. */
138                 found = false;
139                 for (tv=ctdb->vnn; tv; tv=tv->next) {
140                         if (vnn_has_interface_with_name(tv, i->name)) {
141                                 found = true;
142                                 break;
143                         }
144                 }
145
146                 if (!found) {
147                         /* None of the VNNs are using this interface. */
148                         DLIST_REMOVE(ctdb->ifaces, i);
149                         talloc_free(i);
150                 }
151         }
152 }
153
154
155 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
156                                               const char *iface)
157 {
158         struct ctdb_interface *i;
159
160         for (i=ctdb->ifaces;i;i=i->next) {
161                 if (strcmp(i->name, iface) == 0) {
162                         return i;
163                 }
164         }
165
166         return NULL;
167 }
168
169 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
170                                                   struct ctdb_vnn *vnn)
171 {
172         int i;
173         struct ctdb_interface *cur = NULL;
174         struct ctdb_interface *best = NULL;
175
176         for (i=0; vnn->ifaces[i]; i++) {
177
178                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
179                 if (cur == NULL) {
180                         continue;
181                 }
182
183                 if (!cur->link_up) {
184                         continue;
185                 }
186
187                 if (best == NULL) {
188                         best = cur;
189                         continue;
190                 }
191
192                 if (cur->references < best->references) {
193                         best = cur;
194                         continue;
195                 }
196         }
197
198         return best;
199 }
200
201 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
202                                      struct ctdb_vnn *vnn)
203 {
204         struct ctdb_interface *best = NULL;
205
206         if (vnn->iface) {
207                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
208                                    "still assigned to iface '%s'\n",
209                                    ctdb_addr_to_str(&vnn->public_address),
210                                    ctdb_vnn_iface_string(vnn)));
211                 return 0;
212         }
213
214         best = ctdb_vnn_best_iface(ctdb, vnn);
215         if (best == NULL) {
216                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
217                                   "cannot assign to iface any iface\n",
218                                   ctdb_addr_to_str(&vnn->public_address)));
219                 return -1;
220         }
221
222         vnn->iface = best;
223         best->references++;
224         vnn->pnn = ctdb->pnn;
225
226         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
227                            "now assigned to iface '%s' refs[%d]\n",
228                            ctdb_addr_to_str(&vnn->public_address),
229                            ctdb_vnn_iface_string(vnn),
230                            best->references));
231         return 0;
232 }
233
234 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
235                                     struct ctdb_vnn *vnn)
236 {
237         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
238                            "now unassigned (old iface '%s' refs[%d])\n",
239                            ctdb_addr_to_str(&vnn->public_address),
240                            ctdb_vnn_iface_string(vnn),
241                            vnn->iface?vnn->iface->references:0));
242         if (vnn->iface) {
243                 vnn->iface->references--;
244         }
245         vnn->iface = NULL;
246         if (vnn->pnn == ctdb->pnn) {
247                 vnn->pnn = -1;
248         }
249 }
250
251 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
252                                struct ctdb_vnn *vnn)
253 {
254         int i;
255
256         /* Nodes that are not RUNNING can not host IPs */
257         if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
258                 return false;
259         }
260
261         if (vnn->delete_pending) {
262                 return false;
263         }
264
265         if (vnn->iface && vnn->iface->link_up) {
266                 return true;
267         }
268
269         for (i=0; vnn->ifaces[i]; i++) {
270                 struct ctdb_interface *cur;
271
272                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
273                 if (cur == NULL) {
274                         continue;
275                 }
276
277                 if (cur->link_up) {
278                         return true;
279                 }
280         }
281
282         return false;
283 }
284
285 struct ctdb_takeover_arp {
286         struct ctdb_context *ctdb;
287         uint32_t count;
288         ctdb_sock_addr addr;
289         struct ctdb_tcp_array *tcparray;
290         struct ctdb_vnn *vnn;
291 };
292
293
294 /*
295   lists of tcp endpoints
296  */
297 struct ctdb_tcp_list {
298         struct ctdb_tcp_list *prev, *next;
299         struct ctdb_connection connection;
300 };
301
302 /*
303   list of clients to kill on IP release
304  */
305 struct ctdb_client_ip {
306         struct ctdb_client_ip *prev, *next;
307         struct ctdb_context *ctdb;
308         ctdb_sock_addr addr;
309         uint32_t client_id;
310 };
311
312
313 /*
314   send a gratuitous arp
315  */
316 static void ctdb_control_send_arp(struct tevent_context *ev,
317                                   struct tevent_timer *te,
318                                   struct timeval t, void *private_data)
319 {
320         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
321                                                         struct ctdb_takeover_arp);
322         int i, ret;
323         struct ctdb_tcp_array *tcparray;
324         const char *iface = ctdb_vnn_iface_string(arp->vnn);
325
326         ret = ctdb_sys_send_arp(&arp->addr, iface);
327         if (ret != 0) {
328                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
329                                   iface, strerror(errno)));
330         }
331
332         tcparray = arp->tcparray;
333         if (tcparray) {
334                 for (i=0;i<tcparray->num;i++) {
335                         struct ctdb_connection *tcon;
336
337                         tcon = &tcparray->connections[i];
338                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
339                                 (unsigned)ntohs(tcon->dst.ip.sin_port),
340                                 ctdb_addr_to_str(&tcon->src),
341                                 (unsigned)ntohs(tcon->src.ip.sin_port)));
342                         ret = ctdb_sys_send_tcp(
343                                 &tcon->src,
344                                 &tcon->dst,
345                                 0, 0, 0);
346                         if (ret != 0) {
347                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
348                                         ctdb_addr_to_str(&tcon->src)));
349                         }
350                 }
351         }
352
353         arp->count++;
354
355         if (arp->count == CTDB_ARP_REPEAT) {
356                 talloc_free(arp);
357                 return;
358         }
359
360         tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
361                          timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
362                          ctdb_control_send_arp, arp);
363 }
364
365 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
366                                        struct ctdb_vnn *vnn)
367 {
368         struct ctdb_takeover_arp *arp;
369         struct ctdb_tcp_array *tcparray;
370
371         if (!vnn->takeover_ctx) {
372                 vnn->takeover_ctx = talloc_new(vnn);
373                 if (!vnn->takeover_ctx) {
374                         return -1;
375                 }
376         }
377
378         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
379         if (!arp) {
380                 return -1;
381         }
382
383         arp->ctdb = ctdb;
384         arp->addr = vnn->public_address;
385         arp->vnn  = vnn;
386
387         tcparray = vnn->tcp_array;
388         if (tcparray) {
389                 /* add all of the known tcp connections for this IP to the
390                    list of tcp connections to send tickle acks for */
391                 arp->tcparray = talloc_steal(arp, tcparray);
392
393                 vnn->tcp_array = NULL;
394                 vnn->tcp_update_needed = true;
395         }
396
397         tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
398                          timeval_zero(), ctdb_control_send_arp, arp);
399
400         return 0;
401 }
402
403 struct takeover_callback_state {
404         struct ctdb_req_control_old *c;
405         ctdb_sock_addr *addr;
406         struct ctdb_vnn *vnn;
407 };
408
/*
  state carried from ctdb_do_takeip to ctdb_do_takeip_callback
 */
struct ctdb_do_takeip_state {
        /* the control request to reply to */
        struct ctdb_req_control_old *c;
        /* the VNN being taken over */
        struct ctdb_vnn *vnn;
};
413
414 /*
415   called when takeip event finishes
416  */
417 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
418                                     void *private_data)
419 {
420         struct ctdb_do_takeip_state *state =
421                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
422         int32_t ret;
423         TDB_DATA data;
424
425         if (status != 0) {
426                 if (status == -ETIME) {
427                         ctdb_ban_self(ctdb);
428                 }
429                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
430                                  ctdb_addr_to_str(&state->vnn->public_address),
431                                  ctdb_vnn_iface_string(state->vnn)));
432                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
433
434                 talloc_free(state);
435                 return;
436         }
437
438         if (ctdb->do_checkpublicip) {
439
440         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
441         if (ret != 0) {
442                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
443                 talloc_free(state);
444                 return;
445         }
446
447         }
448
449         data.dptr  = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
450         data.dsize = strlen((char *)data.dptr) + 1;
451         DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
452
453         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
454
455
456         /* the control succeeded */
457         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
458         talloc_free(state);
459         return;
460 }
461
462 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
463 {
464         state->vnn->update_in_flight = false;
465         return 0;
466 }
467
468 /*
469   take over an ip address
470  */
471 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
472                               struct ctdb_req_control_old *c,
473                               struct ctdb_vnn *vnn)
474 {
475         int ret;
476         struct ctdb_do_takeip_state *state;
477
478         if (vnn->update_in_flight) {
479                 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
480                                     "update for this IP already in flight\n",
481                                     ctdb_addr_to_str(&vnn->public_address),
482                                     vnn->public_netmask_bits));
483                 return -1;
484         }
485
486         ret = ctdb_vnn_assign_iface(ctdb, vnn);
487         if (ret != 0) {
488                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
489                                  "assign a usable interface\n",
490                                  ctdb_addr_to_str(&vnn->public_address),
491                                  vnn->public_netmask_bits));
492                 return -1;
493         }
494
495         state = talloc(vnn, struct ctdb_do_takeip_state);
496         CTDB_NO_MEMORY(ctdb, state);
497
498         state->c = talloc_steal(ctdb, c);
499         state->vnn   = vnn;
500
501         vnn->update_in_flight = true;
502         talloc_set_destructor(state, ctdb_takeip_destructor);
503
504         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
505                             ctdb_addr_to_str(&vnn->public_address),
506                             vnn->public_netmask_bits,
507                             ctdb_vnn_iface_string(vnn)));
508
509         ret = ctdb_event_script_callback(ctdb,
510                                          state,
511                                          ctdb_do_takeip_callback,
512                                          state,
513                                          CTDB_EVENT_TAKE_IP,
514                                          "%s %s %u",
515                                          ctdb_vnn_iface_string(vnn),
516                                          ctdb_addr_to_str(&vnn->public_address),
517                                          vnn->public_netmask_bits);
518
519         if (ret != 0) {
520                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
521                         ctdb_addr_to_str(&vnn->public_address),
522                         ctdb_vnn_iface_string(vnn)));
523                 talloc_free(state);
524                 return -1;
525         }
526
527         return 0;
528 }
529
/*
  state carried from ctdb_do_updateip to ctdb_do_updateip_callback
 */
struct ctdb_do_updateip_state {
        /* the control request to reply to */
        struct ctdb_req_control_old *c;
        /* the interface the address is being moved away from */
        struct ctdb_interface *old;
        /* the VNN being moved */
        struct ctdb_vnn *vnn;
};
535
536 /*
537   called when updateip event finishes
538  */
539 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
540                                       void *private_data)
541 {
542         struct ctdb_do_updateip_state *state =
543                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
544         int32_t ret;
545
546         if (status != 0) {
547                 if (status == -ETIME) {
548                         ctdb_ban_self(ctdb);
549                 }
550                 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
551                         ctdb_addr_to_str(&state->vnn->public_address),
552                         state->old->name,
553                         ctdb_vnn_iface_string(state->vnn)));
554
555                 /*
556                  * All we can do is reset the old interface
557                  * and let the next run fix it
558                  */
559                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
560                 state->vnn->iface = state->old;
561                 state->vnn->iface->references++;
562
563                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
564                 talloc_free(state);
565                 return;
566         }
567
568         if (ctdb->do_checkpublicip) {
569
570         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
571         if (ret != 0) {
572                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
573                 talloc_free(state);
574                 return;
575         }
576
577         }
578
579         /* the control succeeded */
580         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
581         talloc_free(state);
582         return;
583 }
584
585 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
586 {
587         state->vnn->update_in_flight = false;
588         return 0;
589 }
590
591 /*
592   update (move) an ip address
593  */
594 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
595                                 struct ctdb_req_control_old *c,
596                                 struct ctdb_vnn *vnn)
597 {
598         int ret;
599         struct ctdb_do_updateip_state *state;
600         struct ctdb_interface *old = vnn->iface;
601         const char *new_name;
602
603         if (vnn->update_in_flight) {
604                 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
605                                     "update for this IP already in flight\n",
606                                     ctdb_addr_to_str(&vnn->public_address),
607                                     vnn->public_netmask_bits));
608                 return -1;
609         }
610
611         ctdb_vnn_unassign_iface(ctdb, vnn);
612         ret = ctdb_vnn_assign_iface(ctdb, vnn);
613         if (ret != 0) {
614                 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
615                                  "assin a usable interface (old iface '%s')\n",
616                                  ctdb_addr_to_str(&vnn->public_address),
617                                  vnn->public_netmask_bits,
618                                  old->name));
619                 return -1;
620         }
621
622         new_name = ctdb_vnn_iface_string(vnn);
623         if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
624                 /* A benign update from one interface onto itself.
625                  * no need to run the eventscripts in this case, just return
626                  * success.
627                  */
628                 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
629                 return 0;
630         }
631
632         state = talloc(vnn, struct ctdb_do_updateip_state);
633         CTDB_NO_MEMORY(ctdb, state);
634
635         state->c = talloc_steal(ctdb, c);
636         state->old = old;
637         state->vnn = vnn;
638
639         vnn->update_in_flight = true;
640         talloc_set_destructor(state, ctdb_updateip_destructor);
641
642         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
643                             "interface %s to %s\n",
644                             ctdb_addr_to_str(&vnn->public_address),
645                             vnn->public_netmask_bits,
646                             old->name,
647                             new_name));
648
649         ret = ctdb_event_script_callback(ctdb,
650                                          state,
651                                          ctdb_do_updateip_callback,
652                                          state,
653                                          CTDB_EVENT_UPDATE_IP,
654                                          "%s %s %s %u",
655                                          state->old->name,
656                                          new_name,
657                                          ctdb_addr_to_str(&vnn->public_address),
658                                          vnn->public_netmask_bits);
659         if (ret != 0) {
660                 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
661                                  ctdb_addr_to_str(&vnn->public_address),
662                                  old->name, new_name));
663                 talloc_free(state);
664                 return -1;
665         }
666
667         return 0;
668 }
669
670 /*
671   Find the vnn of the node that has a public ip address
672   returns -1 if the address is not known as a public address
673  */
674 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
675 {
676         struct ctdb_vnn *vnn;
677
678         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
679                 if (ctdb_same_ip(&vnn->public_address, addr)) {
680                         return vnn;
681                 }
682         }
683
684         return NULL;
685 }
686
687 /*
688   take over an ip address
689  */
690 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
691                                  struct ctdb_req_control_old *c,
692                                  TDB_DATA indata,
693                                  bool *async_reply)
694 {
695         int ret;
696         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
697         struct ctdb_vnn *vnn;
698         bool have_ip = false;
699         bool do_updateip = false;
700         bool do_takeip = false;
701         struct ctdb_interface *best_iface = NULL;
702
703         if (pip->pnn != ctdb->pnn) {
704                 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
705                                  "with pnn %d, but we're node %d\n",
706                                  ctdb_addr_to_str(&pip->addr),
707                                  pip->pnn, ctdb->pnn));
708                 return -1;
709         }
710
711         /* update out vnn list */
712         vnn = find_public_ip_vnn(ctdb, &pip->addr);
713         if (vnn == NULL) {
714                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
715                         ctdb_addr_to_str(&pip->addr)));
716                 return 0;
717         }
718
719         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
720                 have_ip = ctdb_sys_have_ip(&pip->addr);
721         }
722         best_iface = ctdb_vnn_best_iface(ctdb, vnn);
723         if (best_iface == NULL) {
724                 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
725                                  "a usable interface (old %s, have_ip %d)\n",
726                                  ctdb_addr_to_str(&vnn->public_address),
727                                  vnn->public_netmask_bits,
728                                  ctdb_vnn_iface_string(vnn),
729                                  have_ip));
730                 return -1;
731         }
732
733         if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
734                 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
735                 have_ip = false;
736         }
737
738
739         if (vnn->iface == NULL && have_ip) {
740                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
741                                   "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
742                                  ctdb_addr_to_str(&vnn->public_address)));
743                 return 0;
744         }
745
746         if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
747                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
748                                   "and we have it on iface[%s], but it was assigned to node %d"
749                                   "and we are node %d, banning ourself\n",
750                                  ctdb_addr_to_str(&vnn->public_address),
751                                  ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
752                 ctdb_ban_self(ctdb);
753                 return -1;
754         }
755
756         if (vnn->pnn == -1 && have_ip) {
757                 vnn->pnn = ctdb->pnn;
758                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
759                                   "and we already have it on iface[%s], update local daemon\n",
760                                  ctdb_addr_to_str(&vnn->public_address),
761                                   ctdb_vnn_iface_string(vnn)));
762                 return 0;
763         }
764
765         if (vnn->iface) {
766                 if (vnn->iface != best_iface) {
767                         if (!vnn->iface->link_up) {
768                                 do_updateip = true;
769                         } else if (vnn->iface->references > (best_iface->references + 1)) {
770                                 /* only move when the rebalance gains something */
771                                         do_updateip = true;
772                         }
773                 }
774         }
775
776         if (!have_ip) {
777                 if (do_updateip) {
778                         ctdb_vnn_unassign_iface(ctdb, vnn);
779                         do_updateip = false;
780                 }
781                 do_takeip = true;
782         }
783
784         if (do_takeip) {
785                 ret = ctdb_do_takeip(ctdb, c, vnn);
786                 if (ret != 0) {
787                         return -1;
788                 }
789         } else if (do_updateip) {
790                 ret = ctdb_do_updateip(ctdb, c, vnn);
791                 if (ret != 0) {
792                         return -1;
793                 }
794         } else {
795                 /*
796                  * The interface is up and the kernel known the ip
797                  * => do nothing
798                  */
799                 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
800                         ctdb_addr_to_str(&pip->addr),
801                         vnn->public_netmask_bits,
802                         ctdb_vnn_iface_string(vnn)));
803                 return 0;
804         }
805
806         /* tell ctdb_control.c that we will be replying asynchronously */
807         *async_reply = true;
808
809         return 0;
810 }
811
812 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
813 {
814         DLIST_REMOVE(ctdb->vnn, vnn);
815         ctdb_vnn_unassign_iface(ctdb, vnn);
816         ctdb_remove_orphaned_ifaces(ctdb, vnn);
817         talloc_free(vnn);
818 }
819
820 /*
821   called when releaseip event finishes
822  */
823 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
824                                 void *private_data)
825 {
826         struct takeover_callback_state *state = 
827                 talloc_get_type(private_data, struct takeover_callback_state);
828         TDB_DATA data;
829
830         if (status == -ETIME) {
831                 ctdb_ban_self(ctdb);
832         }
833
834         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
835                 if  (ctdb_sys_have_ip(state->addr)) {
836                         DEBUG(DEBUG_ERR,
837                               ("IP %s still hosted during release IP callback, failing\n",
838                                ctdb_addr_to_str(state->addr)));
839                         ctdb_request_control_reply(ctdb, state->c,
840                                                    NULL, -1, NULL);
841                         talloc_free(state);
842                         return;
843                 }
844         }
845
846         /* send a message to all clients of this node telling them
847            that the cluster has been reconfigured and they should
848            release any sockets on this IP */
849         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
850         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
851         data.dsize = strlen((char *)data.dptr)+1;
852
853         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
854
855         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
856
857         ctdb_vnn_unassign_iface(ctdb, state->vnn);
858
859         /* Process the IP if it has been marked for deletion */
860         if (state->vnn->delete_pending) {
861                 do_delete_ip(ctdb, state->vnn);
862                 state->vnn = NULL;
863         }
864
865         /* the control succeeded */
866         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
867         talloc_free(state);
868 }
869
870 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
871 {
872         if (state->vnn != NULL) {
873                 state->vnn->update_in_flight = false;
874         }
875         return 0;
876 }
877
878 /*
879   release an ip address
880  */
881 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
882                                 struct ctdb_req_control_old *c,
883                                 TDB_DATA indata, 
884                                 bool *async_reply)
885 {
886         int ret;
887         struct takeover_callback_state *state;
888         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
889         struct ctdb_vnn *vnn;
890         char *iface;
891
892         /* update our vnn list */
893         vnn = find_public_ip_vnn(ctdb, &pip->addr);
894         if (vnn == NULL) {
895                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
896                         ctdb_addr_to_str(&pip->addr)));
897                 return 0;
898         }
899         vnn->pnn = pip->pnn;
900
901         /* stop any previous arps */
902         talloc_free(vnn->takeover_ctx);
903         vnn->takeover_ctx = NULL;
904
905         /* Some ctdb tool commands (e.g. moveip) send
906          * lazy multicast to drop an IP from any node that isn't the
907          * intended new node.  The following causes makes ctdbd ignore
908          * a release for any address it doesn't host.
909          */
910         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
911                 if (!ctdb_sys_have_ip(&pip->addr)) {
912                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
913                                 ctdb_addr_to_str(&pip->addr),
914                                 vnn->public_netmask_bits,
915                                 ctdb_vnn_iface_string(vnn)));
916                         ctdb_vnn_unassign_iface(ctdb, vnn);
917                         return 0;
918                 }
919         } else {
920                 if (vnn->iface == NULL) {
921                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
922                                            ctdb_addr_to_str(&pip->addr),
923                                            vnn->public_netmask_bits));
924                         return 0;
925                 }
926         }
927
928         /* There is a potential race between take_ip and us because we
929          * update the VNN via a callback that run when the
930          * eventscripts have been run.  Avoid the race by allowing one
931          * update to be in flight at a time.
932          */
933         if (vnn->update_in_flight) {
934                 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
935                                     "update for this IP already in flight\n",
936                                     ctdb_addr_to_str(&vnn->public_address),
937                                     vnn->public_netmask_bits));
938                 return -1;
939         }
940
941         iface = strdup(ctdb_vnn_iface_string(vnn));
942
943         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
944                 ctdb_addr_to_str(&pip->addr),
945                 vnn->public_netmask_bits,
946                 iface,
947                 pip->pnn));
948
949         state = talloc(ctdb, struct takeover_callback_state);
950         if (state == NULL) {
951                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
952                                __FILE__, __LINE__);
953                 free(iface);
954                 return -1;
955         }
956
957         state->c = talloc_steal(state, c);
958         state->addr = talloc(state, ctdb_sock_addr);       
959         if (state->addr == NULL) {
960                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
961                                __FILE__, __LINE__);
962                 free(iface);
963                 talloc_free(state);
964                 return -1;
965         }
966         *state->addr = pip->addr;
967         state->vnn   = vnn;
968
969         vnn->update_in_flight = true;
970         talloc_set_destructor(state, ctdb_releaseip_destructor);
971
972         ret = ctdb_event_script_callback(ctdb, 
973                                          state, release_ip_callback, state,
974                                          CTDB_EVENT_RELEASE_IP,
975                                          "%s %s %u",
976                                          iface,
977                                          ctdb_addr_to_str(&pip->addr),
978                                          vnn->public_netmask_bits);
979         free(iface);
980         if (ret != 0) {
981                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
982                         ctdb_addr_to_str(&pip->addr),
983                         ctdb_vnn_iface_string(vnn)));
984                 talloc_free(state);
985                 return -1;
986         }
987
988         /* tell the control that we will be reply asynchronously */
989         *async_reply = true;
990         return 0;
991 }
992
993 static int ctdb_add_public_address(struct ctdb_context *ctdb,
994                                    ctdb_sock_addr *addr,
995                                    unsigned mask, const char *ifaces,
996                                    bool check_address)
997 {
998         struct ctdb_vnn      *vnn;
999         uint32_t num = 0;
1000         char *tmp;
1001         const char *iface;
1002         int i;
1003         int ret;
1004
1005         tmp = strdup(ifaces);
1006         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1007                 if (!ctdb_sys_check_iface_exists(iface)) {
1008                         DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1009                         free(tmp);
1010                         return -1;
1011                 }
1012         }
1013         free(tmp);
1014
1015         /* Verify that we don't have an entry for this ip yet */
1016         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1017                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1018                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
1019                                 ctdb_addr_to_str(addr)));
1020                         return -1;
1021                 }               
1022         }
1023
1024         /* create a new vnn structure for this ip address */
1025         vnn = talloc_zero(ctdb, struct ctdb_vnn);
1026         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
1027         vnn->ifaces = talloc_array(vnn, const char *, num + 2);
1028         tmp = talloc_strdup(vnn, ifaces);
1029         CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1030         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1031                 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1032                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1033                 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1034                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
1035                 num++;
1036         }
1037         talloc_free(tmp);
1038         vnn->ifaces[num] = NULL;
1039         vnn->public_address      = *addr;
1040         vnn->public_netmask_bits = mask;
1041         vnn->pnn                 = -1;
1042         if (check_address) {
1043                 if (ctdb_sys_have_ip(addr)) {
1044                         DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
1045                         vnn->pnn = ctdb->pnn;
1046                 }
1047         }
1048
1049         for (i=0; vnn->ifaces[i]; i++) {
1050                 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1051                 if (ret != 0) {
1052                         DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1053                                            "for public_address[%s]\n",
1054                                            vnn->ifaces[i], ctdb_addr_to_str(addr)));
1055                         talloc_free(vnn);
1056                         return -1;
1057                 }
1058         }
1059
1060         DLIST_ADD(ctdb->vnn, vnn);
1061
1062         return 0;
1063 }
1064
1065 /*
1066   setup the public address lists from a file
1067 */
1068 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1069 {
1070         char **lines;
1071         int nlines;
1072         int i;
1073
1074         lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1075         if (lines == NULL) {
1076                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1077                 return -1;
1078         }
1079         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1080                 nlines--;
1081         }
1082
1083         for (i=0;i<nlines;i++) {
1084                 unsigned mask;
1085                 ctdb_sock_addr addr;
1086                 const char *addrstr;
1087                 const char *ifaces;
1088                 char *tok, *line;
1089
1090                 line = lines[i];
1091                 while ((*line == ' ') || (*line == '\t')) {
1092                         line++;
1093                 }
1094                 if (*line == '#') {
1095                         continue;
1096                 }
1097                 if (strcmp(line, "") == 0) {
1098                         continue;
1099                 }
1100                 tok = strtok(line, " \t");
1101                 addrstr = tok;
1102                 tok = strtok(NULL, " \t");
1103                 if (tok == NULL) {
1104                         if (NULL == ctdb->default_public_interface) {
1105                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1106                                          i+1));
1107                                 talloc_free(lines);
1108                                 return -1;
1109                         }
1110                         ifaces = ctdb->default_public_interface;
1111                 } else {
1112                         ifaces = tok;
1113                 }
1114
1115                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1116                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1117                         talloc_free(lines);
1118                         return -1;
1119                 }
1120                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1121                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1122                         talloc_free(lines);
1123                         return -1;
1124                 }
1125         }
1126
1127
1128         talloc_free(lines);
1129         return 0;
1130 }
1131
1132 static void *add_ip_callback(void *parm, void *data)
1133 {
1134         struct public_ip_list *this_ip = parm;
1135         struct public_ip_list *prev_ip = data;
1136
1137         if (prev_ip == NULL) {
1138                 return parm;
1139         }
1140         if (this_ip->pnn == -1) {
1141                 this_ip->pnn = prev_ip->pnn;
1142         }
1143
1144         return parm;
1145 }
1146
1147 static int getips_count_callback(void *param, void *data)
1148 {
1149         struct public_ip_list **ip_list = (struct public_ip_list **)param;
1150         struct public_ip_list *new_ip = (struct public_ip_list *)data;
1151
1152         new_ip->next = *ip_list;
1153         *ip_list     = new_ip;
1154         return 0;
1155 }
1156
1157 static struct ctdb_public_ip_list *
1158 ctdb_fetch_remote_public_ips(struct ctdb_context *ctdb,
1159                              TALLOC_CTX *mem_ctx,
1160                              struct ctdb_node_map_old *nodemap,
1161                              uint32_t public_ip_flags)
1162 {
1163         int j, ret;
1164         struct ctdb_public_ip_list_old *ip_list;
1165         struct ctdb_public_ip_list *public_ips;
1166
1167         public_ips = talloc_zero_array(mem_ctx,
1168                                        struct ctdb_public_ip_list,
1169                                        nodemap->num);
1170         if (public_ips == NULL) {
1171                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1172                 return NULL;
1173         }
1174
1175         for (j = 0; j < nodemap->num; j++) {
1176                 if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
1177                         continue;
1178                 }
1179
1180                 /* Retrieve the list of public IPs from the
1181                  * node. Flags says whether it is known or
1182                  * available. */
1183                 ret = ctdb_ctrl_get_public_ips_flags(
1184                         ctdb, TAKEOVER_TIMEOUT(), j, public_ips,
1185                         public_ip_flags, &ip_list);
1186                 if (ret != 0) {
1187                         DEBUG(DEBUG_ERR,
1188                               ("Failed to read public IPs from node: %u\n", j));
1189                         talloc_free(public_ips);
1190                         return NULL;
1191                 }
1192                 public_ips[j].num = ip_list->num;
1193                 if (ip_list->num == 0) {
1194                         talloc_free(ip_list);
1195                         continue;
1196                 }
1197                 public_ips[j].ip = talloc_zero_array(public_ips,
1198                                                      struct ctdb_public_ip,
1199                                                      ip_list->num);
1200                 if (public_ips[j].ip == NULL) {
1201                         DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1202                         talloc_free(public_ips);
1203                         return NULL;
1204                 }
1205                 memcpy(public_ips[j].ip, &ip_list->ips[0],
1206                        sizeof(struct ctdb_public_ip) * ip_list->num);
1207                 talloc_free(ip_list);
1208         }
1209
1210         return public_ips;
1211 }
1212
1213 static int ctdb_reload_remote_public_ips(struct ctdb_context *ctdb,
1214                                          struct ipalloc_state *ipalloc_state,
1215                                          struct ctdb_node_map_old *nodemap)
1216 {
1217         struct ctdb_public_ip_list *ip_list;
1218
1219         if (ipalloc_state->num != nodemap->num) {
1220                 DEBUG(DEBUG_ERR,
1221                       (__location__
1222                        " ipalloc_state->num (%d) != nodemap->num (%d) invalid param\n",
1223                        ipalloc_state->num, nodemap->num));
1224                 return -1;
1225         }
1226
1227         /* Fetch lists of known public IPs from all nodes */
1228         ip_list = ctdb_fetch_remote_public_ips(ctdb, ipalloc_state, nodemap, 0);
1229         if (ip_list == NULL) {
1230                 DEBUG(DEBUG_ERR, ("Failed to read known public IPs\n"));
1231                 return -1;
1232         }
1233         ipalloc_state->known_public_ips = ip_list;
1234
1235
1236         /* Fetch lists of available public IPs from all nodes */
1237         ip_list = ctdb_fetch_remote_public_ips(ctdb, ipalloc_state, nodemap,
1238                                                CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE);
1239         if (ip_list == NULL) {
1240                 DEBUG(DEBUG_ERR, ("Failed to read available public IPs\n"));
1241                 return -1;
1242         }
1243         ipalloc_state->available_public_ips = ip_list;
1244
1245         return 0;
1246 }
1247
1248 static struct public_ip_list *
1249 create_merged_ip_list(struct ipalloc_state *ipalloc_state)
1250 {
1251         int i, j;
1252         struct public_ip_list *ip_list;
1253         struct ctdb_public_ip_list *public_ips;
1254         struct trbt_tree *ip_tree;
1255
1256         ip_tree = trbt_create(ipalloc_state, 0);
1257
1258         if (ipalloc_state->known_public_ips == NULL) {
1259                 DEBUG(DEBUG_ERR, ("Known public IPs not set\n"));
1260                 return NULL;
1261         }
1262
1263         for (i=0; i < ipalloc_state->num; i++) {
1264
1265                 public_ips = &ipalloc_state->known_public_ips[i];
1266
1267                 for (j=0; j < public_ips->num; j++) {
1268                         struct public_ip_list *tmp_ip;
1269
1270                         /* This is returned as part of ip_list */
1271                         tmp_ip = talloc_zero(ipalloc_state, struct public_ip_list);
1272                         if (tmp_ip == NULL) {
1273                                 DEBUG(DEBUG_ERR,
1274                                       (__location__ " out of memory\n"));
1275                                 talloc_free(ip_tree);
1276                                 return NULL;
1277                         }
1278
1279                         /* Do not use information about IP addresses hosted
1280                          * on other nodes, it may not be accurate */
1281                         if (public_ips->ip[j].pnn == i) {
1282                                 tmp_ip->pnn = public_ips->ip[j].pnn;
1283                         } else {
1284                                 tmp_ip->pnn = -1;
1285                         }
1286                         tmp_ip->addr = public_ips->ip[j].addr;
1287                         tmp_ip->next = NULL;
1288
1289                         trbt_insertarray32_callback(ip_tree,
1290                                 IP_KEYLEN, ip_key(&public_ips->ip[j].addr),
1291                                 add_ip_callback,
1292                                 tmp_ip);
1293                 }
1294         }
1295
1296         ip_list = NULL;
1297         trbt_traversearray32(ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1298         talloc_free(ip_tree);
1299
1300         return ip_list;
1301 }
1302
1303 static bool all_nodes_are_disabled(struct ctdb_node_map_old *nodemap)
1304 {
1305         int i;
1306
1307         for (i=0;i<nodemap->num;i++) {
1308                 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1309                         /* Found one completely healthy node */
1310                         return false;
1311                 }
1312         }
1313
1314         return true;
1315 }
1316
/* State shared between get_tunable_from_nodes() and its callbacks */
struct get_tunable_callback_data {
	/* Name of the tunable being fetched; used in log messages */
	const char *tunable;
	/* talloc array of results, indexed by the replying node's pnn */
	uint32_t *out;
	/* Set by the callbacks when the whole fetch must be failed */
	bool fatal;
};
1322
1323 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
1324                                  int32_t res, TDB_DATA outdata,
1325                                  void *callback)
1326 {
1327         struct get_tunable_callback_data *cd =
1328                 (struct get_tunable_callback_data *)callback;
1329         int size;
1330
1331         if (res != 0) {
1332                 /* Already handled in fail callback */
1333                 return;
1334         }
1335
1336         if (outdata.dsize != sizeof(uint32_t)) {
1337                 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
1338                                  cd->tunable, pnn, (int)sizeof(uint32_t),
1339                                  (int)outdata.dsize));
1340                 cd->fatal = true;
1341                 return;
1342         }
1343
1344         size = talloc_array_length(cd->out);
1345         if (pnn >= size) {
1346                 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
1347                                  cd->tunable, pnn, size));
1348                 return;
1349         }
1350
1351                 
1352         cd->out[pnn] = *(uint32_t *)outdata.dptr;
1353 }
1354
1355 static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1356                                        int32_t res, TDB_DATA outdata,
1357                                        void *callback)
1358 {
1359         struct get_tunable_callback_data *cd =
1360                 (struct get_tunable_callback_data *)callback;
1361
1362         switch (res) {
1363         case -ETIME:
1364                 DEBUG(DEBUG_ERR,
1365                       ("Timed out getting tunable \"%s\" from node %d\n",
1366                        cd->tunable, pnn));
1367                 cd->fatal = true;
1368                 break;
1369         case -EINVAL:
1370         case -1:
1371                 DEBUG(DEBUG_WARNING,
1372                       ("Tunable \"%s\" not implemented on node %d\n",
1373                        cd->tunable, pnn));
1374                 break;
1375         default:
1376                 DEBUG(DEBUG_ERR,
1377                       ("Unexpected error getting tunable \"%s\" from node %d\n",
1378                        cd->tunable, pnn));
1379                 cd->fatal = true;
1380         }
1381 }
1382
1383 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
1384                                         TALLOC_CTX *tmp_ctx,
1385                                         struct ctdb_node_map_old *nodemap,
1386                                         const char *tunable,
1387                                         uint32_t default_value)
1388 {
1389         TDB_DATA data;
1390         struct ctdb_control_get_tunable *t;
1391         uint32_t *nodes;
1392         uint32_t *tvals;
1393         struct get_tunable_callback_data callback_data;
1394         int i;
1395
1396         tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1397         CTDB_NO_MEMORY_NULL(ctdb, tvals);
1398         for (i=0; i<nodemap->num; i++) {
1399                 tvals[i] = default_value;
1400         }
1401                 
1402         callback_data.out = tvals;
1403         callback_data.tunable = tunable;
1404         callback_data.fatal = false;
1405
1406         data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
1407         data.dptr  = talloc_size(tmp_ctx, data.dsize);
1408         t = (struct ctdb_control_get_tunable *)data.dptr;
1409         t->length = strlen(tunable)+1;
1410         memcpy(t->name, tunable, t->length);
1411         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1412         if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
1413                                       nodes, 0, TAKEOVER_TIMEOUT(),
1414                                       false, data,
1415                                       get_tunable_callback,
1416                                       get_tunable_fail_callback,
1417                                       &callback_data) != 0) {
1418                 if (callback_data.fatal) {
1419                         talloc_free(tvals);
1420                         tvals = NULL;
1421                 }
1422         }
1423         talloc_free(nodes);
1424         talloc_free(data.dptr);
1425
1426         return tvals;
1427 }
1428
1429 /* Set internal flags for IP allocation:
1430  *   Clear ip flags
1431  *   Set NOIPTAKOVER ip flags from per-node NoIPTakeover tunable
1432  *   Set NOIPHOST ip flag for each INACTIVE node
1433  *   if all nodes are disabled:
1434  *     Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
1435  *   else
1436  *     Set NOIPHOST ip flags for disabled nodes
1437  */
1438 static void set_ipflags_internal(struct ipalloc_state *ipalloc_state,
1439                                  struct ctdb_node_map_old *nodemap,
1440                                  uint32_t *tval_noiptakeover,
1441                                  uint32_t *tval_noiphostonalldisabled)
1442 {
1443         int i;
1444
1445         for (i=0;i<nodemap->num;i++) {
1446                 /* Can not take IPs on node with NoIPTakeover set */
1447                 if (tval_noiptakeover[i] != 0) {
1448                         ipalloc_state->noiptakeover[i] = true;
1449                 }
1450
1451                 /* Can not host IPs on INACTIVE node */
1452                 if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
1453                         ipalloc_state->noiphost[i] = true;
1454                 }
1455         }
1456
1457         if (all_nodes_are_disabled(nodemap)) {
1458                 /* If all nodes are disabled, can not host IPs on node
1459                  * with NoIPHostOnAllDisabled set
1460                  */
1461                 for (i=0;i<nodemap->num;i++) {
1462                         if (tval_noiphostonalldisabled[i] != 0) {
1463                                 ipalloc_state->noiphost[i] = true;
1464                         }
1465                 }
1466         } else {
1467                 /* If some nodes are not disabled, then can not host
1468                  * IPs on DISABLED node
1469                  */
1470                 for (i=0;i<nodemap->num;i++) {
1471                         if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
1472                                 ipalloc_state->noiphost[i] = true;
1473                         }
1474                 }
1475         }
1476 }
1477
1478 static bool set_ipflags(struct ctdb_context *ctdb,
1479                         struct ipalloc_state *ipalloc_state,
1480                         struct ctdb_node_map_old *nodemap)
1481 {
1482         uint32_t *tval_noiptakeover;
1483         uint32_t *tval_noiphostonalldisabled;
1484
1485         tval_noiptakeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1486                                                    "NoIPTakeover", 0);
1487         if (tval_noiptakeover == NULL) {
1488                 return false;
1489         }
1490
1491         tval_noiphostonalldisabled =
1492                 get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1493                                        "NoIPHostOnAllDisabled", 0);
1494         if (tval_noiphostonalldisabled == NULL) {
1495                 /* Caller frees tmp_ctx */
1496                 return false;
1497         }
1498
1499         set_ipflags_internal(ipalloc_state, nodemap,
1500                              tval_noiptakeover,
1501                              tval_noiphostonalldisabled);
1502
1503         talloc_free(tval_noiptakeover);
1504         talloc_free(tval_noiphostonalldisabled);
1505
1506         return true;
1507 }
1508
1509 static struct ipalloc_state * ipalloc_state_init(struct ctdb_context *ctdb,
1510                                                  TALLOC_CTX *mem_ctx)
1511 {
1512         struct ipalloc_state *ipalloc_state =
1513                 talloc_zero(mem_ctx, struct ipalloc_state);
1514         if (ipalloc_state == NULL) {
1515                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1516                 return NULL;
1517         }
1518
1519         ipalloc_state->num = ctdb->num_nodes;
1520
1521         ipalloc_state->noiptakeover =
1522                 talloc_zero_array(ipalloc_state,
1523                                   bool,
1524                                   ipalloc_state->num);
1525         if (ipalloc_state->noiptakeover == NULL) {
1526                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1527                 goto fail;
1528         }
1529         ipalloc_state->noiphost =
1530                 talloc_zero_array(ipalloc_state,
1531                                   bool,
1532                                   ipalloc_state->num);
1533         if (ipalloc_state->noiphost == NULL) {
1534                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1535                 goto fail;
1536         }
1537
1538         if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1539                 ipalloc_state->algorithm = IPALLOC_LCP2;
1540         } else if (1 == ctdb->tunable.deterministic_public_ips) {
1541                 ipalloc_state->algorithm = IPALLOC_DETERMINISTIC;
1542         } else {
1543                 ipalloc_state->algorithm = IPALLOC_NONDETERMINISTIC;
1544         }
1545
1546         ipalloc_state->no_ip_failback = ctdb->tunable.no_ip_failback;
1547
1548         return ipalloc_state;
1549 fail:
1550         talloc_free(ipalloc_state);
1551         return NULL;
1552 }
1553
/* Per-node failure accounting for a takeover run */
struct takeover_callback_data {
	/* Number of entries in fail_count */
	uint32_t num_nodes;
	/* Failures recorded so far, indexed by pnn */
	unsigned int *fail_count;
};
1558
1559 static struct takeover_callback_data *
1560 takeover_callback_data_init(TALLOC_CTX *mem_ctx,
1561                             uint32_t num_nodes)
1562 {
1563         static struct takeover_callback_data *takeover_data;
1564
1565         takeover_data = talloc_zero(mem_ctx, struct takeover_callback_data);
1566         if (takeover_data == NULL) {
1567                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1568                 return NULL;
1569         }
1570
1571         takeover_data->fail_count = talloc_zero_array(takeover_data,
1572                                                       unsigned int, num_nodes);
1573         if (takeover_data->fail_count == NULL) {
1574                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1575                 talloc_free(takeover_data);
1576                 return NULL;
1577         }
1578
1579         takeover_data->num_nodes = num_nodes;
1580
1581         return takeover_data;
1582 }
1583
1584 static void takeover_run_fail_callback(struct ctdb_context *ctdb,
1585                                        uint32_t node_pnn, int32_t res,
1586                                        TDB_DATA outdata, void *callback_data)
1587 {
1588         struct takeover_callback_data *cd =
1589                 talloc_get_type_abort(callback_data,
1590                                       struct takeover_callback_data);
1591
1592         if (node_pnn >= cd->num_nodes) {
1593                 DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
1594                 return;
1595         }
1596
1597         if (cd->fail_count[node_pnn] == 0) {
1598                 DEBUG(DEBUG_ERR,
1599                       ("Node %u failed the takeover run\n", node_pnn));
1600         }
1601
1602         cd->fail_count[node_pnn]++;
1603 }
1604
1605 static void takeover_run_process_failures(struct ctdb_context *ctdb,
1606                                           struct takeover_callback_data *tcd)
1607 {
1608         unsigned int max_fails = 0;
1609         uint32_t max_pnn = -1;
1610         uint32_t i;
1611
1612         for (i = 0; i < tcd->num_nodes; i++) {
1613                 if (tcd->fail_count[i] > max_fails) {
1614                         max_pnn = i;
1615                         max_fails = tcd->fail_count[i];
1616                 }
1617         }
1618
1619         if (max_fails > 0) {
1620                 int ret;
1621                 TDB_DATA data;
1622
1623                 DEBUG(DEBUG_ERR,
1624                       ("Sending banning credits to %u with fail count %u\n",
1625                        max_pnn, max_fails));
1626
1627                 data.dptr = (uint8_t *)&max_pnn;
1628                 data.dsize = sizeof(uint32_t);
1629                 ret = ctdb_client_send_message(ctdb,
1630                                                CTDB_BROADCAST_CONNECTED,
1631                                                CTDB_SRVID_BANNING,
1632                                                data);
1633                 if (ret != 0) {
1634                         DEBUG(DEBUG_ERR,
1635                               ("Failed to set banning credits for node %u\n",
1636                                max_pnn));
1637                 }
1638         }
1639 }
1640
1641 /*
1642  * Recalculate the allocation of public IPs to nodes and have the
1643  * nodes host their allocated addresses.
1644  *
1645  * - Allocate memory for IP allocation state, including per node
1646  *   arrays
1647  * - Populate IP allocation algorithm in IP allocation state
1648  * - Populate local value of tunable NoIPFailback in IP allocation
1649      state - this is really a cluster-wide configuration variable and
1650      only the value form the master node is used
1651  * - Retrieve tunables NoIPTakeover and NoIPHostOnAllDisabled from all
1652  *   connected nodes - this is done separately so tunable values can
1653  *   be faked in unit testing
1654  * - Populate NoIPTakover tunable in IP allocation state
1655  * - Populate NoIPHost in IP allocation state, derived from node flags
1656  *   and NoIPHostOnAllDisabled tunable
1657  * - Retrieve and populate known and available IP lists in IP
1658  *   allocation state
1659  * - If no available IP addresses then early exit
1660  * - Build list of (known IPs, currently assigned node)
1661  * - Populate list of nodes to force rebalance - internal structure,
1662  *   currently no way to fetch, only used by LCP2 for nodes that have
1663  *   had new IP addresses added
1664  * - Run IP allocation algorithm
1665  * - Send RELEASE_IP to all nodes for IPs they should not host
1666  * - Send TAKE_IP to all nodes for IPs they should host
1667  * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
1668  */
1669 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
1670                       uint32_t *force_rebalance_nodes)
1671 {
1672         int i, ret;
1673         struct ctdb_public_ip ip;
1674         uint32_t *nodes;
1675         struct public_ip_list *all_ips, *tmp_ip;
1676         TDB_DATA data;
1677         struct timeval timeout;
1678         struct client_async_data *async_data;
1679         struct ctdb_client_control_state *state;
1680         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1681         struct ipalloc_state *ipalloc_state;
1682         struct takeover_callback_data *takeover_data;
1683         bool can_host_ips;
1684
1685         /* Initialise fail callback data to be used with
1686          * takeover_run_fail_callback().  A failure in any of the
1687          * following steps will cause an early return, so this can be
1688          * reused for each of those steps without re-initialising. */
1689         takeover_data = takeover_callback_data_init(tmp_ctx,
1690                                                     nodemap->num);
1691         if (takeover_data == NULL) {
1692                 talloc_free(tmp_ctx);
1693                 return -1;
1694         }
1695
1696         /*
1697          * ip failover is completely disabled, just send out the 
1698          * ipreallocated event.
1699          */
1700         if (ctdb->tunable.disable_ip_failover != 0) {
1701                 goto ipreallocated;
1702         }
1703
1704         ipalloc_state = ipalloc_state_init(ctdb, tmp_ctx);
1705         if (ipalloc_state == NULL) {
1706                 talloc_free(tmp_ctx);
1707                 return -1;
1708         }
1709
1710         if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
1711                 DEBUG(DEBUG_ERR,("Failed to set IP flags - aborting takeover run\n"));
1712                 talloc_free(tmp_ctx);
1713                 return -1;
1714         }
1715
1716         /* Fetch known/available public IPs from each active node */
1717         ret = ctdb_reload_remote_public_ips(ctdb, ipalloc_state, nodemap);
1718         if (ret != 0) {
1719                 talloc_free(tmp_ctx);
1720                 return -1;
1721         }
1722
1723         /* Short-circuit IP allocation if no node has available IPs */
1724         can_host_ips = false;
1725         for (i=0; i < ipalloc_state->num; i++) {
1726                 if (ipalloc_state->available_public_ips[i].num != 0) {
1727                         can_host_ips = true;
1728                 }
1729         }
1730         if (!can_host_ips) {
1731                 DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
1732                 goto ipreallocated;
1733         }
1734
1735         /* since nodes only know about those public addresses that
1736            can be served by that particular node, no single node has
1737            a full list of all public addresses that exist in the cluster.
1738            Walk over all node structures and create a merged list of
1739            all public addresses that exist in the cluster.
1740         */
1741         all_ips = create_merged_ip_list(ipalloc_state);
1742         if (all_ips == NULL) {
1743                 talloc_free(tmp_ctx);
1744                 return -1;
1745         }
1746         ipalloc_state->all_ips = all_ips;
1747
1748         ipalloc_state->force_rebalance_nodes = force_rebalance_nodes;
1749
1750         /* Do the IP reassignment calculations */
1751         ipalloc(ipalloc_state);
1752
1753         /* Now tell all nodes to release any public IPs should not
1754          * host.  This will be a NOOP on nodes that don't currently
1755          * hold the given IP.
1756          */
1757         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1758         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1759
1760         async_data->fail_callback = takeover_run_fail_callback;
1761         async_data->callback_data = takeover_data;
1762
1763         ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
1764
1765         /* Send a RELEASE_IP to all nodes that should not be hosting
1766          * each IP.  For each IP, all but one of these will be
1767          * redundant.  However, the redundant ones are used to tell
1768          * nodes which node should be hosting the IP so that commands
1769          * like "ctdb ip" can display a particular nodes idea of who
1770          * is hosting what. */
1771         for (i=0;i<nodemap->num;i++) {
1772                 /* don't talk to unconnected nodes, but do talk to banned nodes */
1773                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1774                         continue;
1775                 }
1776
1777                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1778                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1779                                 /* This node should be serving this
1780                                    vnn so don't tell it to release the ip
1781                                 */
1782                                 continue;
1783                         }
1784                         ip.pnn  = tmp_ip->pnn;
1785                         ip.addr = tmp_ip->addr;
1786
1787                         timeout = TAKEOVER_TIMEOUT();
1788                         data.dsize = sizeof(ip);
1789                         data.dptr  = (uint8_t *)&ip;
1790                         state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1791                                                   0, CTDB_CONTROL_RELEASE_IP, 0,
1792                                                   data, async_data,
1793                                                   &timeout, NULL);
1794                         if (state == NULL) {
1795                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1796                                 talloc_free(tmp_ctx);
1797                                 return -1;
1798                         }
1799
1800                         ctdb_client_async_add(async_data, state);
1801                 }
1802         }
1803         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1804                 DEBUG(DEBUG_ERR,
1805                       ("Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1806                 goto fail;
1807         }
1808         talloc_free(async_data);
1809
1810
1811         /* For each IP, send a TAKOVER_IP to the node that should be
1812          * hosting it.  Many of these will often be redundant (since
1813          * the allocation won't have changed) but they can be useful
1814          * to recover from inconsistencies. */
1815         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1816         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1817
1818         async_data->fail_callback = takeover_run_fail_callback;
1819         async_data->callback_data = takeover_data;
1820
1821         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1822                 if (tmp_ip->pnn == -1) {
1823                         /* this IP won't be taken over */
1824                         continue;
1825                 }
1826
1827                 ip.pnn  = tmp_ip->pnn;
1828                 ip.addr = tmp_ip->addr;
1829
1830                 timeout = TAKEOVER_TIMEOUT();
1831                 data.dsize = sizeof(ip);
1832                 data.dptr  = (uint8_t *)&ip;
1833                 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1834                                           0, CTDB_CONTROL_TAKEOVER_IP, 0,
1835                                           data, async_data, &timeout, NULL);
1836                 if (state == NULL) {
1837                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1838                         talloc_free(tmp_ctx);
1839                         return -1;
1840                 }
1841
1842                 ctdb_client_async_add(async_data, state);
1843         }
1844         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1845                 DEBUG(DEBUG_ERR,
1846                       ("Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1847                 goto fail;
1848         }
1849
1850 ipreallocated:
1851         /*
1852          * Tell all nodes to run eventscripts to process the
1853          * "ipreallocated" event.  This can do a lot of things,
1854          * including restarting services to reconfigure them if public
1855          * IPs have moved.  Once upon a time this event only used to
1856          * update natgw.
1857          */
1858         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1859         ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
1860                                         nodes, 0, TAKEOVER_TIMEOUT(),
1861                                         false, tdb_null,
1862                                         NULL, takeover_run_fail_callback,
1863                                         takeover_data);
1864         if (ret != 0) {
1865                 DEBUG(DEBUG_ERR,
1866                       ("Async CTDB_CONTROL_IPREALLOCATED control failed\n"));
1867                 goto fail;
1868         }
1869
1870         talloc_free(tmp_ctx);
1871         return ret;
1872
1873 fail:
1874         takeover_run_process_failures(ctdb, takeover_data);
1875         talloc_free(tmp_ctx);
1876         return -1;
1877 }
1878
1879
1880 /*
1881   destroy a ctdb_client_ip structure
1882  */
1883 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1884 {
1885         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1886                 ctdb_addr_to_str(&ip->addr),
1887                 ntohs(ip->addr.ip.sin_port),
1888                 ip->client_id));
1889
1890         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1891         return 0;
1892 }
1893
1894 /*
1895   called by a client to inform us of a TCP connection that it is managing
1896   that should tickled with an ACK when IP takeover is done
1897  */
1898 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1899                                 TDB_DATA indata)
1900 {
1901         struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1902         struct ctdb_connection *tcp_sock = NULL;
1903         struct ctdb_tcp_list *tcp;
1904         struct ctdb_connection t;
1905         int ret;
1906         TDB_DATA data;
1907         struct ctdb_client_ip *ip;
1908         struct ctdb_vnn *vnn;
1909         ctdb_sock_addr addr;
1910
1911         /* If we don't have public IPs, tickles are useless */
1912         if (ctdb->vnn == NULL) {
1913                 return 0;
1914         }
1915
1916         tcp_sock = (struct ctdb_connection *)indata.dptr;
1917
1918         addr = tcp_sock->src;
1919         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1920         addr = tcp_sock->dst;
1921         ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
1922
1923         ZERO_STRUCT(addr);
1924         memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1925         vnn = find_public_ip_vnn(ctdb, &addr);
1926         if (vnn == NULL) {
1927                 switch (addr.sa.sa_family) {
1928                 case AF_INET:
1929                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1930                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1931                                         ctdb_addr_to_str(&addr)));
1932                         }
1933                         break;
1934                 case AF_INET6:
1935                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1936                                 ctdb_addr_to_str(&addr)));
1937                         break;
1938                 default:
1939                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1940                 }
1941
1942                 return 0;
1943         }
1944
1945         if (vnn->pnn != ctdb->pnn) {
1946                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1947                         ctdb_addr_to_str(&addr),
1948                         client_id, client->pid));
1949                 /* failing this call will tell smbd to die */
1950                 return -1;
1951         }
1952
1953         ip = talloc(client, struct ctdb_client_ip);
1954         CTDB_NO_MEMORY(ctdb, ip);
1955
1956         ip->ctdb      = ctdb;
1957         ip->addr      = addr;
1958         ip->client_id = client_id;
1959         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1960         DLIST_ADD(ctdb->client_ip_list, ip);
1961
1962         tcp = talloc(client, struct ctdb_tcp_list);
1963         CTDB_NO_MEMORY(ctdb, tcp);
1964
1965         tcp->connection.src = tcp_sock->src;
1966         tcp->connection.dst = tcp_sock->dst;
1967
1968         DLIST_ADD(client->tcp_list, tcp);
1969
1970         t.src = tcp_sock->src;
1971         t.dst = tcp_sock->dst;
1972
1973         data.dptr = (uint8_t *)&t;
1974         data.dsize = sizeof(t);
1975
1976         switch (addr.sa.sa_family) {
1977         case AF_INET:
1978                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1979                         (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1980                         ctdb_addr_to_str(&tcp_sock->src),
1981                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1982                 break;
1983         case AF_INET6:
1984                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1985                         (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1986                         ctdb_addr_to_str(&tcp_sock->src),
1987                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1988                 break;
1989         default:
1990                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1991         }
1992
1993
1994         /* tell all nodes about this tcp connection */
1995         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1996                                        CTDB_CONTROL_TCP_ADD,
1997                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1998         if (ret != 0) {
1999                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
2000                 return -1;
2001         }
2002
2003         return 0;
2004 }
2005
2006 /*
2007   find a tcp address on a list
2008  */
2009 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
2010                                            struct ctdb_connection *tcp)
2011 {
2012         int i;
2013
2014         if (array == NULL) {
2015                 return NULL;
2016         }
2017
2018         for (i=0;i<array->num;i++) {
2019                 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
2020                     ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
2021                         return &array->connections[i];
2022                 }
2023         }
2024         return NULL;
2025 }
2026
2027
2028
2029 /*
2030   called by a daemon to inform us of a TCP connection that one of its
2031   clients managing that should tickled with an ACK when IP takeover is
2032   done
2033  */
2034 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
2035 {
2036         struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
2037         struct ctdb_tcp_array *tcparray;
2038         struct ctdb_connection tcp;
2039         struct ctdb_vnn *vnn;
2040
2041         /* If we don't have public IPs, tickles are useless */
2042         if (ctdb->vnn == NULL) {
2043                 return 0;
2044         }
2045
2046         vnn = find_public_ip_vnn(ctdb, &p->dst);
2047         if (vnn == NULL) {
2048                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
2049                         ctdb_addr_to_str(&p->dst)));
2050
2051                 return -1;
2052         }
2053
2054
2055         tcparray = vnn->tcp_array;
2056
2057         /* If this is the first tickle */
2058         if (tcparray == NULL) {
2059                 tcparray = talloc(vnn, struct ctdb_tcp_array);
2060                 CTDB_NO_MEMORY(ctdb, tcparray);
2061                 vnn->tcp_array = tcparray;
2062
2063                 tcparray->num = 0;
2064                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
2065                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2066
2067                 tcparray->connections[tcparray->num].src = p->src;
2068                 tcparray->connections[tcparray->num].dst = p->dst;
2069                 tcparray->num++;
2070
2071                 if (tcp_update_needed) {
2072                         vnn->tcp_update_needed = true;
2073                 }
2074                 return 0;
2075         }
2076
2077
2078         /* Do we already have this tickle ?*/
2079         tcp.src = p->src;
2080         tcp.dst = p->dst;
2081         if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
2082                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
2083                         ctdb_addr_to_str(&tcp.dst),
2084                         ntohs(tcp.dst.ip.sin_port),
2085                         vnn->pnn));
2086                 return 0;
2087         }
2088
2089         /* A new tickle, we must add it to the array */
2090         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
2091                                         struct ctdb_connection,
2092                                         tcparray->num+1);
2093         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2094
2095         tcparray->connections[tcparray->num].src = p->src;
2096         tcparray->connections[tcparray->num].dst = p->dst;
2097         tcparray->num++;
2098
2099         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
2100                 ctdb_addr_to_str(&tcp.dst),
2101                 ntohs(tcp.dst.ip.sin_port),
2102                 vnn->pnn));
2103
2104         if (tcp_update_needed) {
2105                 vnn->tcp_update_needed = true;
2106         }
2107
2108         return 0;
2109 }
2110
2111
2112 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
2113 {
2114         struct ctdb_connection *tcpp;
2115
2116         if (vnn == NULL) {
2117                 return;
2118         }
2119
2120         /* if the array is empty we cant remove it
2121            and we don't need to do anything
2122          */
2123         if (vnn->tcp_array == NULL) {
2124                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
2125                         ctdb_addr_to_str(&conn->dst),
2126                         ntohs(conn->dst.ip.sin_port)));
2127                 return;
2128         }
2129
2130
2131         /* See if we know this connection
2132            if we don't know this connection  then we dont need to do anything
2133          */
2134         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2135         if (tcpp == NULL) {
2136                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2137                         ctdb_addr_to_str(&conn->dst),
2138                         ntohs(conn->dst.ip.sin_port)));
2139                 return;
2140         }
2141
2142
2143         /* We need to remove this entry from the array.
2144            Instead of allocating a new array and copying data to it
2145            we cheat and just copy the last entry in the existing array
2146            to the entry that is to be removed and just shring the 
2147            ->num field
2148          */
2149         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2150         vnn->tcp_array->num--;
2151
2152         /* If we deleted the last entry we also need to remove the entire array
2153          */
2154         if (vnn->tcp_array->num == 0) {
2155                 talloc_free(vnn->tcp_array);
2156                 vnn->tcp_array = NULL;
2157         }               
2158
2159         vnn->tcp_update_needed = true;
2160
2161         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2162                 ctdb_addr_to_str(&conn->src),
2163                 ntohs(conn->src.ip.sin_port)));
2164 }
2165
2166
2167 /*
2168   called by a daemon to inform us of a TCP connection that one of its
2169   clients used are no longer needed in the tickle database
2170  */
2171 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2172 {
2173         struct ctdb_vnn *vnn;
2174         struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
2175
2176         /* If we don't have public IPs, tickles are useless */
2177         if (ctdb->vnn == NULL) {
2178                 return 0;
2179         }
2180
2181         vnn = find_public_ip_vnn(ctdb, &conn->dst);
2182         if (vnn == NULL) {
2183                 DEBUG(DEBUG_ERR,
2184                       (__location__ " unable to find public address %s\n",
2185                        ctdb_addr_to_str(&conn->dst)));
2186                 return 0;
2187         }
2188
2189         ctdb_remove_connection(vnn, conn);
2190
2191         return 0;
2192 }
2193
2194
2195 /*
2196   Called when another daemon starts - causes all tickles for all
2197   public addresses we are serving to be sent to the new node on the
2198   next check.  This actually causes the next scheduled call to
2199   tdb_update_tcp_tickles() to update all nodes.  This is simple and
2200   doesn't require careful error handling.
2201  */
2202 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
2203 {
2204         struct ctdb_vnn *vnn;
2205
2206         DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
2207                            (unsigned long) pnn));
2208
2209         for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2210                 vnn->tcp_update_needed = true;
2211         }
2212
2213         return 0;
2214 }
2215
2216
2217 /*
2218   called when a client structure goes away - hook to remove
2219   elements from the tcp_list in all daemons
2220  */
2221 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2222 {
2223         while (client->tcp_list) {
2224                 struct ctdb_vnn *vnn;
2225                 struct ctdb_tcp_list *tcp = client->tcp_list;
2226                 struct ctdb_connection *conn = &tcp->connection;
2227
2228                 DLIST_REMOVE(client->tcp_list, tcp);
2229
2230                 vnn = find_public_ip_vnn(client->ctdb,
2231                                          &conn->dst);
2232                 if (vnn == NULL) {
2233                         DEBUG(DEBUG_ERR,
2234                               (__location__ " unable to find public address %s\n",
2235                                ctdb_addr_to_str(&conn->dst)));
2236                         continue;
2237                 }
2238
2239                 /* If the IP address is hosted on this node then
2240                  * remove the connection. */
2241                 if (vnn->pnn == client->ctdb->pnn) {
2242                         ctdb_remove_connection(vnn, conn);
2243                 }
2244
2245                 /* Otherwise this function has been called because the
2246                  * server IP address has been released to another node
2247                  * and the client has exited.  This means that we
2248                  * should not delete the connection information.  The
2249                  * takeover node processes connections too. */
2250         }
2251 }
2252
2253
2254 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2255 {
2256         struct ctdb_vnn *vnn;
2257         int count = 0;
2258         TDB_DATA data;
2259
2260         if (ctdb->tunable.disable_ip_failover == 1) {
2261                 return;
2262         }
2263
2264         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2265                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2266                         ctdb_vnn_unassign_iface(ctdb, vnn);
2267                         continue;
2268                 }
2269                 if (!vnn->iface) {
2270                         continue;
2271                 }
2272
2273                 /* Don't allow multiple releases at once.  Some code,
2274                  * particularly ctdb_tickle_sentenced_connections() is
2275                  * not re-entrant */
2276                 if (vnn->update_in_flight) {
2277                         DEBUG(DEBUG_WARNING,
2278                               (__location__
2279                                " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
2280                                     ctdb_addr_to_str(&vnn->public_address),
2281                                     vnn->public_netmask_bits,
2282                                     ctdb_vnn_iface_string(vnn)));
2283                         continue;
2284                 }
2285                 vnn->update_in_flight = true;
2286
2287                 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
2288                                     ctdb_addr_to_str(&vnn->public_address),
2289                                     vnn->public_netmask_bits,
2290                                     ctdb_vnn_iface_string(vnn)));
2291
2292                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2293                                   ctdb_vnn_iface_string(vnn),
2294                                   ctdb_addr_to_str(&vnn->public_address),
2295                                   vnn->public_netmask_bits);
2296
2297                 data.dptr = (uint8_t *)talloc_strdup(
2298                                 vnn, ctdb_addr_to_str(&vnn->public_address));
2299                 if (data.dptr != NULL) {
2300                         data.dsize = strlen((char *)data.dptr) + 1;
2301                         ctdb_daemon_send_message(ctdb, ctdb->pnn,
2302                                                  CTDB_SRVID_RELEASE_IP, data);
2303                         talloc_free(data.dptr);
2304                 }
2305
2306                 ctdb_vnn_unassign_iface(ctdb, vnn);
2307                 vnn->update_in_flight = false;
2308                 count++;
2309         }
2310
2311         DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
2312 }
2313
2314
2315 /*
2316   get list of public IPs
2317  */
2318 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
2319                                     struct ctdb_req_control_old *c, TDB_DATA *outdata)
2320 {
2321         int i, num, len;
2322         struct ctdb_public_ip_list_old *ips;
2323         struct ctdb_vnn *vnn;
2324         bool only_available = false;
2325
2326         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2327                 only_available = true;
2328         }
2329
2330         /* count how many public ip structures we have */
2331         num = 0;
2332         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2333                 num++;
2334         }
2335
2336         len = offsetof(struct ctdb_public_ip_list_old, ips) +
2337                 num*sizeof(struct ctdb_public_ip);
2338         ips = talloc_zero_size(outdata, len);
2339         CTDB_NO_MEMORY(ctdb, ips);
2340
2341         i = 0;
2342         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2343                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2344                         continue;
2345                 }
2346                 ips->ips[i].pnn  = vnn->pnn;
2347                 ips->ips[i].addr = vnn->public_address;
2348                 i++;
2349         }
2350         ips->num = i;
2351         len = offsetof(struct ctdb_public_ip_list_old, ips) +
2352                 i*sizeof(struct ctdb_public_ip);
2353
2354         outdata->dsize = len;
2355         outdata->dptr  = (uint8_t *)ips;
2356
2357         return 0;
2358 }
2359
2360
2361 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2362                                         struct ctdb_req_control_old *c,
2363                                         TDB_DATA indata,
2364                                         TDB_DATA *outdata)
2365 {
2366         int i, num, len;
2367         ctdb_sock_addr *addr;
2368         struct ctdb_public_ip_info_old *info;
2369         struct ctdb_vnn *vnn;
2370
2371         addr = (ctdb_sock_addr *)indata.dptr;
2372
2373         vnn = find_public_ip_vnn(ctdb, addr);
2374         if (vnn == NULL) {
2375                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2376                                  "'%s'not a public address\n",
2377                                  ctdb_addr_to_str(addr)));
2378                 return -1;
2379         }
2380
2381         /* count how many public ip structures we have */
2382         num = 0;
2383         for (;vnn->ifaces[num];) {
2384                 num++;
2385         }
2386
2387         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2388                 num*sizeof(struct ctdb_iface);
2389         info = talloc_zero_size(outdata, len);
2390         CTDB_NO_MEMORY(ctdb, info);
2391
2392         info->ip.addr = vnn->public_address;
2393         info->ip.pnn = vnn->pnn;
2394         info->active_idx = 0xFFFFFFFF;
2395
2396         for (i=0; vnn->ifaces[i]; i++) {
2397                 struct ctdb_interface *cur;
2398
2399                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2400                 if (cur == NULL) {
2401                         DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2402                                            vnn->ifaces[i]));
2403                         return -1;
2404                 }
2405                 if (vnn->iface == cur) {
2406                         info->active_idx = i;
2407                 }
2408                 strncpy(info->ifaces[i].name, cur->name,
2409                         sizeof(info->ifaces[i].name));
2410                 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
2411                 info->ifaces[i].link_state = cur->link_up;
2412                 info->ifaces[i].references = cur->references;
2413         }
2414         info->num = i;
2415         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2416                 i*sizeof(struct ctdb_iface);
2417
2418         outdata->dsize = len;
2419         outdata->dptr  = (uint8_t *)info;
2420
2421         return 0;
2422 }
2423
2424 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2425                                 struct ctdb_req_control_old *c,
2426                                 TDB_DATA *outdata)
2427 {
2428         int i, num, len;
2429         struct ctdb_iface_list_old *ifaces;
2430         struct ctdb_interface *cur;
2431
2432         /* count how many public ip structures we have */
2433         num = 0;
2434         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2435                 num++;
2436         }
2437
2438         len = offsetof(struct ctdb_iface_list_old, ifaces) +
2439                 num*sizeof(struct ctdb_iface);
2440         ifaces = talloc_zero_size(outdata, len);
2441         CTDB_NO_MEMORY(ctdb, ifaces);
2442
2443         i = 0;
2444         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2445                 strncpy(ifaces->ifaces[i].name, cur->name,
2446                         sizeof(ifaces->ifaces[i].name));
2447                 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
2448                 ifaces->ifaces[i].link_state = cur->link_up;
2449                 ifaces->ifaces[i].references = cur->references;
2450                 i++;
2451         }
2452         ifaces->num = i;
2453         len = offsetof(struct ctdb_iface_list_old, ifaces) +
2454                 i*sizeof(struct ctdb_iface);
2455
2456         outdata->dsize = len;
2457         outdata->dptr  = (uint8_t *)ifaces;
2458
2459         return 0;
2460 }
2461
2462 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2463                                     struct ctdb_req_control_old *c,
2464                                     TDB_DATA indata)
2465 {
2466         struct ctdb_iface *info;
2467         struct ctdb_interface *iface;
2468         bool link_up = false;
2469
2470         info = (struct ctdb_iface *)indata.dptr;
2471
2472         if (info->name[CTDB_IFACE_SIZE] != '\0') {
2473                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2474                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2475                                   len, len, info->name));
2476                 return -1;
2477         }
2478
2479         switch (info->link_state) {
2480         case 0:
2481                 link_up = false;
2482                 break;
2483         case 1:
2484                 link_up = true;
2485                 break;
2486         default:
2487                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2488                                   (unsigned int)info->link_state));
2489                 return -1;
2490         }
2491
2492         if (info->references != 0) {
2493                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2494                                   (unsigned int)info->references));
2495                 return -1;
2496         }
2497
2498         iface = ctdb_find_iface(ctdb, info->name);
2499         if (iface == NULL) {
2500                 return -1;
2501         }
2502
2503         if (link_up == iface->link_up) {
2504                 return 0;
2505         }
2506
2507         DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2508               ("iface[%s] has changed it's link status %s => %s\n",
2509                iface->name,
2510                iface->link_up?"up":"down",
2511                link_up?"up":"down"));
2512
2513         iface->link_up = link_up;
2514         return 0;
2515 }
2516
2517
2518 /*
2519   called by a daemon to inform us of the entire list of TCP tickles for
2520   a particular public address.
2521   this control should only be sent by the node that is currently serving
2522   that public address.
2523  */
2524 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2525 {
2526         struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
2527         struct ctdb_tcp_array *tcparray;
2528         struct ctdb_vnn *vnn;
2529
2530         /* We must at least have tickles.num or else we cant verify the size
2531            of the received data blob
2532          */
2533         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2534                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2535                 return -1;
2536         }
2537
2538         /* verify that the size of data matches what we expect */
2539         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2540                          + sizeof(struct ctdb_connection) * list->num) {
2541                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2542                 return -1;
2543         }
2544
2545         DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2546                            ctdb_addr_to_str(&list->addr)));
2547
2548         vnn = find_public_ip_vnn(ctdb, &list->addr);
2549         if (vnn == NULL) {
2550                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2551                         ctdb_addr_to_str(&list->addr)));
2552
2553                 return 1;
2554         }
2555
2556         if (vnn->pnn == ctdb->pnn) {
2557                 DEBUG(DEBUG_INFO,
2558                       ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2559                        ctdb_addr_to_str(&list->addr)));
2560                 return 0;
2561         }
2562
2563         /* remove any old ticklelist we might have */
2564         talloc_free(vnn->tcp_array);
2565         vnn->tcp_array = NULL;
2566
2567         tcparray = talloc(vnn, struct ctdb_tcp_array);
2568         CTDB_NO_MEMORY(ctdb, tcparray);
2569
2570         tcparray->num = list->num;
2571
2572         tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
2573         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2574
2575         memcpy(tcparray->connections, &list->connections[0],
2576                sizeof(struct ctdb_connection)*tcparray->num);
2577
2578         /* We now have a new fresh tickle list array for this vnn */
2579         vnn->tcp_array = tcparray;
2580
2581         return 0;
2582 }
2583
2584 /*
2585   called to return the full list of tickles for the puclic address associated 
2586   with the provided vnn
2587  */
2588 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2589 {
2590         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2591         struct ctdb_tickle_list_old *list;
2592         struct ctdb_tcp_array *tcparray;
2593         int num, i;
2594         struct ctdb_vnn *vnn;
2595         unsigned port;
2596
2597         vnn = find_public_ip_vnn(ctdb, addr);
2598         if (vnn == NULL) {
2599                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2600                         ctdb_addr_to_str(addr)));
2601
2602                 return 1;
2603         }
2604
2605         port = ctdb_addr_to_port(addr);
2606
2607         tcparray = vnn->tcp_array;
2608         num = 0;
2609         if (tcparray != NULL) {
2610                 if (port == 0) {
2611                         /* All connections */
2612                         num = tcparray->num;
2613                 } else {
2614                         /* Count connections for port */
2615                         for (i = 0; i < tcparray->num; i++) {
2616                                 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2617                                         num++;
2618                                 }
2619                         }
2620                 }
2621         }
2622
2623         outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
2624                         + sizeof(struct ctdb_connection) * num;
2625
2626         outdata->dptr  = talloc_size(outdata, outdata->dsize);
2627         CTDB_NO_MEMORY(ctdb, outdata->dptr);
2628         list = (struct ctdb_tickle_list_old *)outdata->dptr;
2629
2630         list->addr = *addr;
2631         list->num = num;
2632
2633         if (num == 0) {
2634                 return 0;
2635         }
2636
2637         num = 0;
2638         for (i = 0; i < tcparray->num; i++) {
2639                 if (port == 0 || \
2640                     port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2641                         list->connections[num] = tcparray->connections[i];
2642                         num++;
2643                 }
2644         }
2645
2646         return 0;
2647 }
2648
2649
2650 /*
2651   set the list of all tcp tickles for a public address
2652  */
2653 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2654                                             ctdb_sock_addr *addr,
2655                                             struct ctdb_tcp_array *tcparray)
2656 {
2657         int ret, num;
2658         TDB_DATA data;
2659         struct ctdb_tickle_list_old *list;
2660
2661         if (tcparray) {
2662                 num = tcparray->num;
2663         } else {
2664                 num = 0;
2665         }
2666
2667         data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2668                         sizeof(struct ctdb_connection) * num;
2669         data.dptr = talloc_size(ctdb, data.dsize);
2670         CTDB_NO_MEMORY(ctdb, data.dptr);
2671
2672         list = (struct ctdb_tickle_list_old *)data.dptr;
2673         list->addr = *addr;
2674         list->num = num;
2675         if (tcparray) {
2676                 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2677         }
2678
2679         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2680                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2681                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2682         if (ret != 0) {
2683                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2684                 return -1;
2685         }
2686
2687         talloc_free(data.dptr);
2688
2689         return ret;
2690 }
2691
2692
2693 /*
2694   perform tickle updates if required
2695  */
2696 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2697                                     struct tevent_timer *te,
2698                                     struct timeval t, void *private_data)
2699 {
2700         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2701         int ret;
2702         struct ctdb_vnn *vnn;
2703
2704         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2705                 /* we only send out updates for public addresses that 
2706                    we have taken over
2707                  */
2708                 if (ctdb->pnn != vnn->pnn) {
2709                         continue;
2710                 }
2711                 /* We only send out the updates if we need to */
2712                 if (!vnn->tcp_update_needed) {
2713                         continue;
2714                 }
2715                 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2716                                                        &vnn->public_address,
2717                                                        vnn->tcp_array);
2718                 if (ret != 0) {
2719                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2720                                 ctdb_addr_to_str(&vnn->public_address)));
2721                 } else {
2722                         DEBUG(DEBUG_INFO,
2723                               ("Sent tickle update for public address %s\n",
2724                                ctdb_addr_to_str(&vnn->public_address)));
2725                         vnn->tcp_update_needed = false;
2726                 }
2727         }
2728
2729         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2730                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2731                          ctdb_update_tcp_tickles, ctdb);
2732 }
2733
2734 /*
2735   start periodic update of tcp tickles
2736  */
2737 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2738 {
2739         ctdb->tickle_update_context = talloc_new(ctdb);
2740
2741         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2742                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2743                          ctdb_update_tcp_tickles, ctdb);
2744 }
2745
2746
2747
2748
2749 struct control_gratious_arp {
2750         struct ctdb_context *ctdb;
2751         ctdb_sock_addr addr;
2752         const char *iface;
2753         int count;
2754 };
2755
2756 /*
2757   send a control_gratuitous arp
2758  */
2759 static void send_gratious_arp(struct tevent_context *ev,
2760                               struct tevent_timer *te,
2761                               struct timeval t, void *private_data)
2762 {
2763         int ret;
2764         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2765                                                         struct control_gratious_arp);
2766
2767         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2768         if (ret != 0) {
2769                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2770                                  arp->iface, strerror(errno)));
2771         }
2772
2773
2774         arp->count++;
2775         if (arp->count == CTDB_ARP_REPEAT) {
2776                 talloc_free(arp);
2777                 return;
2778         }
2779
2780         tevent_add_timer(arp->ctdb->ev, arp,
2781                          timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2782                          send_gratious_arp, arp);
2783 }
2784
2785
2786 /*
2787   send a gratious arp 
2788  */
2789 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2790 {
2791         struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2792         struct control_gratious_arp *arp;
2793
2794         /* verify the size of indata */
2795         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2796                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2797                                  (unsigned)indata.dsize, 
2798                                  (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2799                 return -1;
2800         }
2801         if (indata.dsize != 
2802                 ( offsetof(struct ctdb_addr_info_old, iface)
2803                 + gratious_arp->len ) ){
2804
2805                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2806                         "but should be %u bytes\n", 
2807                          (unsigned)indata.dsize, 
2808                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2809                 return -1;
2810         }
2811
2812
2813         arp = talloc(ctdb, struct control_gratious_arp);
2814         CTDB_NO_MEMORY(ctdb, arp);
2815
2816         arp->ctdb  = ctdb;
2817         arp->addr   = gratious_arp->addr;
2818         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2819         CTDB_NO_MEMORY(ctdb, arp->iface);
2820         arp->count = 0;
2821
2822         tevent_add_timer(arp->ctdb->ev, arp,
2823                          timeval_zero(), send_gratious_arp, arp);
2824
2825         return 0;
2826 }
2827
2828 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2829 {
2830         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2831         int ret;
2832
2833         /* verify the size of indata */
2834         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2835                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2836                 return -1;
2837         }
2838         if (indata.dsize != 
2839                 ( offsetof(struct ctdb_addr_info_old, iface)
2840                 + pub->len ) ){
2841
2842                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2843                         "but should be %u bytes\n", 
2844                          (unsigned)indata.dsize, 
2845                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2846                 return -1;
2847         }
2848
2849         DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2850
2851         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2852
2853         if (ret != 0) {
2854                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2855                 return -1;
2856         }
2857
2858         return 0;
2859 }
2860
/*
  state handed to delete_ip_callback
 */
struct delete_ip_callback_state {
        /* control request answered by delete_ip_callback */
        struct ctdb_req_control_old *c;
};
2864
2865 /*
2866   called when releaseip event finishes for del_public_address
2867  */
2868 static void delete_ip_callback(struct ctdb_context *ctdb,
2869                                int32_t status, TDB_DATA data,
2870                                const char *errormsg,
2871                                void *private_data)
2872 {
2873         struct delete_ip_callback_state *state =
2874                 talloc_get_type(private_data, struct delete_ip_callback_state);
2875
2876         /* If release failed then fail. */
2877         ctdb_request_control_reply(ctdb, state->c, NULL, status, errormsg);
2878         talloc_free(private_data);
2879 }
2880
2881 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb,
2882                                         struct ctdb_req_control_old *c,
2883                                         TDB_DATA indata, bool *async_reply)
2884 {
2885         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2886         struct ctdb_vnn *vnn;
2887
2888         /* verify the size of indata */
2889         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2890                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2891                 return -1;
2892         }
2893         if (indata.dsize != 
2894                 ( offsetof(struct ctdb_addr_info_old, iface)
2895                 + pub->len ) ){
2896
2897                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2898                         "but should be %u bytes\n", 
2899                          (unsigned)indata.dsize, 
2900                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2901                 return -1;
2902         }
2903
2904         DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2905
2906         /* walk over all public addresses until we find a match */
2907         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2908                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2909                         if (vnn->pnn == ctdb->pnn) {
2910                                 struct delete_ip_callback_state *state;
2911                                 struct ctdb_public_ip *ip;
2912                                 TDB_DATA data;
2913                                 int ret;
2914
2915                                 vnn->delete_pending = true;
2916
2917                                 state = talloc(ctdb,
2918                                                struct delete_ip_callback_state);
2919                                 CTDB_NO_MEMORY(ctdb, state);
2920                                 state->c = c;
2921
2922                                 ip = talloc(state, struct ctdb_public_ip);
2923                                 if (ip == NULL) {
2924                                         DEBUG(DEBUG_ERR,
2925                                               (__location__ " Out of memory\n"));
2926                                         talloc_free(state);
2927                                         return -1;
2928                                 }
2929                                 ip->pnn = -1;
2930                                 ip->addr = pub->addr;
2931
2932                                 data.dsize = sizeof(struct ctdb_public_ip);
2933                                 data.dptr = (unsigned char *)ip;
2934
2935                                 ret = ctdb_daemon_send_control(ctdb,
2936                                                                ctdb_get_pnn(ctdb),
2937                                                                0,
2938                                                                CTDB_CONTROL_RELEASE_IP,
2939                                                                0, 0,
2940                                                                data,
2941                                                                delete_ip_callback,
2942                                                                state);
2943                                 if (ret == -1) {
2944                                         DEBUG(DEBUG_ERR,
2945                                               (__location__ "Unable to send "
2946                                                "CTDB_CONTROL_RELEASE_IP\n"));
2947                                         talloc_free(state);
2948                                         return -1;
2949                                 }
2950
2951                                 state->c = talloc_steal(state, c);
2952                                 *async_reply = true;
2953                         } else {
2954                                 /* This IP is not hosted on the
2955                                  * current node so just delete it
2956                                  * now. */
2957                                 do_delete_ip(ctdb, vnn);
2958                         }
2959
2960                         return 0;
2961                 }
2962         }
2963
2964         DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2965                          ctdb_addr_to_str(&pub->addr)));
2966         return -1;
2967 }
2968
2969
/*
  state handed to ctdb_ipreallocated_callback
 */
struct ipreallocated_callback_state {
        /* control request answered by ctdb_ipreallocated_callback */
        struct ctdb_req_control_old *c;
};
2973
2974 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2975                                         int status, void *p)
2976 {
2977         struct ipreallocated_callback_state *state =
2978                 talloc_get_type(p, struct ipreallocated_callback_state);
2979
2980         if (status != 0) {
2981                 DEBUG(DEBUG_ERR,
2982                       (" \"ipreallocated\" event script failed (status %d)\n",
2983                        status));
2984                 if (status == -ETIME) {
2985                         ctdb_ban_self(ctdb);
2986                 }
2987         }
2988
2989         ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2990         talloc_free(state);
2991 }
2992
2993 /* A control to run the ipreallocated event */
2994 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2995                                    struct ctdb_req_control_old *c,
2996                                    bool *async_reply)
2997 {
2998         int ret;
2999         struct ipreallocated_callback_state *state;
3000
3001         state = talloc(ctdb, struct ipreallocated_callback_state);
3002         CTDB_NO_MEMORY(ctdb, state);
3003
3004         DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
3005
3006         ret = ctdb_event_script_callback(ctdb, state,
3007                                          ctdb_ipreallocated_callback, state,
3008                                          CTDB_EVENT_IPREALLOCATED,
3009                                          "%s", "");
3010
3011         if (ret != 0) {
3012                 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
3013                 talloc_free(state);
3014                 return -1;
3015         }
3016
3017         /* tell the control that we will be reply asynchronously */
3018         state->c    = talloc_steal(state, c);
3019         *async_reply = true;