ctdb-ipalloc: Switch set_ipflags_internal() to use a new-style node map
[sharpe/samba-autobuild/.git] / ctdb / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6    Copyright (C) Martin Schwenke  2011
7
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3 of the License, or
11    (at your option) any later version.
12    
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17    
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, see <http://www.gnu.org/licenses/>.
20 */
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
26
27 #include <talloc.h>
28 #include <tevent.h>
29
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_process.h"
34
35 #include "ctdb_private.h"
36 #include "ctdb_client.h"
37
38 #include "common/rb_tree.h"
39 #include "common/reqid.h"
40 #include "common/system.h"
41 #include "common/common.h"
42 #include "common/logging.h"
43
44 #include "server/ipalloc.h"
45
/* Timeout built from the takeover_timeout tunable.  Expands to an
 * expression that needs a variable named `ctdb` in the calling scope. */
#define TAKEOVER_TIMEOUT() \
        timeval_current_ofs(ctdb->tunable.takeover_timeout, 0)

/* Seconds component of the delay between two gratuitous ARP rounds. */
#define CTDB_ARP_INTERVAL 1
/* Number of ARP rounds after which the announcement state is freed. */
#define CTDB_ARP_REPEAT   3
50
/* A local network interface that public addresses may be placed on.
 * Entries live on the doubly-linked list rooted at ctdb->ifaces. */
struct ctdb_interface {
        struct ctdb_interface *prev;
        struct ctdb_interface *next;
        const char *name;       /* interface name, matched with strcmp() */
        bool link_up;           /* set to true when the entry is created */
        uint32_t references;    /* number of VNNs currently assigned here */
};
57
58 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
59 {
60         if (vnn->iface) {
61                 return vnn->iface->name;
62         }
63
64         return "__none__";
65 }
66
67 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
68 {
69         struct ctdb_interface *i;
70
71         if (strlen(iface) > CTDB_IFACE_SIZE) {
72                 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
73                 return -1;
74         }
75
76         /* Verify that we don't have an entry for this ip yet */
77         for (i=ctdb->ifaces;i;i=i->next) {
78                 if (strcmp(i->name, iface) == 0) {
79                         return 0;
80                 }
81         }
82
83         /* create a new structure for this interface */
84         i = talloc_zero(ctdb, struct ctdb_interface);
85         CTDB_NO_MEMORY_FATAL(ctdb, i);
86         i->name = talloc_strdup(i, iface);
87         CTDB_NO_MEMORY(ctdb, i->name);
88
89         i->link_up = true;
90
91         DLIST_ADD(ctdb->ifaces, i);
92
93         return 0;
94 }
95
96 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
97                                         const char *name)
98 {
99         int n;
100
101         for (n = 0; vnn->ifaces[n] != NULL; n++) {
102                 if (strcmp(name, vnn->ifaces[n]) == 0) {
103                         return true;
104                 }
105         }
106
107         return false;
108 }
109
110 /* If any interfaces now have no possible IPs then delete them.  This
111  * implementation is naive (i.e. simple) rather than clever
112  * (i.e. complex).  Given that this is run on delip and that operation
113  * is rare, this doesn't need to be efficient - it needs to be
114  * foolproof.  One alternative is reference counting, where the logic
115  * is distributed and can, therefore, be broken in multiple places.
116  * Another alternative is to build a red-black tree of interfaces that
117  * can have addresses (by walking ctdb->vnn once) and then walking
118  * ctdb->ifaces once and deleting those not in the tree.  Let's go to
119  * one of those if the naive implementation causes problems...  :-)
120  */
121 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
122                                         struct ctdb_vnn *vnn)
123 {
124         struct ctdb_interface *i, *next;
125
126         /* For each interface, check if there's an IP using it. */
127         for (i = ctdb->ifaces; i != NULL; i = next) {
128                 struct ctdb_vnn *tv;
129                 bool found;
130                 next = i->next;
131
132                 /* Only consider interfaces named in the given VNN. */
133                 if (!vnn_has_interface_with_name(vnn, i->name)) {
134                         continue;
135                 }
136
137                 /* Search for a vnn with this interface. */
138                 found = false;
139                 for (tv=ctdb->vnn; tv; tv=tv->next) {
140                         if (vnn_has_interface_with_name(tv, i->name)) {
141                                 found = true;
142                                 break;
143                         }
144                 }
145
146                 if (!found) {
147                         /* None of the VNNs are using this interface. */
148                         DLIST_REMOVE(ctdb->ifaces, i);
149                         talloc_free(i);
150                 }
151         }
152 }
153
154
155 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
156                                               const char *iface)
157 {
158         struct ctdb_interface *i;
159
160         for (i=ctdb->ifaces;i;i=i->next) {
161                 if (strcmp(i->name, iface) == 0) {
162                         return i;
163                 }
164         }
165
166         return NULL;
167 }
168
169 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
170                                                   struct ctdb_vnn *vnn)
171 {
172         int i;
173         struct ctdb_interface *cur = NULL;
174         struct ctdb_interface *best = NULL;
175
176         for (i=0; vnn->ifaces[i]; i++) {
177
178                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
179                 if (cur == NULL) {
180                         continue;
181                 }
182
183                 if (!cur->link_up) {
184                         continue;
185                 }
186
187                 if (best == NULL) {
188                         best = cur;
189                         continue;
190                 }
191
192                 if (cur->references < best->references) {
193                         best = cur;
194                         continue;
195                 }
196         }
197
198         return best;
199 }
200
201 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
202                                      struct ctdb_vnn *vnn)
203 {
204         struct ctdb_interface *best = NULL;
205
206         if (vnn->iface) {
207                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
208                                    "still assigned to iface '%s'\n",
209                                    ctdb_addr_to_str(&vnn->public_address),
210                                    ctdb_vnn_iface_string(vnn)));
211                 return 0;
212         }
213
214         best = ctdb_vnn_best_iface(ctdb, vnn);
215         if (best == NULL) {
216                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
217                                   "cannot assign to iface any iface\n",
218                                   ctdb_addr_to_str(&vnn->public_address)));
219                 return -1;
220         }
221
222         vnn->iface = best;
223         best->references++;
224         vnn->pnn = ctdb->pnn;
225
226         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
227                            "now assigned to iface '%s' refs[%d]\n",
228                            ctdb_addr_to_str(&vnn->public_address),
229                            ctdb_vnn_iface_string(vnn),
230                            best->references));
231         return 0;
232 }
233
234 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
235                                     struct ctdb_vnn *vnn)
236 {
237         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
238                            "now unassigned (old iface '%s' refs[%d])\n",
239                            ctdb_addr_to_str(&vnn->public_address),
240                            ctdb_vnn_iface_string(vnn),
241                            vnn->iface?vnn->iface->references:0));
242         if (vnn->iface) {
243                 vnn->iface->references--;
244         }
245         vnn->iface = NULL;
246         if (vnn->pnn == ctdb->pnn) {
247                 vnn->pnn = -1;
248         }
249 }
250
251 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
252                                struct ctdb_vnn *vnn)
253 {
254         int i;
255
256         /* Nodes that are not RUNNING can not host IPs */
257         if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
258                 return false;
259         }
260
261         if (vnn->delete_pending) {
262                 return false;
263         }
264
265         if (vnn->iface && vnn->iface->link_up) {
266                 return true;
267         }
268
269         for (i=0; vnn->ifaces[i]; i++) {
270                 struct ctdb_interface *cur;
271
272                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
273                 if (cur == NULL) {
274                         continue;
275                 }
276
277                 if (cur->link_up) {
278                         return true;
279                 }
280         }
281
282         return false;
283 }
284
285 struct ctdb_takeover_arp {
286         struct ctdb_context *ctdb;
287         uint32_t count;
288         ctdb_sock_addr addr;
289         struct ctdb_tcp_array *tcparray;
290         struct ctdb_vnn *vnn;
291 };
292
293
294 /*
295   lists of tcp endpoints
296  */
297 struct ctdb_tcp_list {
298         struct ctdb_tcp_list *prev, *next;
299         struct ctdb_connection connection;
300 };
301
302 /*
303   list of clients to kill on IP release
304  */
305 struct ctdb_client_ip {
306         struct ctdb_client_ip *prev, *next;
307         struct ctdb_context *ctdb;
308         ctdb_sock_addr addr;
309         uint32_t client_id;
310 };
311
312
313 /*
314   send a gratuitous arp
315  */
316 static void ctdb_control_send_arp(struct tevent_context *ev,
317                                   struct tevent_timer *te,
318                                   struct timeval t, void *private_data)
319 {
320         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
321                                                         struct ctdb_takeover_arp);
322         int i, ret;
323         struct ctdb_tcp_array *tcparray;
324         const char *iface = ctdb_vnn_iface_string(arp->vnn);
325
326         ret = ctdb_sys_send_arp(&arp->addr, iface);
327         if (ret != 0) {
328                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
329                                   iface, strerror(errno)));
330         }
331
332         tcparray = arp->tcparray;
333         if (tcparray) {
334                 for (i=0;i<tcparray->num;i++) {
335                         struct ctdb_connection *tcon;
336
337                         tcon = &tcparray->connections[i];
338                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
339                                 (unsigned)ntohs(tcon->dst.ip.sin_port),
340                                 ctdb_addr_to_str(&tcon->src),
341                                 (unsigned)ntohs(tcon->src.ip.sin_port)));
342                         ret = ctdb_sys_send_tcp(
343                                 &tcon->src,
344                                 &tcon->dst,
345                                 0, 0, 0);
346                         if (ret != 0) {
347                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
348                                         ctdb_addr_to_str(&tcon->src)));
349                         }
350                 }
351         }
352
353         arp->count++;
354
355         if (arp->count == CTDB_ARP_REPEAT) {
356                 talloc_free(arp);
357                 return;
358         }
359
360         tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
361                          timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
362                          ctdb_control_send_arp, arp);
363 }
364
365 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
366                                        struct ctdb_vnn *vnn)
367 {
368         struct ctdb_takeover_arp *arp;
369         struct ctdb_tcp_array *tcparray;
370
371         if (!vnn->takeover_ctx) {
372                 vnn->takeover_ctx = talloc_new(vnn);
373                 if (!vnn->takeover_ctx) {
374                         return -1;
375                 }
376         }
377
378         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
379         if (!arp) {
380                 return -1;
381         }
382
383         arp->ctdb = ctdb;
384         arp->addr = vnn->public_address;
385         arp->vnn  = vnn;
386
387         tcparray = vnn->tcp_array;
388         if (tcparray) {
389                 /* add all of the known tcp connections for this IP to the
390                    list of tcp connections to send tickle acks for */
391                 arp->tcparray = talloc_steal(arp, tcparray);
392
393                 vnn->tcp_array = NULL;
394                 vnn->tcp_update_needed = true;
395         }
396
397         tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
398                          timeval_zero(), ctdb_control_send_arp, arp);
399
400         return 0;
401 }
402
403 struct takeover_callback_state {
404         struct ctdb_req_control_old *c;
405         ctdb_sock_addr *addr;
406         struct ctdb_vnn *vnn;
407 };
408
/* State handed to ctdb_do_takeip_callback(). */
struct ctdb_do_takeip_state {
        struct ctdb_req_control_old *c;   /* control to reply to */
        struct ctdb_vnn *vnn;             /* VNN being taken over */
};
413
414 /*
415   called when takeip event finishes
416  */
417 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
418                                     void *private_data)
419 {
420         struct ctdb_do_takeip_state *state =
421                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
422         int32_t ret;
423         TDB_DATA data;
424
425         if (status != 0) {
426                 if (status == -ETIME) {
427                         ctdb_ban_self(ctdb);
428                 }
429                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
430                                  ctdb_addr_to_str(&state->vnn->public_address),
431                                  ctdb_vnn_iface_string(state->vnn)));
432                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
433
434                 talloc_free(state);
435                 return;
436         }
437
438         if (ctdb->do_checkpublicip) {
439
440         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
441         if (ret != 0) {
442                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
443                 talloc_free(state);
444                 return;
445         }
446
447         }
448
449         data.dptr  = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
450         data.dsize = strlen((char *)data.dptr) + 1;
451         DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
452
453         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
454
455
456         /* the control succeeded */
457         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
458         talloc_free(state);
459         return;
460 }
461
462 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
463 {
464         state->vnn->update_in_flight = false;
465         return 0;
466 }
467
468 /*
469   take over an ip address
470  */
471 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
472                               struct ctdb_req_control_old *c,
473                               struct ctdb_vnn *vnn)
474 {
475         int ret;
476         struct ctdb_do_takeip_state *state;
477
478         if (vnn->update_in_flight) {
479                 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
480                                     "update for this IP already in flight\n",
481                                     ctdb_addr_to_str(&vnn->public_address),
482                                     vnn->public_netmask_bits));
483                 return -1;
484         }
485
486         ret = ctdb_vnn_assign_iface(ctdb, vnn);
487         if (ret != 0) {
488                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
489                                  "assign a usable interface\n",
490                                  ctdb_addr_to_str(&vnn->public_address),
491                                  vnn->public_netmask_bits));
492                 return -1;
493         }
494
495         state = talloc(vnn, struct ctdb_do_takeip_state);
496         CTDB_NO_MEMORY(ctdb, state);
497
498         state->c = talloc_steal(ctdb, c);
499         state->vnn   = vnn;
500
501         vnn->update_in_flight = true;
502         talloc_set_destructor(state, ctdb_takeip_destructor);
503
504         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
505                             ctdb_addr_to_str(&vnn->public_address),
506                             vnn->public_netmask_bits,
507                             ctdb_vnn_iface_string(vnn)));
508
509         ret = ctdb_event_script_callback(ctdb,
510                                          state,
511                                          ctdb_do_takeip_callback,
512                                          state,
513                                          CTDB_EVENT_TAKE_IP,
514                                          "%s %s %u",
515                                          ctdb_vnn_iface_string(vnn),
516                                          ctdb_addr_to_str(&vnn->public_address),
517                                          vnn->public_netmask_bits);
518
519         if (ret != 0) {
520                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
521                         ctdb_addr_to_str(&vnn->public_address),
522                         ctdb_vnn_iface_string(vnn)));
523                 talloc_free(state);
524                 return -1;
525         }
526
527         return 0;
528 }
529
/* State handed to ctdb_do_updateip_callback(). */
struct ctdb_do_updateip_state {
        struct ctdb_req_control_old *c;   /* control to reply to */
        struct ctdb_interface *old;       /* interface the IP is moving away from */
        struct ctdb_vnn *vnn;             /* VNN being moved */
};
535
536 /*
537   called when updateip event finishes
538  */
539 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
540                                       void *private_data)
541 {
542         struct ctdb_do_updateip_state *state =
543                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
544         int32_t ret;
545
546         if (status != 0) {
547                 if (status == -ETIME) {
548                         ctdb_ban_self(ctdb);
549                 }
550                 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
551                         ctdb_addr_to_str(&state->vnn->public_address),
552                         state->old->name,
553                         ctdb_vnn_iface_string(state->vnn)));
554
555                 /*
556                  * All we can do is reset the old interface
557                  * and let the next run fix it
558                  */
559                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
560                 state->vnn->iface = state->old;
561                 state->vnn->iface->references++;
562
563                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
564                 talloc_free(state);
565                 return;
566         }
567
568         if (ctdb->do_checkpublicip) {
569
570         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
571         if (ret != 0) {
572                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
573                 talloc_free(state);
574                 return;
575         }
576
577         }
578
579         /* the control succeeded */
580         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
581         talloc_free(state);
582         return;
583 }
584
585 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
586 {
587         state->vnn->update_in_flight = false;
588         return 0;
589 }
590
591 /*
592   update (move) an ip address
593  */
594 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
595                                 struct ctdb_req_control_old *c,
596                                 struct ctdb_vnn *vnn)
597 {
598         int ret;
599         struct ctdb_do_updateip_state *state;
600         struct ctdb_interface *old = vnn->iface;
601         const char *new_name;
602
603         if (vnn->update_in_flight) {
604                 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
605                                     "update for this IP already in flight\n",
606                                     ctdb_addr_to_str(&vnn->public_address),
607                                     vnn->public_netmask_bits));
608                 return -1;
609         }
610
611         ctdb_vnn_unassign_iface(ctdb, vnn);
612         ret = ctdb_vnn_assign_iface(ctdb, vnn);
613         if (ret != 0) {
614                 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
615                                  "assin a usable interface (old iface '%s')\n",
616                                  ctdb_addr_to_str(&vnn->public_address),
617                                  vnn->public_netmask_bits,
618                                  old->name));
619                 return -1;
620         }
621
622         new_name = ctdb_vnn_iface_string(vnn);
623         if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
624                 /* A benign update from one interface onto itself.
625                  * no need to run the eventscripts in this case, just return
626                  * success.
627                  */
628                 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
629                 return 0;
630         }
631
632         state = talloc(vnn, struct ctdb_do_updateip_state);
633         CTDB_NO_MEMORY(ctdb, state);
634
635         state->c = talloc_steal(ctdb, c);
636         state->old = old;
637         state->vnn = vnn;
638
639         vnn->update_in_flight = true;
640         talloc_set_destructor(state, ctdb_updateip_destructor);
641
642         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
643                             "interface %s to %s\n",
644                             ctdb_addr_to_str(&vnn->public_address),
645                             vnn->public_netmask_bits,
646                             old->name,
647                             new_name));
648
649         ret = ctdb_event_script_callback(ctdb,
650                                          state,
651                                          ctdb_do_updateip_callback,
652                                          state,
653                                          CTDB_EVENT_UPDATE_IP,
654                                          "%s %s %s %u",
655                                          state->old->name,
656                                          new_name,
657                                          ctdb_addr_to_str(&vnn->public_address),
658                                          vnn->public_netmask_bits);
659         if (ret != 0) {
660                 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
661                                  ctdb_addr_to_str(&vnn->public_address),
662                                  old->name, new_name));
663                 talloc_free(state);
664                 return -1;
665         }
666
667         return 0;
668 }
669
670 /*
671   Find the vnn of the node that has a public ip address
672   returns -1 if the address is not known as a public address
673  */
674 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
675 {
676         struct ctdb_vnn *vnn;
677
678         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
679                 if (ctdb_same_ip(&vnn->public_address, addr)) {
680                         return vnn;
681                 }
682         }
683
684         return NULL;
685 }
686
687 /*
688   take over an ip address
689  */
690 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
691                                  struct ctdb_req_control_old *c,
692                                  TDB_DATA indata,
693                                  bool *async_reply)
694 {
695         int ret;
696         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
697         struct ctdb_vnn *vnn;
698         bool have_ip = false;
699         bool do_updateip = false;
700         bool do_takeip = false;
701         struct ctdb_interface *best_iface = NULL;
702
703         if (pip->pnn != ctdb->pnn) {
704                 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
705                                  "with pnn %d, but we're node %d\n",
706                                  ctdb_addr_to_str(&pip->addr),
707                                  pip->pnn, ctdb->pnn));
708                 return -1;
709         }
710
711         /* update out vnn list */
712         vnn = find_public_ip_vnn(ctdb, &pip->addr);
713         if (vnn == NULL) {
714                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
715                         ctdb_addr_to_str(&pip->addr)));
716                 return 0;
717         }
718
719         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
720                 have_ip = ctdb_sys_have_ip(&pip->addr);
721         }
722         best_iface = ctdb_vnn_best_iface(ctdb, vnn);
723         if (best_iface == NULL) {
724                 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
725                                  "a usable interface (old %s, have_ip %d)\n",
726                                  ctdb_addr_to_str(&vnn->public_address),
727                                  vnn->public_netmask_bits,
728                                  ctdb_vnn_iface_string(vnn),
729                                  have_ip));
730                 return -1;
731         }
732
733         if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
734                 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
735                 have_ip = false;
736         }
737
738
739         if (vnn->iface == NULL && have_ip) {
740                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
741                                   "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
742                                  ctdb_addr_to_str(&vnn->public_address)));
743                 return 0;
744         }
745
746         if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
747                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
748                                   "and we have it on iface[%s], but it was assigned to node %d"
749                                   "and we are node %d, banning ourself\n",
750                                  ctdb_addr_to_str(&vnn->public_address),
751                                  ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
752                 ctdb_ban_self(ctdb);
753                 return -1;
754         }
755
756         if (vnn->pnn == -1 && have_ip) {
757                 vnn->pnn = ctdb->pnn;
758                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
759                                   "and we already have it on iface[%s], update local daemon\n",
760                                  ctdb_addr_to_str(&vnn->public_address),
761                                   ctdb_vnn_iface_string(vnn)));
762                 return 0;
763         }
764
765         if (vnn->iface) {
766                 if (vnn->iface != best_iface) {
767                         if (!vnn->iface->link_up) {
768                                 do_updateip = true;
769                         } else if (vnn->iface->references > (best_iface->references + 1)) {
770                                 /* only move when the rebalance gains something */
771                                         do_updateip = true;
772                         }
773                 }
774         }
775
776         if (!have_ip) {
777                 if (do_updateip) {
778                         ctdb_vnn_unassign_iface(ctdb, vnn);
779                         do_updateip = false;
780                 }
781                 do_takeip = true;
782         }
783
784         if (do_takeip) {
785                 ret = ctdb_do_takeip(ctdb, c, vnn);
786                 if (ret != 0) {
787                         return -1;
788                 }
789         } else if (do_updateip) {
790                 ret = ctdb_do_updateip(ctdb, c, vnn);
791                 if (ret != 0) {
792                         return -1;
793                 }
794         } else {
795                 /*
796                  * The interface is up and the kernel known the ip
797                  * => do nothing
798                  */
799                 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
800                         ctdb_addr_to_str(&pip->addr),
801                         vnn->public_netmask_bits,
802                         ctdb_vnn_iface_string(vnn)));
803                 return 0;
804         }
805
806         /* tell ctdb_control.c that we will be replying asynchronously */
807         *async_reply = true;
808
809         return 0;
810 }
811
812 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
813 {
814         DLIST_REMOVE(ctdb->vnn, vnn);
815         ctdb_vnn_unassign_iface(ctdb, vnn);
816         ctdb_remove_orphaned_ifaces(ctdb, vnn);
817         talloc_free(vnn);
818 }
819
820 /*
821   called when releaseip event finishes
822  */
823 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
824                                 void *private_data)
825 {
826         struct takeover_callback_state *state = 
827                 talloc_get_type(private_data, struct takeover_callback_state);
828         TDB_DATA data;
829
830         if (status == -ETIME) {
831                 ctdb_ban_self(ctdb);
832         }
833
834         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
835                 if  (ctdb_sys_have_ip(state->addr)) {
836                         DEBUG(DEBUG_ERR,
837                               ("IP %s still hosted during release IP callback, failing\n",
838                                ctdb_addr_to_str(state->addr)));
839                         ctdb_request_control_reply(ctdb, state->c,
840                                                    NULL, -1, NULL);
841                         talloc_free(state);
842                         return;
843                 }
844         }
845
846         /* send a message to all clients of this node telling them
847            that the cluster has been reconfigured and they should
848            release any sockets on this IP */
849         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
850         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
851         data.dsize = strlen((char *)data.dptr)+1;
852
853         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
854
855         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
856
857         ctdb_vnn_unassign_iface(ctdb, state->vnn);
858
859         /* Process the IP if it has been marked for deletion */
860         if (state->vnn->delete_pending) {
861                 do_delete_ip(ctdb, state->vnn);
862                 state->vnn = NULL;
863         }
864
865         /* the control succeeded */
866         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
867         talloc_free(state);
868 }
869
870 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
871 {
872         if (state->vnn != NULL) {
873                 state->vnn->update_in_flight = false;
874         }
875         return 0;
876 }
877
878 /*
879   release an ip address
880  */
881 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
882                                 struct ctdb_req_control_old *c,
883                                 TDB_DATA indata, 
884                                 bool *async_reply)
885 {
886         int ret;
887         struct takeover_callback_state *state;
888         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
889         struct ctdb_vnn *vnn;
890         char *iface;
891
892         /* update our vnn list */
893         vnn = find_public_ip_vnn(ctdb, &pip->addr);
894         if (vnn == NULL) {
895                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
896                         ctdb_addr_to_str(&pip->addr)));
897                 return 0;
898         }
899         vnn->pnn = pip->pnn;
900
901         /* stop any previous arps */
902         talloc_free(vnn->takeover_ctx);
903         vnn->takeover_ctx = NULL;
904
905         /* Some ctdb tool commands (e.g. moveip) send
906          * lazy multicast to drop an IP from any node that isn't the
907          * intended new node.  The following causes makes ctdbd ignore
908          * a release for any address it doesn't host.
909          */
910         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
911                 if (!ctdb_sys_have_ip(&pip->addr)) {
912                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
913                                 ctdb_addr_to_str(&pip->addr),
914                                 vnn->public_netmask_bits,
915                                 ctdb_vnn_iface_string(vnn)));
916                         ctdb_vnn_unassign_iface(ctdb, vnn);
917                         return 0;
918                 }
919         } else {
920                 if (vnn->iface == NULL) {
921                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
922                                            ctdb_addr_to_str(&pip->addr),
923                                            vnn->public_netmask_bits));
924                         return 0;
925                 }
926         }
927
928         /* There is a potential race between take_ip and us because we
929          * update the VNN via a callback that run when the
930          * eventscripts have been run.  Avoid the race by allowing one
931          * update to be in flight at a time.
932          */
933         if (vnn->update_in_flight) {
934                 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
935                                     "update for this IP already in flight\n",
936                                     ctdb_addr_to_str(&vnn->public_address),
937                                     vnn->public_netmask_bits));
938                 return -1;
939         }
940
941         iface = strdup(ctdb_vnn_iface_string(vnn));
942
943         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
944                 ctdb_addr_to_str(&pip->addr),
945                 vnn->public_netmask_bits,
946                 iface,
947                 pip->pnn));
948
949         state = talloc(ctdb, struct takeover_callback_state);
950         if (state == NULL) {
951                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
952                                __FILE__, __LINE__);
953                 free(iface);
954                 return -1;
955         }
956
957         state->c = talloc_steal(state, c);
958         state->addr = talloc(state, ctdb_sock_addr);       
959         if (state->addr == NULL) {
960                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
961                                __FILE__, __LINE__);
962                 free(iface);
963                 talloc_free(state);
964                 return -1;
965         }
966         *state->addr = pip->addr;
967         state->vnn   = vnn;
968
969         vnn->update_in_flight = true;
970         talloc_set_destructor(state, ctdb_releaseip_destructor);
971
972         ret = ctdb_event_script_callback(ctdb, 
973                                          state, release_ip_callback, state,
974                                          CTDB_EVENT_RELEASE_IP,
975                                          "%s %s %u",
976                                          iface,
977                                          ctdb_addr_to_str(&pip->addr),
978                                          vnn->public_netmask_bits);
979         free(iface);
980         if (ret != 0) {
981                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
982                         ctdb_addr_to_str(&pip->addr),
983                         ctdb_vnn_iface_string(vnn)));
984                 talloc_free(state);
985                 return -1;
986         }
987
988         /* tell the control that we will be reply asynchronously */
989         *async_reply = true;
990         return 0;
991 }
992
993 static int ctdb_add_public_address(struct ctdb_context *ctdb,
994                                    ctdb_sock_addr *addr,
995                                    unsigned mask, const char *ifaces,
996                                    bool check_address)
997 {
998         struct ctdb_vnn      *vnn;
999         uint32_t num = 0;
1000         char *tmp;
1001         const char *iface;
1002         int i;
1003         int ret;
1004
1005         tmp = strdup(ifaces);
1006         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1007                 if (!ctdb_sys_check_iface_exists(iface)) {
1008                         DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1009                         free(tmp);
1010                         return -1;
1011                 }
1012         }
1013         free(tmp);
1014
1015         /* Verify that we don't have an entry for this ip yet */
1016         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1017                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1018                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
1019                                 ctdb_addr_to_str(addr)));
1020                         return -1;
1021                 }               
1022         }
1023
1024         /* create a new vnn structure for this ip address */
1025         vnn = talloc_zero(ctdb, struct ctdb_vnn);
1026         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
1027         vnn->ifaces = talloc_array(vnn, const char *, num + 2);
1028         tmp = talloc_strdup(vnn, ifaces);
1029         CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1030         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1031                 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1032                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1033                 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1034                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
1035                 num++;
1036         }
1037         talloc_free(tmp);
1038         vnn->ifaces[num] = NULL;
1039         vnn->public_address      = *addr;
1040         vnn->public_netmask_bits = mask;
1041         vnn->pnn                 = -1;
1042         if (check_address) {
1043                 if (ctdb_sys_have_ip(addr)) {
1044                         DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
1045                         vnn->pnn = ctdb->pnn;
1046                 }
1047         }
1048
1049         for (i=0; vnn->ifaces[i]; i++) {
1050                 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1051                 if (ret != 0) {
1052                         DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1053                                            "for public_address[%s]\n",
1054                                            vnn->ifaces[i], ctdb_addr_to_str(addr)));
1055                         talloc_free(vnn);
1056                         return -1;
1057                 }
1058         }
1059
1060         DLIST_ADD(ctdb->vnn, vnn);
1061
1062         return 0;
1063 }
1064
1065 /*
1066   setup the public address lists from a file
1067 */
1068 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1069 {
1070         char **lines;
1071         int nlines;
1072         int i;
1073
1074         lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1075         if (lines == NULL) {
1076                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1077                 return -1;
1078         }
1079         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1080                 nlines--;
1081         }
1082
1083         for (i=0;i<nlines;i++) {
1084                 unsigned mask;
1085                 ctdb_sock_addr addr;
1086                 const char *addrstr;
1087                 const char *ifaces;
1088                 char *tok, *line;
1089
1090                 line = lines[i];
1091                 while ((*line == ' ') || (*line == '\t')) {
1092                         line++;
1093                 }
1094                 if (*line == '#') {
1095                         continue;
1096                 }
1097                 if (strcmp(line, "") == 0) {
1098                         continue;
1099                 }
1100                 tok = strtok(line, " \t");
1101                 addrstr = tok;
1102                 tok = strtok(NULL, " \t");
1103                 if (tok == NULL) {
1104                         if (NULL == ctdb->default_public_interface) {
1105                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1106                                          i+1));
1107                                 talloc_free(lines);
1108                                 return -1;
1109                         }
1110                         ifaces = ctdb->default_public_interface;
1111                 } else {
1112                         ifaces = tok;
1113                 }
1114
1115                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1116                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1117                         talloc_free(lines);
1118                         return -1;
1119                 }
1120                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1121                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1122                         talloc_free(lines);
1123                         return -1;
1124                 }
1125         }
1126
1127
1128         talloc_free(lines);
1129         return 0;
1130 }
1131
1132 static struct ctdb_public_ip_list *
1133 ctdb_fetch_remote_public_ips(struct ctdb_context *ctdb,
1134                              TALLOC_CTX *mem_ctx,
1135                              struct ctdb_node_map_old *nodemap,
1136                              uint32_t public_ip_flags)
1137 {
1138         int j, ret;
1139         struct ctdb_public_ip_list_old *ip_list;
1140         struct ctdb_public_ip_list *public_ips;
1141
1142         public_ips = talloc_zero_array(mem_ctx,
1143                                        struct ctdb_public_ip_list,
1144                                        nodemap->num);
1145         if (public_ips == NULL) {
1146                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1147                 return NULL;
1148         }
1149
1150         for (j = 0; j < nodemap->num; j++) {
1151                 if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
1152                         continue;
1153                 }
1154
1155                 /* Retrieve the list of public IPs from the
1156                  * node. Flags says whether it is known or
1157                  * available. */
1158                 ret = ctdb_ctrl_get_public_ips_flags(
1159                         ctdb, TAKEOVER_TIMEOUT(), j, public_ips,
1160                         public_ip_flags, &ip_list);
1161                 if (ret != 0) {
1162                         DEBUG(DEBUG_ERR,
1163                               ("Failed to read public IPs from node: %u\n", j));
1164                         talloc_free(public_ips);
1165                         return NULL;
1166                 }
1167                 public_ips[j].num = ip_list->num;
1168                 if (ip_list->num == 0) {
1169                         talloc_free(ip_list);
1170                         continue;
1171                 }
1172                 public_ips[j].ip = talloc_zero_array(public_ips,
1173                                                      struct ctdb_public_ip,
1174                                                      ip_list->num);
1175                 if (public_ips[j].ip == NULL) {
1176                         DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1177                         talloc_free(public_ips);
1178                         return NULL;
1179                 }
1180                 memcpy(public_ips[j].ip, &ip_list->ips[0],
1181                        sizeof(struct ctdb_public_ip) * ip_list->num);
1182                 talloc_free(ip_list);
1183         }
1184
1185         return public_ips;
1186 }
1187
1188 static bool all_nodes_are_disabled(struct ctdb_node_map *nodemap)
1189 {
1190         int i;
1191
1192         for (i=0;i<nodemap->num;i++) {
1193                 if (!(nodemap->node[i].flags &
1194                       (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1195                         /* Found one completely healthy node */
1196                         return false;
1197                 }
1198         }
1199
1200         return true;
1201 }
1202
/* State shared by the callbacks used while fetching one tunable from
 * several nodes. */
struct get_tunable_callback_data {
        /* Name of the tunable being fetched; used in log messages */
        const char *tunable;
        /* talloc array receiving the value reported by each node,
         * indexed by PNN */
        uint32_t *out;
        /* Set by the callbacks when an error should fail the fetch */
        bool fatal;
};
1208
1209 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
1210                                  int32_t res, TDB_DATA outdata,
1211                                  void *callback)
1212 {
1213         struct get_tunable_callback_data *cd =
1214                 (struct get_tunable_callback_data *)callback;
1215         int size;
1216
1217         if (res != 0) {
1218                 /* Already handled in fail callback */
1219                 return;
1220         }
1221
1222         if (outdata.dsize != sizeof(uint32_t)) {
1223                 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
1224                                  cd->tunable, pnn, (int)sizeof(uint32_t),
1225                                  (int)outdata.dsize));
1226                 cd->fatal = true;
1227                 return;
1228         }
1229
1230         size = talloc_array_length(cd->out);
1231         if (pnn >= size) {
1232                 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
1233                                  cd->tunable, pnn, size));
1234                 return;
1235         }
1236
1237                 
1238         cd->out[pnn] = *(uint32_t *)outdata.dptr;
1239 }
1240
1241 static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1242                                        int32_t res, TDB_DATA outdata,
1243                                        void *callback)
1244 {
1245         struct get_tunable_callback_data *cd =
1246                 (struct get_tunable_callback_data *)callback;
1247
1248         switch (res) {
1249         case -ETIME:
1250                 DEBUG(DEBUG_ERR,
1251                       ("Timed out getting tunable \"%s\" from node %d\n",
1252                        cd->tunable, pnn));
1253                 cd->fatal = true;
1254                 break;
1255         case -EINVAL:
1256         case -1:
1257                 DEBUG(DEBUG_WARNING,
1258                       ("Tunable \"%s\" not implemented on node %d\n",
1259                        cd->tunable, pnn));
1260                 break;
1261         default:
1262                 DEBUG(DEBUG_ERR,
1263                       ("Unexpected error getting tunable \"%s\" from node %d\n",
1264                        cd->tunable, pnn));
1265                 cd->fatal = true;
1266         }
1267 }
1268
1269 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
1270                                         TALLOC_CTX *tmp_ctx,
1271                                         struct ctdb_node_map_old *nodemap,
1272                                         const char *tunable,
1273                                         uint32_t default_value)
1274 {
1275         TDB_DATA data;
1276         struct ctdb_control_get_tunable *t;
1277         uint32_t *nodes;
1278         uint32_t *tvals;
1279         struct get_tunable_callback_data callback_data;
1280         int i;
1281
1282         tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1283         CTDB_NO_MEMORY_NULL(ctdb, tvals);
1284         for (i=0; i<nodemap->num; i++) {
1285                 tvals[i] = default_value;
1286         }
1287                 
1288         callback_data.out = tvals;
1289         callback_data.tunable = tunable;
1290         callback_data.fatal = false;
1291
1292         data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
1293         data.dptr  = talloc_size(tmp_ctx, data.dsize);
1294         t = (struct ctdb_control_get_tunable *)data.dptr;
1295         t->length = strlen(tunable)+1;
1296         memcpy(t->name, tunable, t->length);
1297         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1298         if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
1299                                       nodes, 0, TAKEOVER_TIMEOUT(),
1300                                       false, data,
1301                                       get_tunable_callback,
1302                                       get_tunable_fail_callback,
1303                                       &callback_data) != 0) {
1304                 if (callback_data.fatal) {
1305                         talloc_free(tvals);
1306                         tvals = NULL;
1307                 }
1308         }
1309         talloc_free(nodes);
1310         talloc_free(data.dptr);
1311
1312         return tvals;
1313 }
1314
1315 /* Set internal flags for IP allocation:
1316  *   Clear ip flags
1317  *   Set NOIPTAKOVER ip flags from per-node NoIPTakeover tunable
1318  *   Set NOIPHOST ip flag for each INACTIVE node
1319  *   if all nodes are disabled:
1320  *     Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
1321  *   else
1322  *     Set NOIPHOST ip flags for disabled nodes
1323  */
1324 static void set_ipflags_internal(struct ipalloc_state *ipalloc_state,
1325                                  struct ctdb_node_map *nodemap,
1326                                  uint32_t *tval_noiptakeover,
1327                                  uint32_t *tval_noiphostonalldisabled)
1328 {
1329         int i;
1330
1331         for (i=0;i<nodemap->num;i++) {
1332                 /* Can not take IPs on node with NoIPTakeover set */
1333                 if (tval_noiptakeover[i] != 0) {
1334                         ipalloc_state->noiptakeover[i] = true;
1335                 }
1336
1337                 /* Can not host IPs on INACTIVE node */
1338                 if (nodemap->node[i].flags & NODE_FLAGS_INACTIVE) {
1339                         ipalloc_state->noiphost[i] = true;
1340                 }
1341         }
1342
1343         if (all_nodes_are_disabled(nodemap)) {
1344                 /* If all nodes are disabled, can not host IPs on node
1345                  * with NoIPHostOnAllDisabled set
1346                  */
1347                 for (i=0;i<nodemap->num;i++) {
1348                         if (tval_noiphostonalldisabled[i] != 0) {
1349                                 ipalloc_state->noiphost[i] = true;
1350                         }
1351                 }
1352         } else {
1353                 /* If some nodes are not disabled, then can not host
1354                  * IPs on DISABLED node
1355                  */
1356                 for (i=0;i<nodemap->num;i++) {
1357                         if (nodemap->node[i].flags & NODE_FLAGS_DISABLED) {
1358                                 ipalloc_state->noiphost[i] = true;
1359                         }
1360                 }
1361         }
1362 }
1363
1364 static struct ctdb_node_map *
1365 ctdb_node_map_old_to_new(TALLOC_CTX *mem_ctx,
1366                          const struct ctdb_node_map_old *old)
1367 {
1368         struct ctdb_node_map *new;
1369
1370         new = talloc(mem_ctx, struct ctdb_node_map);
1371         if (new == NULL) {
1372                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1373                 return NULL;
1374         }
1375         new->num = old->num;
1376         new->node = talloc_zero_array(new,
1377                                       struct ctdb_node_and_flags, new->num);
1378         memcpy(new->node, &old->nodes[0],
1379                sizeof(struct ctdb_node_and_flags) * new->num);
1380
1381         return new;
1382 }
1383
1384
1385 static bool set_ipflags(struct ctdb_context *ctdb,
1386                         struct ipalloc_state *ipalloc_state,
1387                         struct ctdb_node_map_old *nodemap)
1388 {
1389         uint32_t *tval_noiptakeover;
1390         uint32_t *tval_noiphostonalldisabled;
1391         struct ctdb_node_map *new;
1392
1393         tval_noiptakeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1394                                                    "NoIPTakeover", 0);
1395         if (tval_noiptakeover == NULL) {
1396                 return false;
1397         }
1398
1399         tval_noiphostonalldisabled =
1400                 get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1401                                        "NoIPHostOnAllDisabled", 0);
1402         if (tval_noiphostonalldisabled == NULL) {
1403                 /* Caller frees tmp_ctx */
1404                 return false;
1405         }
1406
1407         new = ctdb_node_map_old_to_new(ipalloc_state, nodemap);
1408         if (new == NULL) {
1409                 return false;
1410         }
1411
1412         set_ipflags_internal(ipalloc_state, new,
1413                              tval_noiptakeover,
1414                              tval_noiphostonalldisabled);
1415
1416         talloc_free(tval_noiptakeover);
1417         talloc_free(tval_noiphostonalldisabled);
1418         talloc_free(new);
1419
1420         return true;
1421 }
1422
1423 static enum ipalloc_algorithm
1424 determine_algorithm(const struct ctdb_tunable_list *tunables)
1425 {
1426         if (1 == tunables->lcp2_public_ip_assignment) {
1427                 return IPALLOC_LCP2;
1428         } else if (1 == tunables->deterministic_public_ips) {
1429                 return IPALLOC_DETERMINISTIC;
1430         } else {
1431                 return IPALLOC_NONDETERMINISTIC;
1432         }
1433 }
1434
/* Failure bookkeeping shared by the steps of a takeover run */
struct takeover_callback_data {
        /* Number of entries in fail_count */
        uint32_t num_nodes;
        /* Number of failures recorded for each node, indexed by PNN */
        unsigned int *fail_count;
};
1439
1440 static struct takeover_callback_data *
1441 takeover_callback_data_init(TALLOC_CTX *mem_ctx,
1442                             uint32_t num_nodes)
1443 {
1444         static struct takeover_callback_data *takeover_data;
1445
1446         takeover_data = talloc_zero(mem_ctx, struct takeover_callback_data);
1447         if (takeover_data == NULL) {
1448                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1449                 return NULL;
1450         }
1451
1452         takeover_data->fail_count = talloc_zero_array(takeover_data,
1453                                                       unsigned int, num_nodes);
1454         if (takeover_data->fail_count == NULL) {
1455                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1456                 talloc_free(takeover_data);
1457                 return NULL;
1458         }
1459
1460         takeover_data->num_nodes = num_nodes;
1461
1462         return takeover_data;
1463 }
1464
1465 static void takeover_run_fail_callback(struct ctdb_context *ctdb,
1466                                        uint32_t node_pnn, int32_t res,
1467                                        TDB_DATA outdata, void *callback_data)
1468 {
1469         struct takeover_callback_data *cd =
1470                 talloc_get_type_abort(callback_data,
1471                                       struct takeover_callback_data);
1472
1473         if (node_pnn >= cd->num_nodes) {
1474                 DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
1475                 return;
1476         }
1477
1478         if (cd->fail_count[node_pnn] == 0) {
1479                 DEBUG(DEBUG_ERR,
1480                       ("Node %u failed the takeover run\n", node_pnn));
1481         }
1482
1483         cd->fail_count[node_pnn]++;
1484 }
1485
1486 static void takeover_run_process_failures(struct ctdb_context *ctdb,
1487                                           struct takeover_callback_data *tcd)
1488 {
1489         unsigned int max_fails = 0;
1490         uint32_t max_pnn = -1;
1491         uint32_t i;
1492
1493         for (i = 0; i < tcd->num_nodes; i++) {
1494                 if (tcd->fail_count[i] > max_fails) {
1495                         max_pnn = i;
1496                         max_fails = tcd->fail_count[i];
1497                 }
1498         }
1499
1500         if (max_fails > 0) {
1501                 int ret;
1502                 TDB_DATA data;
1503
1504                 DEBUG(DEBUG_ERR,
1505                       ("Sending banning credits to %u with fail count %u\n",
1506                        max_pnn, max_fails));
1507
1508                 data.dptr = (uint8_t *)&max_pnn;
1509                 data.dsize = sizeof(uint32_t);
1510                 ret = ctdb_client_send_message(ctdb,
1511                                                CTDB_BROADCAST_CONNECTED,
1512                                                CTDB_SRVID_BANNING,
1513                                                data);
1514                 if (ret != 0) {
1515                         DEBUG(DEBUG_ERR,
1516                               ("Failed to set banning credits for node %u\n",
1517                                max_pnn));
1518                 }
1519         }
1520 }
1521
1522 /*
1523  * Recalculate the allocation of public IPs to nodes and have the
1524  * nodes host their allocated addresses.
1525  *
1526  * - Initialise IP allocation state.  Pass:
1527      + algorithm to be used;
1528      + whether IP rebalancing ("failback") should be done (this uses a
1529        cluster-wide configuration variable and only the value from the
1530        master node is used); and
1531  *   + list of nodes to force rebalance (internal structure, currently
1532  *     no way to fetch, only used by LCP2 for nodes that have had new
1533  *     IP addresses added).
1534  * - Retrieve tunables NoIPTakeover and NoIPHostOnAllDisabled from all
1535  *   connected nodes - this is done separately so tunable values can
1536  *   be faked in unit testing
1537  * - Populate NoIPTakeover tunable in IP allocation state
1538  * - Populate NoIPHost in IP allocation state, derived from node flags
1539  *   and NoIPHostOnAllDisabled tunable
1540  * - Retrieve known and available IP addresses (done separately so
1541  *   values can be faked in unit testing)
1542  * - Use ipalloc_set_public_ips() to set known and available IP
1543      addresses for allocation
1544  * - If cluster can't host IP addresses then early exit
1545  * - Run IP allocation algorithm
1546  * - Send RELEASE_IP to all nodes for IPs they should not host
1547  * - Send TAKE_IP to all nodes for IPs they should host
1548  * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
1549  */
1550 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
1551                       uint32_t *force_rebalance_nodes)
1552 {
1553         int i, ret;
1554         struct ctdb_public_ip ip;
1555         uint32_t *nodes;
1556         struct public_ip_list *all_ips, *tmp_ip;
1557         TDB_DATA data;
1558         struct timeval timeout;
1559         struct client_async_data *async_data;
1560         struct ctdb_client_control_state *state;
1561         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1562         struct ipalloc_state *ipalloc_state;
1563         struct ctdb_public_ip_list *known_ips, *available_ips;
1564         struct takeover_callback_data *takeover_data;
1565
1566         /* Initialise fail callback data to be used with
1567          * takeover_run_fail_callback().  A failure in any of the
1568          * following steps will cause an early return, so this can be
1569          * reused for each of those steps without re-initialising. */
1570         takeover_data = takeover_callback_data_init(tmp_ctx,
1571                                                     nodemap->num);
1572         if (takeover_data == NULL) {
1573                 talloc_free(tmp_ctx);
1574                 return -1;
1575         }
1576
1577         /*
1578          * ip failover is completely disabled, just send out the 
1579          * ipreallocated event.
1580          */
1581         if (ctdb->tunable.disable_ip_failover != 0) {
1582                 goto ipreallocated;
1583         }
1584
1585         ipalloc_state = ipalloc_state_init(tmp_ctx, ctdb->num_nodes,
1586                                            determine_algorithm(&ctdb->tunable),
1587                                            (ctdb->tunable.no_ip_failback != 0),
1588                                            force_rebalance_nodes);
1589         if (ipalloc_state == NULL) {
1590                 talloc_free(tmp_ctx);
1591                 return -1;
1592         }
1593
1594         if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
1595                 DEBUG(DEBUG_ERR,("Failed to set IP flags - aborting takeover run\n"));
1596                 talloc_free(tmp_ctx);
1597                 return -1;
1598         }
1599
1600         /* Fetch known/available public IPs from each active node */
1601         /* Fetch lists of known public IPs from all nodes */
1602         known_ips = ctdb_fetch_remote_public_ips(ctdb, ipalloc_state,
1603                                                  nodemap, 0);
1604         if (known_ips == NULL) {
1605                 DEBUG(DEBUG_ERR, ("Failed to read known public IPs\n"));
1606                 talloc_free(tmp_ctx);
1607                 return -1;
1608         }
1609         available_ips = ctdb_fetch_remote_public_ips(
1610                 ctdb, ipalloc_state, nodemap,
1611                 CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE);
1612         if (available_ips == NULL) {
1613                 DEBUG(DEBUG_ERR, ("Failed to read available public IPs\n"));
1614                 talloc_free(tmp_ctx);
1615                 return -1;
1616         }
1617
1618         if (! ipalloc_set_public_ips(ipalloc_state, known_ips, available_ips)) {
1619                 DEBUG(DEBUG_ERR, ("Failed to set public IPs\n"));
1620                 talloc_free(tmp_ctx);
1621                 return -1;
1622         }
1623
1624         if (! ipalloc_can_host_ips(ipalloc_state)) {
1625                 DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
1626                 goto ipreallocated;
1627         }
1628
1629         /* Do the IP reassignment calculations */
1630         ipalloc(ipalloc_state);
1631         if (ipalloc_state->all_ips == NULL) {
1632                 talloc_free(tmp_ctx);
1633                 return -1;
1634         }
1635         all_ips = ipalloc_state->all_ips;
1636
1637         /* Now tell all nodes to release any public IPs they should not
1638          * host.  This will be a NOOP on nodes that don't currently
1639          * hold the given IP.
1640          */
1641         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1642         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1643
1644         async_data->fail_callback = takeover_run_fail_callback;
1645         async_data->callback_data = takeover_data;
1646
1647         ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
1648
1649         /* Send a RELEASE_IP to all nodes that should not be hosting
1650          * each IP.  For each IP, all but one of these will be
1651          * redundant.  However, the redundant ones are used to tell
1652          * nodes which node should be hosting the IP so that commands
1653          * like "ctdb ip" can display a particular node's idea of who
1654          * is hosting what. */
1655         for (i=0;i<nodemap->num;i++) {
1656                 /* don't talk to unconnected nodes, but do talk to banned nodes */
1657                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1658                         continue;
1659                 }
1660
1661                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1662                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1663                                 /* This node should be serving this
1664                                    vnn so don't tell it to release the ip
1665                                 */
1666                                 continue;
1667                         }
1668                         ip.pnn  = tmp_ip->pnn;
1669                         ip.addr = tmp_ip->addr;
1670
1671                         timeout = TAKEOVER_TIMEOUT();
1672                         data.dsize = sizeof(ip);
1673                         data.dptr  = (uint8_t *)&ip;
1674                         state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1675                                                   0, CTDB_CONTROL_RELEASE_IP, 0,
1676                                                   data, async_data,
1677                                                   &timeout, NULL);
1678                         if (state == NULL) {
1679                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1680                                 talloc_free(tmp_ctx);
1681                                 return -1;
1682                         }
1683
1684                         ctdb_client_async_add(async_data, state);
1685                 }
1686         }
1687         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1688                 DEBUG(DEBUG_ERR,
1689                       ("Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1690                 goto fail;
1691         }
1692         talloc_free(async_data);
1693
1694
1695         /* For each IP, send a TAKEOVER_IP to the node that should be
1696          * hosting it.  Many of these will often be redundant (since
1697          * the allocation won't have changed) but they can be useful
1698          * to recover from inconsistencies. */
1699         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1700         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1701
1702         async_data->fail_callback = takeover_run_fail_callback;
1703         async_data->callback_data = takeover_data;
1704
1705         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1706                 if (tmp_ip->pnn == -1) {
1707                         /* this IP won't be taken over */
1708                         continue;
1709                 }
1710
1711                 ip.pnn  = tmp_ip->pnn;
1712                 ip.addr = tmp_ip->addr;
1713
1714                 timeout = TAKEOVER_TIMEOUT();
1715                 data.dsize = sizeof(ip);
1716                 data.dptr  = (uint8_t *)&ip;
1717                 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1718                                           0, CTDB_CONTROL_TAKEOVER_IP, 0,
1719                                           data, async_data, &timeout, NULL);
1720                 if (state == NULL) {
1721                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1722                         talloc_free(tmp_ctx);
1723                         return -1;
1724                 }
1725
1726                 ctdb_client_async_add(async_data, state);
1727         }
1728         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1729                 DEBUG(DEBUG_ERR,
1730                       ("Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1731                 goto fail;
1732         }
1733
1734 ipreallocated:
1735         /*
1736          * Tell all nodes to run eventscripts to process the
1737          * "ipreallocated" event.  This can do a lot of things,
1738          * including restarting services to reconfigure them if public
1739          * IPs have moved.  Once upon a time this event only used to
1740          * update natgw.
1741          */
1742         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1743         ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
1744                                         nodes, 0, TAKEOVER_TIMEOUT(),
1745                                         false, tdb_null,
1746                                         NULL, takeover_run_fail_callback,
1747                                         takeover_data);
1748         if (ret != 0) {
1749                 DEBUG(DEBUG_ERR,
1750                       ("Async CTDB_CONTROL_IPREALLOCATED control failed\n"));
1751                 goto fail;
1752         }
1753
1754         talloc_free(tmp_ctx);
1755         return ret;
1756
1757 fail:
1758         takeover_run_process_failures(ctdb, takeover_data);
1759         talloc_free(tmp_ctx);
1760         return -1;
1761 }
1762
1763
1764 /*
1765   destroy a ctdb_client_ip structure
1766  */
1767 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1768 {
1769         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1770                 ctdb_addr_to_str(&ip->addr),
1771                 ntohs(ip->addr.ip.sin_port),
1772                 ip->client_id));
1773
1774         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1775         return 0;
1776 }
1777
1778 /*
1779   called by a client to inform us of a TCP connection that it is managing
1780   that should tickled with an ACK when IP takeover is done
1781  */
1782 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1783                                 TDB_DATA indata)
1784 {
1785         struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1786         struct ctdb_connection *tcp_sock = NULL;
1787         struct ctdb_tcp_list *tcp;
1788         struct ctdb_connection t;
1789         int ret;
1790         TDB_DATA data;
1791         struct ctdb_client_ip *ip;
1792         struct ctdb_vnn *vnn;
1793         ctdb_sock_addr addr;
1794
1795         /* If we don't have public IPs, tickles are useless */
1796         if (ctdb->vnn == NULL) {
1797                 return 0;
1798         }
1799
1800         tcp_sock = (struct ctdb_connection *)indata.dptr;
1801
1802         addr = tcp_sock->src;
1803         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1804         addr = tcp_sock->dst;
1805         ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
1806
1807         ZERO_STRUCT(addr);
1808         memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1809         vnn = find_public_ip_vnn(ctdb, &addr);
1810         if (vnn == NULL) {
1811                 switch (addr.sa.sa_family) {
1812                 case AF_INET:
1813                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1814                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1815                                         ctdb_addr_to_str(&addr)));
1816                         }
1817                         break;
1818                 case AF_INET6:
1819                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1820                                 ctdb_addr_to_str(&addr)));
1821                         break;
1822                 default:
1823                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1824                 }
1825
1826                 return 0;
1827         }
1828
1829         if (vnn->pnn != ctdb->pnn) {
1830                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1831                         ctdb_addr_to_str(&addr),
1832                         client_id, client->pid));
1833                 /* failing this call will tell smbd to die */
1834                 return -1;
1835         }
1836
1837         ip = talloc(client, struct ctdb_client_ip);
1838         CTDB_NO_MEMORY(ctdb, ip);
1839
1840         ip->ctdb      = ctdb;
1841         ip->addr      = addr;
1842         ip->client_id = client_id;
1843         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1844         DLIST_ADD(ctdb->client_ip_list, ip);
1845
1846         tcp = talloc(client, struct ctdb_tcp_list);
1847         CTDB_NO_MEMORY(ctdb, tcp);
1848
1849         tcp->connection.src = tcp_sock->src;
1850         tcp->connection.dst = tcp_sock->dst;
1851
1852         DLIST_ADD(client->tcp_list, tcp);
1853
1854         t.src = tcp_sock->src;
1855         t.dst = tcp_sock->dst;
1856
1857         data.dptr = (uint8_t *)&t;
1858         data.dsize = sizeof(t);
1859
1860         switch (addr.sa.sa_family) {
1861         case AF_INET:
1862                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1863                         (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1864                         ctdb_addr_to_str(&tcp_sock->src),
1865                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1866                 break;
1867         case AF_INET6:
1868                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1869                         (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1870                         ctdb_addr_to_str(&tcp_sock->src),
1871                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1872                 break;
1873         default:
1874                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1875         }
1876
1877
1878         /* tell all nodes about this tcp connection */
1879         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1880                                        CTDB_CONTROL_TCP_ADD,
1881                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1882         if (ret != 0) {
1883                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1884                 return -1;
1885         }
1886
1887         return 0;
1888 }
1889
1890 /*
1891   find a tcp address on a list
1892  */
1893 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1894                                            struct ctdb_connection *tcp)
1895 {
1896         int i;
1897
1898         if (array == NULL) {
1899                 return NULL;
1900         }
1901
1902         for (i=0;i<array->num;i++) {
1903                 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
1904                     ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
1905                         return &array->connections[i];
1906                 }
1907         }
1908         return NULL;
1909 }
1910
1911
1912
1913 /*
1914   called by a daemon to inform us of a TCP connection that one of its
1915   clients managing that should tickled with an ACK when IP takeover is
1916   done
1917  */
1918 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1919 {
1920         struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
1921         struct ctdb_tcp_array *tcparray;
1922         struct ctdb_connection tcp;
1923         struct ctdb_vnn *vnn;
1924
1925         /* If we don't have public IPs, tickles are useless */
1926         if (ctdb->vnn == NULL) {
1927                 return 0;
1928         }
1929
1930         vnn = find_public_ip_vnn(ctdb, &p->dst);
1931         if (vnn == NULL) {
1932                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1933                         ctdb_addr_to_str(&p->dst)));
1934
1935                 return -1;
1936         }
1937
1938
1939         tcparray = vnn->tcp_array;
1940
1941         /* If this is the first tickle */
1942         if (tcparray == NULL) {
1943                 tcparray = talloc(vnn, struct ctdb_tcp_array);
1944                 CTDB_NO_MEMORY(ctdb, tcparray);
1945                 vnn->tcp_array = tcparray;
1946
1947                 tcparray->num = 0;
1948                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
1949                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1950
1951                 tcparray->connections[tcparray->num].src = p->src;
1952                 tcparray->connections[tcparray->num].dst = p->dst;
1953                 tcparray->num++;
1954
1955                 if (tcp_update_needed) {
1956                         vnn->tcp_update_needed = true;
1957                 }
1958                 return 0;
1959         }
1960
1961
1962         /* Do we already have this tickle ?*/
1963         tcp.src = p->src;
1964         tcp.dst = p->dst;
1965         if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
1966                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1967                         ctdb_addr_to_str(&tcp.dst),
1968                         ntohs(tcp.dst.ip.sin_port),
1969                         vnn->pnn));
1970                 return 0;
1971         }
1972
1973         /* A new tickle, we must add it to the array */
1974         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1975                                         struct ctdb_connection,
1976                                         tcparray->num+1);
1977         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1978
1979         tcparray->connections[tcparray->num].src = p->src;
1980         tcparray->connections[tcparray->num].dst = p->dst;
1981         tcparray->num++;
1982
1983         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1984                 ctdb_addr_to_str(&tcp.dst),
1985                 ntohs(tcp.dst.ip.sin_port),
1986                 vnn->pnn));
1987
1988         if (tcp_update_needed) {
1989                 vnn->tcp_update_needed = true;
1990         }
1991
1992         return 0;
1993 }
1994
1995
1996 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
1997 {
1998         struct ctdb_connection *tcpp;
1999
2000         if (vnn == NULL) {
2001                 return;
2002         }
2003
2004         /* if the array is empty we cant remove it
2005            and we don't need to do anything
2006          */
2007         if (vnn->tcp_array == NULL) {
2008                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
2009                         ctdb_addr_to_str(&conn->dst),
2010                         ntohs(conn->dst.ip.sin_port)));
2011                 return;
2012         }
2013
2014
2015         /* See if we know this connection
2016            if we don't know this connection  then we dont need to do anything
2017          */
2018         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2019         if (tcpp == NULL) {
2020                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2021                         ctdb_addr_to_str(&conn->dst),
2022                         ntohs(conn->dst.ip.sin_port)));
2023                 return;
2024         }
2025
2026
2027         /* We need to remove this entry from the array.
2028            Instead of allocating a new array and copying data to it
2029            we cheat and just copy the last entry in the existing array
2030            to the entry that is to be removed and just shring the 
2031            ->num field
2032          */
2033         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2034         vnn->tcp_array->num--;
2035
2036         /* If we deleted the last entry we also need to remove the entire array
2037          */
2038         if (vnn->tcp_array->num == 0) {
2039                 talloc_free(vnn->tcp_array);
2040                 vnn->tcp_array = NULL;
2041         }               
2042
2043         vnn->tcp_update_needed = true;
2044
2045         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2046                 ctdb_addr_to_str(&conn->src),
2047                 ntohs(conn->src.ip.sin_port)));
2048 }
2049
2050
2051 /*
2052   called by a daemon to inform us of a TCP connection that one of its
2053   clients used are no longer needed in the tickle database
2054  */
2055 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2056 {
2057         struct ctdb_vnn *vnn;
2058         struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
2059
2060         /* If we don't have public IPs, tickles are useless */
2061         if (ctdb->vnn == NULL) {
2062                 return 0;
2063         }
2064
2065         vnn = find_public_ip_vnn(ctdb, &conn->dst);
2066         if (vnn == NULL) {
2067                 DEBUG(DEBUG_ERR,
2068                       (__location__ " unable to find public address %s\n",
2069                        ctdb_addr_to_str(&conn->dst)));
2070                 return 0;
2071         }
2072
2073         ctdb_remove_connection(vnn, conn);
2074
2075         return 0;
2076 }
2077
2078
2079 /*
2080   Called when another daemon starts - causes all tickles for all
2081   public addresses we are serving to be sent to the new node on the
2082   next check.  This actually causes the next scheduled call to
2083   tdb_update_tcp_tickles() to update all nodes.  This is simple and
2084   doesn't require careful error handling.
2085  */
2086 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
2087 {
2088         struct ctdb_vnn *vnn;
2089
2090         DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
2091                            (unsigned long) pnn));
2092
2093         for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2094                 vnn->tcp_update_needed = true;
2095         }
2096
2097         return 0;
2098 }
2099
2100
2101 /*
2102   called when a client structure goes away - hook to remove
2103   elements from the tcp_list in all daemons
2104  */
2105 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2106 {
2107         while (client->tcp_list) {
2108                 struct ctdb_vnn *vnn;
2109                 struct ctdb_tcp_list *tcp = client->tcp_list;
2110                 struct ctdb_connection *conn = &tcp->connection;
2111
2112                 DLIST_REMOVE(client->tcp_list, tcp);
2113
2114                 vnn = find_public_ip_vnn(client->ctdb,
2115                                          &conn->dst);
2116                 if (vnn == NULL) {
2117                         DEBUG(DEBUG_ERR,
2118                               (__location__ " unable to find public address %s\n",
2119                                ctdb_addr_to_str(&conn->dst)));
2120                         continue;
2121                 }
2122
2123                 /* If the IP address is hosted on this node then
2124                  * remove the connection. */
2125                 if (vnn->pnn == client->ctdb->pnn) {
2126                         ctdb_remove_connection(vnn, conn);
2127                 }
2128
2129                 /* Otherwise this function has been called because the
2130                  * server IP address has been released to another node
2131                  * and the client has exited.  This means that we
2132                  * should not delete the connection information.  The
2133                  * takeover node processes connections too. */
2134         }
2135 }
2136
2137
2138 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2139 {
2140         struct ctdb_vnn *vnn;
2141         int count = 0;
2142         TDB_DATA data;
2143
2144         if (ctdb->tunable.disable_ip_failover == 1) {
2145                 return;
2146         }
2147
2148         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2149                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2150                         ctdb_vnn_unassign_iface(ctdb, vnn);
2151                         continue;
2152                 }
2153                 if (!vnn->iface) {
2154                         continue;
2155                 }
2156
2157                 /* Don't allow multiple releases at once.  Some code,
2158                  * particularly ctdb_tickle_sentenced_connections() is
2159                  * not re-entrant */
2160                 if (vnn->update_in_flight) {
2161                         DEBUG(DEBUG_WARNING,
2162                               (__location__
2163                                " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
2164                                     ctdb_addr_to_str(&vnn->public_address),
2165                                     vnn->public_netmask_bits,
2166                                     ctdb_vnn_iface_string(vnn)));
2167                         continue;
2168                 }
2169                 vnn->update_in_flight = true;
2170
2171                 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
2172                                     ctdb_addr_to_str(&vnn->public_address),
2173                                     vnn->public_netmask_bits,
2174                                     ctdb_vnn_iface_string(vnn)));
2175
2176                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2177                                   ctdb_vnn_iface_string(vnn),
2178                                   ctdb_addr_to_str(&vnn->public_address),
2179                                   vnn->public_netmask_bits);
2180
2181                 data.dptr = (uint8_t *)talloc_strdup(
2182                                 vnn, ctdb_addr_to_str(&vnn->public_address));
2183                 if (data.dptr != NULL) {
2184                         data.dsize = strlen((char *)data.dptr) + 1;
2185                         ctdb_daemon_send_message(ctdb, ctdb->pnn,
2186                                                  CTDB_SRVID_RELEASE_IP, data);
2187                         talloc_free(data.dptr);
2188                 }
2189
2190                 ctdb_vnn_unassign_iface(ctdb, vnn);
2191                 vnn->update_in_flight = false;
2192                 count++;
2193         }
2194
2195         DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
2196 }
2197
2198
2199 /*
2200   get list of public IPs
2201  */
2202 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
2203                                     struct ctdb_req_control_old *c, TDB_DATA *outdata)
2204 {
2205         int i, num, len;
2206         struct ctdb_public_ip_list_old *ips;
2207         struct ctdb_vnn *vnn;
2208         bool only_available = false;
2209
2210         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2211                 only_available = true;
2212         }
2213
2214         /* count how many public ip structures we have */
2215         num = 0;
2216         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2217                 num++;
2218         }
2219
2220         len = offsetof(struct ctdb_public_ip_list_old, ips) +
2221                 num*sizeof(struct ctdb_public_ip);
2222         ips = talloc_zero_size(outdata, len);
2223         CTDB_NO_MEMORY(ctdb, ips);
2224
2225         i = 0;
2226         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2227                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2228                         continue;
2229                 }
2230                 ips->ips[i].pnn  = vnn->pnn;
2231                 ips->ips[i].addr = vnn->public_address;
2232                 i++;
2233         }
2234         ips->num = i;
2235         len = offsetof(struct ctdb_public_ip_list_old, ips) +
2236                 i*sizeof(struct ctdb_public_ip);
2237
2238         outdata->dsize = len;
2239         outdata->dptr  = (uint8_t *)ips;
2240
2241         return 0;
2242 }
2243
2244
2245 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2246                                         struct ctdb_req_control_old *c,
2247                                         TDB_DATA indata,
2248                                         TDB_DATA *outdata)
2249 {
2250         int i, num, len;
2251         ctdb_sock_addr *addr;
2252         struct ctdb_public_ip_info_old *info;
2253         struct ctdb_vnn *vnn;
2254
2255         addr = (ctdb_sock_addr *)indata.dptr;
2256
2257         vnn = find_public_ip_vnn(ctdb, addr);
2258         if (vnn == NULL) {
2259                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2260                                  "'%s'not a public address\n",
2261                                  ctdb_addr_to_str(addr)));
2262                 return -1;
2263         }
2264
2265         /* count how many public ip structures we have */
2266         num = 0;
2267         for (;vnn->ifaces[num];) {
2268                 num++;
2269         }
2270
2271         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2272                 num*sizeof(struct ctdb_iface);
2273         info = talloc_zero_size(outdata, len);
2274         CTDB_NO_MEMORY(ctdb, info);
2275
2276         info->ip.addr = vnn->public_address;
2277         info->ip.pnn = vnn->pnn;
2278         info->active_idx = 0xFFFFFFFF;
2279
2280         for (i=0; vnn->ifaces[i]; i++) {
2281                 struct ctdb_interface *cur;
2282
2283                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2284                 if (cur == NULL) {
2285                         DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2286                                            vnn->ifaces[i]));
2287                         return -1;
2288                 }
2289                 if (vnn->iface == cur) {
2290                         info->active_idx = i;
2291                 }
2292                 strncpy(info->ifaces[i].name, cur->name,
2293                         sizeof(info->ifaces[i].name));
2294                 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
2295                 info->ifaces[i].link_state = cur->link_up;
2296                 info->ifaces[i].references = cur->references;
2297         }
2298         info->num = i;
2299         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2300                 i*sizeof(struct ctdb_iface);
2301
2302         outdata->dsize = len;
2303         outdata->dptr  = (uint8_t *)info;
2304
2305         return 0;
2306 }
2307
2308 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2309                                 struct ctdb_req_control_old *c,
2310                                 TDB_DATA *outdata)
2311 {
2312         int i, num, len;
2313         struct ctdb_iface_list_old *ifaces;
2314         struct ctdb_interface *cur;
2315
2316         /* count how many public ip structures we have */
2317         num = 0;
2318         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2319                 num++;
2320         }
2321
2322         len = offsetof(struct ctdb_iface_list_old, ifaces) +
2323                 num*sizeof(struct ctdb_iface);
2324         ifaces = talloc_zero_size(outdata, len);
2325         CTDB_NO_MEMORY(ctdb, ifaces);
2326
2327         i = 0;
2328         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2329                 strncpy(ifaces->ifaces[i].name, cur->name,
2330                         sizeof(ifaces->ifaces[i].name));
2331                 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
2332                 ifaces->ifaces[i].link_state = cur->link_up;
2333                 ifaces->ifaces[i].references = cur->references;
2334                 i++;
2335         }
2336         ifaces->num = i;
2337         len = offsetof(struct ctdb_iface_list_old, ifaces) +
2338                 i*sizeof(struct ctdb_iface);
2339
2340         outdata->dsize = len;
2341         outdata->dptr  = (uint8_t *)ifaces;
2342
2343         return 0;
2344 }
2345
2346 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2347                                     struct ctdb_req_control_old *c,
2348                                     TDB_DATA indata)
2349 {
2350         struct ctdb_iface *info;
2351         struct ctdb_interface *iface;
2352         bool link_up = false;
2353
2354         info = (struct ctdb_iface *)indata.dptr;
2355
2356         if (info->name[CTDB_IFACE_SIZE] != '\0') {
2357                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2358                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2359                                   len, len, info->name));
2360                 return -1;
2361         }
2362
2363         switch (info->link_state) {
2364         case 0:
2365                 link_up = false;
2366                 break;
2367         case 1:
2368                 link_up = true;
2369                 break;
2370         default:
2371                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2372                                   (unsigned int)info->link_state));
2373                 return -1;
2374         }
2375
2376         if (info->references != 0) {
2377                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2378                                   (unsigned int)info->references));
2379                 return -1;
2380         }
2381
2382         iface = ctdb_find_iface(ctdb, info->name);
2383         if (iface == NULL) {
2384                 return -1;
2385         }
2386
2387         if (link_up == iface->link_up) {
2388                 return 0;
2389         }
2390
2391         DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2392               ("iface[%s] has changed it's link status %s => %s\n",
2393                iface->name,
2394                iface->link_up?"up":"down",
2395                link_up?"up":"down"));
2396
2397         iface->link_up = link_up;
2398         return 0;
2399 }
2400
2401
2402 /*
2403   called by a daemon to inform us of the entire list of TCP tickles for
2404   a particular public address.
2405   this control should only be sent by the node that is currently serving
2406   that public address.
2407  */
2408 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2409 {
2410         struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
2411         struct ctdb_tcp_array *tcparray;
2412         struct ctdb_vnn *vnn;
2413
2414         /* We must at least have tickles.num or else we cant verify the size
2415            of the received data blob
2416          */
2417         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2418                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2419                 return -1;
2420         }
2421
2422         /* verify that the size of data matches what we expect */
2423         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2424                          + sizeof(struct ctdb_connection) * list->num) {
2425                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2426                 return -1;
2427         }
2428
2429         DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2430                            ctdb_addr_to_str(&list->addr)));
2431
2432         vnn = find_public_ip_vnn(ctdb, &list->addr);
2433         if (vnn == NULL) {
2434                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2435                         ctdb_addr_to_str(&list->addr)));
2436
2437                 return 1;
2438         }
2439
2440         if (vnn->pnn == ctdb->pnn) {
2441                 DEBUG(DEBUG_INFO,
2442                       ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2443                        ctdb_addr_to_str(&list->addr)));
2444                 return 0;
2445         }
2446
2447         /* remove any old ticklelist we might have */
2448         talloc_free(vnn->tcp_array);
2449         vnn->tcp_array = NULL;
2450
2451         tcparray = talloc(vnn, struct ctdb_tcp_array);
2452         CTDB_NO_MEMORY(ctdb, tcparray);
2453
2454         tcparray->num = list->num;
2455
2456         tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
2457         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2458
2459         memcpy(tcparray->connections, &list->connections[0],
2460                sizeof(struct ctdb_connection)*tcparray->num);
2461
2462         /* We now have a new fresh tickle list array for this vnn */
2463         vnn->tcp_array = tcparray;
2464
2465         return 0;
2466 }
2467
2468 /*
2469   called to return the full list of tickles for the puclic address associated 
2470   with the provided vnn
2471  */
2472 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2473 {
2474         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2475         struct ctdb_tickle_list_old *list;
2476         struct ctdb_tcp_array *tcparray;
2477         int num, i;
2478         struct ctdb_vnn *vnn;
2479         unsigned port;
2480
2481         vnn = find_public_ip_vnn(ctdb, addr);
2482         if (vnn == NULL) {
2483                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2484                         ctdb_addr_to_str(addr)));
2485
2486                 return 1;
2487         }
2488
2489         port = ctdb_addr_to_port(addr);
2490
2491         tcparray = vnn->tcp_array;
2492         num = 0;
2493         if (tcparray != NULL) {
2494                 if (port == 0) {
2495                         /* All connections */
2496                         num = tcparray->num;
2497                 } else {
2498                         /* Count connections for port */
2499                         for (i = 0; i < tcparray->num; i++) {
2500                                 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2501                                         num++;
2502                                 }
2503                         }
2504                 }
2505         }
2506
2507         outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
2508                         + sizeof(struct ctdb_connection) * num;
2509
2510         outdata->dptr  = talloc_size(outdata, outdata->dsize);
2511         CTDB_NO_MEMORY(ctdb, outdata->dptr);
2512         list = (struct ctdb_tickle_list_old *)outdata->dptr;
2513
2514         list->addr = *addr;
2515         list->num = num;
2516
2517         if (num == 0) {
2518                 return 0;
2519         }
2520
2521         num = 0;
2522         for (i = 0; i < tcparray->num; i++) {
2523                 if (port == 0 || \
2524                     port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2525                         list->connections[num] = tcparray->connections[i];
2526                         num++;
2527                 }
2528         }
2529
2530         return 0;
2531 }
2532
2533
2534 /*
2535   set the list of all tcp tickles for a public address
2536  */
2537 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2538                                             ctdb_sock_addr *addr,
2539                                             struct ctdb_tcp_array *tcparray)
2540 {
2541         int ret, num;
2542         TDB_DATA data;
2543         struct ctdb_tickle_list_old *list;
2544
2545         if (tcparray) {
2546                 num = tcparray->num;
2547         } else {
2548                 num = 0;
2549         }
2550
2551         data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2552                         sizeof(struct ctdb_connection) * num;
2553         data.dptr = talloc_size(ctdb, data.dsize);
2554         CTDB_NO_MEMORY(ctdb, data.dptr);
2555
2556         list = (struct ctdb_tickle_list_old *)data.dptr;
2557         list->addr = *addr;
2558         list->num = num;
2559         if (tcparray) {
2560                 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2561         }
2562
2563         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2564                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2565                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2566         if (ret != 0) {
2567                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2568                 return -1;
2569         }
2570
2571         talloc_free(data.dptr);
2572
2573         return ret;
2574 }
2575
2576
2577 /*
2578   perform tickle updates if required
2579  */
2580 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2581                                     struct tevent_timer *te,
2582                                     struct timeval t, void *private_data)
2583 {
2584         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2585         int ret;
2586         struct ctdb_vnn *vnn;
2587
2588         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2589                 /* we only send out updates for public addresses that 
2590                    we have taken over
2591                  */
2592                 if (ctdb->pnn != vnn->pnn) {
2593                         continue;
2594                 }
2595                 /* We only send out the updates if we need to */
2596                 if (!vnn->tcp_update_needed) {
2597                         continue;
2598                 }
2599                 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2600                                                        &vnn->public_address,
2601                                                        vnn->tcp_array);
2602                 if (ret != 0) {
2603                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2604                                 ctdb_addr_to_str(&vnn->public_address)));
2605                 } else {
2606                         DEBUG(DEBUG_INFO,
2607                               ("Sent tickle update for public address %s\n",
2608                                ctdb_addr_to_str(&vnn->public_address)));
2609                         vnn->tcp_update_needed = false;
2610                 }
2611         }
2612
2613         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2614                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2615                          ctdb_update_tcp_tickles, ctdb);
2616 }
2617
2618 /*
2619   start periodic update of tcp tickles
2620  */
2621 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2622 {
2623         ctdb->tickle_update_context = talloc_new(ctdb);
2624
2625         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2626                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2627                          ctdb_update_tcp_tickles, ctdb);
2628 }
2629
2630
2631
2632
2633 struct control_gratious_arp {
2634         struct ctdb_context *ctdb;
2635         ctdb_sock_addr addr;
2636         const char *iface;
2637         int count;
2638 };
2639
2640 /*
2641   send a control_gratuitous arp
2642  */
2643 static void send_gratious_arp(struct tevent_context *ev,
2644                               struct tevent_timer *te,
2645                               struct timeval t, void *private_data)
2646 {
2647         int ret;
2648         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2649                                                         struct control_gratious_arp);
2650
2651         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2652         if (ret != 0) {
2653                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2654                                  arp->iface, strerror(errno)));
2655         }
2656
2657
2658         arp->count++;
2659         if (arp->count == CTDB_ARP_REPEAT) {
2660                 talloc_free(arp);
2661                 return;
2662         }
2663
2664         tevent_add_timer(arp->ctdb->ev, arp,
2665                          timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2666                          send_gratious_arp, arp);
2667 }
2668
2669
2670 /*
2671   send a gratious arp 
2672  */
2673 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2674 {
2675         struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2676         struct control_gratious_arp *arp;
2677
2678         /* verify the size of indata */
2679         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2680                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2681                                  (unsigned)indata.dsize, 
2682                                  (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2683                 return -1;
2684         }
2685         if (indata.dsize != 
2686                 ( offsetof(struct ctdb_addr_info_old, iface)
2687                 + gratious_arp->len ) ){
2688
2689                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2690                         "but should be %u bytes\n", 
2691                          (unsigned)indata.dsize, 
2692                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2693                 return -1;
2694         }
2695
2696
2697         arp = talloc(ctdb, struct control_gratious_arp);
2698         CTDB_NO_MEMORY(ctdb, arp);
2699
2700         arp->ctdb  = ctdb;
2701         arp->addr   = gratious_arp->addr;
2702         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2703         CTDB_NO_MEMORY(ctdb, arp->iface);
2704         arp->count = 0;
2705
2706         tevent_add_timer(arp->ctdb->ev, arp,
2707                          timeval_zero(), send_gratious_arp, arp);
2708
2709         return 0;
2710 }
2711
2712 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2713 {
2714         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2715         int ret;
2716
2717         /* verify the size of indata */
2718         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2719                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2720                 return -1;
2721         }
2722         if (indata.dsize != 
2723                 ( offsetof(struct ctdb_addr_info_old, iface)
2724                 + pub->len ) ){
2725
2726                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2727                         "but should be %u bytes\n", 
2728                          (unsigned)indata.dsize, 
2729                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2730                 return -1;
2731         }
2732
2733         DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2734
2735         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2736
2737         if (ret != 0) {
2738                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2739                 return -1;
2740         }
2741
2742         return 0;
2743 }
2744
/*
  state handed to delete_ip_callback()
 */
struct delete_ip_callback_state {
        struct ctdb_req_control_old *c; /* control to reply to when the release completes */
};
2748
2749 /*
2750   called when releaseip event finishes for del_public_address
2751  */
2752 static void delete_ip_callback(struct ctdb_context *ctdb,
2753                                int32_t status, TDB_DATA data,
2754                                const char *errormsg,
2755                                void *private_data)
2756 {
2757         struct delete_ip_callback_state *state =
2758                 talloc_get_type(private_data, struct delete_ip_callback_state);
2759
2760         /* If release failed then fail. */
2761         ctdb_request_control_reply(ctdb, state->c, NULL, status, errormsg);
2762         talloc_free(private_data);
2763 }
2764
2765 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb,
2766                                         struct ctdb_req_control_old *c,
2767                                         TDB_DATA indata, bool *async_reply)
2768 {
2769         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2770         struct ctdb_vnn *vnn;
2771
2772         /* verify the size of indata */
2773         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2774                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2775                 return -1;
2776         }
2777         if (indata.dsize != 
2778                 ( offsetof(struct ctdb_addr_info_old, iface)
2779                 + pub->len ) ){
2780
2781                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2782                         "but should be %u bytes\n", 
2783                          (unsigned)indata.dsize, 
2784                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2785                 return -1;
2786         }
2787
2788         DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2789
2790         /* walk over all public addresses until we find a match */
2791         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2792                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2793                         if (vnn->pnn == ctdb->pnn) {
2794                                 struct delete_ip_callback_state *state;
2795                                 struct ctdb_public_ip *ip;
2796                                 TDB_DATA data;
2797                                 int ret;
2798
2799                                 vnn->delete_pending = true;
2800
2801                                 state = talloc(ctdb,
2802                                                struct delete_ip_callback_state);
2803                                 CTDB_NO_MEMORY(ctdb, state);
2804                                 state->c = c;
2805
2806                                 ip = talloc(state, struct ctdb_public_ip);
2807                                 if (ip == NULL) {
2808                                         DEBUG(DEBUG_ERR,
2809                                               (__location__ " Out of memory\n"));
2810                                         talloc_free(state);
2811                                         return -1;
2812                                 }
2813                                 ip->pnn = -1;
2814                                 ip->addr = pub->addr;
2815
2816                                 data.dsize = sizeof(struct ctdb_public_ip);
2817                                 data.dptr = (unsigned char *)ip;
2818
2819                                 ret = ctdb_daemon_send_control(ctdb,
2820                                                                ctdb_get_pnn(ctdb),
2821                                                                0,
2822                                                                CTDB_CONTROL_RELEASE_IP,
2823                                                                0, 0,
2824                                                                data,
2825                                                                delete_ip_callback,
2826                                                                state);
2827                                 if (ret == -1) {
2828                                         DEBUG(DEBUG_ERR,
2829                                               (__location__ "Unable to send "
2830                                                "CTDB_CONTROL_RELEASE_IP\n"));
2831                                         talloc_free(state);
2832                                         return -1;
2833                                 }
2834
2835                                 state->c = talloc_steal(state, c);
2836                                 *async_reply = true;
2837                         } else {
2838                                 /* This IP is not hosted on the
2839                                  * current node so just delete it
2840                                  * now. */
2841                                 do_delete_ip(ctdb, vnn);
2842                         }
2843
2844                         return 0;
2845                 }
2846         }
2847
2848         DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2849                          ctdb_addr_to_str(&pub->addr)));
2850         return -1;
2851 }
2852
2853
/*
  state handed to ctdb_ipreallocated_callback()
 */
struct ipreallocated_callback_state {
        struct ctdb_req_control_old *c; /* control to reply to when the event has run */
};
2857
2858 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2859                                         int status, void *p)
2860 {
2861         struct ipreallocated_callback_state *state =
2862                 talloc_get_type(p, struct ipreallocated_callback_state);
2863
2864         if (status != 0) {
2865                 DEBUG(DEBUG_ERR,
2866                       (" \"ipreallocated\" event script failed (status %d)\n",
2867                        status));
2868                 if (status == -ETIME) {
2869                         ctdb_ban_self(ctdb);
2870                 }
2871         }
2872
2873         ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2874         talloc_free(state);
2875 }
2876
2877 /* A control to run the ipreallocated event */
2878 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2879                                    struct ctdb_req_control_old *c,
2880                                    bool *async_reply)
2881 {
2882         int ret;
2883         struct ipreallocated_callback_state *state;
2884
2885         state = talloc(ctdb, struct ipreallocated_callback_state);
2886         CTDB_NO_MEMORY(ctdb, state);
2887
2888         DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
2889
2890         ret = ctdb_event_script_callback(ctdb, state,
2891                                          ctdb_ipreallocated_callback, state,
2892                                          CTDB_EVENT_IPREALLOCATED,
2893                                          "%s", "");
2894
2895         if (ret != 0) {
2896                 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
2897                 talloc_free(state);
2898                 return -1;
2899         }
2900
2901         /* tell the control that we will be reply asynchronously */
2902         state->c    = talloc_steal(state, c);
2903         *async_reply = true;
2904
2905         return 0;
2906 }
2907
2908
/*
  state of one in-flight "reload public ips" request
 */
struct ctdb_reloadips_handle {
        struct ctdb_context *ctdb;      /* daemon context */
        struct ctdb_req_control_old *c; /* control replied to by the destructor, NULL if none */
        int status;                     /* status for that reply; starts at -1, set from the child's result */
        int fd[2];                      /* pipe: the child writes one result byte to fd[1], parent reads fd[0] */
        pid_t child;                    /* forked helper; sent SIGKILL by the destructor */
        struct tevent_fd *fde;          /* read event watching fd[0] */
};
2917
2918 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
2919 {
2920         if (h == h->ctdb->reload_ips) {
2921                 h->ctdb->reload_ips = NULL;
2922         }
2923         if (h->c != NULL) {
2924                 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
2925                 h->c = NULL;
2926         }
2927         ctdb_kill(h->ctdb, h->child, SIGKILL);
2928         return 0;
2929 }
2930
2931 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
2932                                          struct tevent_timer *te,
2933                                          struct timeval t, void *private_data)
2934 {
2935         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2936
2937         talloc_free(h);
2938 }
2939
2940 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
2941                                          struct tevent_fd *fde,
2942                                          uint16_t flags, void *private_data)
2943 {
2944         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2945
2946         char res;
2947         int ret;
2948
2949         ret = sys_read(h->fd[0], &res, 1);
2950         if (ret < 1 || res != 0) {
2951                 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
2952                 res = 1;
2953         }
2954         h->status = res;
2955
2956         talloc_free(h);
2957 }
2958
2959 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
2960 {
2961         TALLOC_CTX *mem_ctx = talloc_new(NULL);
2962         struct ctdb_public_ip_list_old *ips;
2963         struct ctdb_vnn *vnn;
2964         struct client_async_data *async_data;
2965         struct timeval timeout;
2966         TDB_DATA data;
2967         struct ctdb_client_control_state *state;
2968         bool first_add;
2969         int i, ret;
2970
2971         CTDB_NO_MEMORY(ctdb, mem_ctx);
2972
2973         /* Read IPs from local node */
2974         ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
2975                                        CTDB_CURRENT_NODE, mem_ctx, &ips);
2976         if (ret != 0) {
2977                 DEBUG(DEBUG_ERR,
2978                       ("Unable to fetch public IPs from local node\n"));
2979                 talloc_free(mem_ctx);
2980                 return -1;
2981         }
2982
2983         /* Read IPs file - this is safe since this is a child process */
2984         ctdb->vnn = NULL;
2985         if (ctdb_set_public_addresses(ctdb, false) != 0) {
2986                 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
2987                 talloc_free(mem_ctx);
2988                 return -1;
2989         }
2990
2991         async_data = talloc_zero(mem_ctx, struct client_async_data);
2992         CTDB_NO_MEMORY(ctdb, async_data);
2993
2994         /* Compare IPs between node and file for IPs to be deleted */
2995         for (i = 0; i < ips->num; i++) {
2996                 /* */
2997                 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2998                         if (ctdb_same_ip(&vnn->public_address,
2999                                          &ips->ips[i].addr)) {
3000                                 /* IP is still in file */
3001                                 break;
3002                         }
3003                 }
3004
3005                 if (vnn == NULL) {
3006                         /* Delete IP ips->ips[i] */
3007                         struct ctdb_addr_info_old *pub;
3008
3009                         DEBUG(DEBUG_NOTICE,
3010                               ("IP %s no longer configured, deleting it\n",
3011                                ctdb_addr_to_str(&ips->ips[i].addr)));
3012
3013                         pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
3014                         CTDB_NO_MEMORY(ctdb, pub);
3015
3016                         pub->addr  = ips->ips[i].addr;
3017                         pub->mask  = 0;
3018                         pub->len   = 0;
3019
3020                         timeout = TAKEOVER_TIMEOUT();
3021
3022                         data.dsize = offsetof(struct ctdb_addr_info_old,
3023                                               iface) + pub->len;
3024                         data.dptr = (uint8_t *)pub;
3025
3026                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3027                                                   CTDB_CONTROL_DEL_PUBLIC_IP,
3028                                                   0, data, async_data,
3029                                                   &timeout, NULL);
3030                         if (state == NULL) {
3031                                 DEBUG(DEBUG_ERR,
3032                                       (__location__
3033                                        " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
3034                                 goto failed;
3035                         }
3036
3037                         ctdb_client_async_add(async_data, state);
3038                 }
3039         }
3040
3041         /* Compare IPs between node and file for IPs to be added */
3042         first_add = true;
3043         for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3044                 for (i = 0; i < ips->num; i++) {
3045                         if (ctdb_same_ip(&vnn->public_address,
3046                                          &ips->ips[i].addr)) {
3047                                 /* IP already on node */
3048                                 break;
3049                         }
3050                 }
3051                 if (i == ips->num) {
3052                         /* Add IP ips->ips[i] */
3053                         struct ctdb_addr_info_old *pub;
3054                         const char *ifaces = NULL;
3055                         uint32_t len;
3056                         int iface = 0;
3057
3058                         DEBUG(DEBUG_NOTICE,
3059                               ("New IP %s configured, adding it\n",
3060                                ctdb_addr_to_str(&vnn->public_address)));
3061                         if (first_add) {
3062                                 uint32_t pnn = ctdb_get_pnn(ctdb);
3063
3064                                 data.dsize = sizeof(pnn);
3065                                 data.dptr  = (uint8_t *)&pnn;
3066
3067                                 ret = ctdb_client_send_message(
3068                                         ctdb,
3069                                         CTDB_BROADCAST_CONNECTED,
3070                                         CTDB_SRVID_REBALANCE_NODE,
3071                                         data);
3072                                 if (ret != 0) {
3073                                         DEBUG(DEBUG_WARNING,
3074                                               ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
3075                                 }
3076
3077                                 first_add = false;
3078                         }
3079
3080                         ifaces = vnn->ifaces[0];
3081                         iface = 1;
3082                         while (vnn->ifaces[iface] != NULL) {
3083                                 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
3084                                                          vnn->ifaces[iface]);
3085                                 iface++;
3086                         }
3087
3088                         len   = strlen(ifaces) + 1;
3089                         pub = talloc_zero_size(mem_ctx,
3090                                                offsetof(struct ctdb_addr_info_old, iface) + len);
3091                         CTDB_NO_MEMORY(ctdb, pub);
3092
3093                         pub->addr  = vnn->public_address;
3094                         pub->mask  = vnn->public_netmask_bits;
3095                         pub->len   = len;
3096                         memcpy(&pub->iface[0], ifaces, pub->len);
3097
3098                         timeout = TAKEOVER_TIMEOUT();
3099
3100                         data.dsize = offsetof(struct ctdb_addr_info_old,
3101                                               iface) + pub->len;
3102                         data.dptr = (uint8_t *)pub;
3103
3104                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3105                                                   CTDB_CONTROL_ADD_PUBLIC_IP,
3106                                                   0, data, async_data,
3107                                                   &timeout, NULL);
3108                         if (state == NULL) {
3109                                 DEBUG(DEBUG_ERR,
3110                                       (__location__
3111                                        " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
3112                                 goto failed;
3113                         }
3114
3115                         ctdb_client_async_add(async_data, state);
3116                 }
3117         }
3118
3119         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3120                 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
3121                 goto failed;
3122         }
3123
3124         talloc_free(mem_ctx);
3125         return 0;
3126
3127 failed:
3128         talloc_free(mem_ctx);
3129         return -1;
3130 }
3131
3132 /* This control is sent to force the node to re-read the public addresses file
3133    and drop any addresses we should nnot longer host, and add new addresses
3134    that we are now able to host
3135 */
3136 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
3137 {
3138         struct ctdb_reloadips_handle *h;
3139         pid_t parent = getpid();
3140
3141         if (ctdb->reload_ips != NULL) {
3142                 talloc_free(ctdb->reload_ips);
3143                 ctdb->reload_ips = NULL;
3144         }
3145
3146         h = talloc(ctdb, struct ctdb_reloadips_handle);
3147         CTDB_NO_MEMORY(ctdb, h);
3148         h->ctdb     = ctdb;
3149         h->c        = NULL;
3150         h->status   = -1;
3151         
3152         if (pipe(h->fd) == -1) {
3153                 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
3154                 talloc_free(h);
3155                 return -1;
3156         }
3157
3158         h->child = ctdb_fork(ctdb);
3159         if (h->child == (pid_t)-1) {
3160                 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
3161                 close(h->fd[0]);
3162                 close(h->fd[1]);
3163                 talloc_free(h);
3164                 return -1;
3165         }
3166
3167         /* child process */
3168         if (h->child == 0) {
3169                 signed char res = 0;
3170
3171                 close(h->fd[0]);
3172                 debug_extra = talloc_asprintf(NULL, "reloadips:");
3173
3174                 prctl_set_comment("ctdb_reloadips");
3175                 if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
3176                         DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
3177                         res = -1;
3178                 } else {
3179                         res = ctdb_reloadips_child(ctdb);
3180                         if (res != 0) {
3181                                 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
3182                         }
3183                 }
3184
3185                 sys_write(h->fd[1], &res, 1);
3186                 ctdb_wait_for_process_to_exit(parent);
3187                 _exit(0);
3188         }
3189
3190         h->c             = talloc_steal(h, c);
3191
3192         close(h->fd[1]);
3193         set_close_on_exec(h->fd[0]);
3194
3195         talloc_set_destructor(h, ctdb_reloadips_destructor);
3196
3197
3198         h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
3199                                ctdb_reloadips_child_handler, (void *)h);
3200         tevent_fd_set_auto_close(h->fde);
3201
3202         tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
3203                          ctdb_reloadips_timeout_event, h);
3204
3205         /* we reply later */
3206         *async_reply = true;
3207         return 0;
3208 }