when creating/adding a public ip, set the initial interface to be the first interface...
[ctdb.git] / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20 #include "includes.h"
21 #include "lib/tevent/tevent.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
29
30
/* timeval_current_ofs() of the takeover_timeout tunable; the expansion
   refers to a variable named 'ctdb', which must be in scope at the
   point of use */
#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)

/* CTDB_ARP_INTERVAL is the first argument given to timeval_current_ofs()
   when the next gratuitous arp round is scheduled (presumably seconds);
   CTDB_ARP_REPEAT is the number of rounds sent per announcement */
#define CTDB_ARP_INTERVAL 1
#define CTDB_ARP_REPEAT   3
35
/*
  a local network interface that public addresses can be assigned to
 */
struct ctdb_iface {
        /* links for the ctdb->ifaces list */
        struct ctdb_iface *prev;
        struct ctdb_iface *next;
        /* interface name; lookups compare it with strcmp() */
        const char *name;
        /* interfaces without link_up are never picked for an address */
        bool link_up;
        /* number of public addresses currently assigned to this interface */
        uint32_t references;
};
42
43 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
44 {
45         if (vnn->iface) {
46                 return vnn->iface->name;
47         }
48
49         return "__none__";
50 }
51
52 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
53 {
54         struct ctdb_iface *i;
55
56         /* Verify that we dont have an entry for this ip yet */
57         for (i=ctdb->ifaces;i;i=i->next) {
58                 if (strcmp(i->name, iface) == 0) {
59                         return 0;
60                 }
61         }
62
63         /* create a new structure for this interface */
64         i = talloc_zero(ctdb, struct ctdb_iface);
65         CTDB_NO_MEMORY_FATAL(ctdb, i);
66         i->name = talloc_strdup(i, iface);
67         CTDB_NO_MEMORY(ctdb, i->name);
68         i->link_up = false;
69
70         DLIST_ADD(ctdb->ifaces, i);
71
72         return 0;
73 }
74
75 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
76                                           const char *iface)
77 {
78         struct ctdb_iface *i;
79
80         /* Verify that we dont have an entry for this ip yet */
81         for (i=ctdb->ifaces;i;i=i->next) {
82                 if (strcmp(i->name, iface) == 0) {
83                         return i;
84                 }
85         }
86
87         return NULL;
88 }
89
90 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
91                                               struct ctdb_vnn *vnn)
92 {
93         int i;
94         struct ctdb_iface *cur = NULL;
95         struct ctdb_iface *best = NULL;
96
97         for (i=0; vnn->ifaces[i]; i++) {
98
99                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
100                 if (cur == NULL) {
101                         continue;
102                 }
103
104                 if (!cur->link_up) {
105                         continue;
106                 }
107
108                 if (best == NULL) {
109                         best = cur;
110                         continue;
111                 }
112
113                 if (cur->references < best->references) {
114                         best = cur;
115                         continue;
116                 }
117         }
118
119         return best;
120 }
121
122 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
123                                      struct ctdb_vnn *vnn)
124 {
125         struct ctdb_iface *best = NULL;
126
127         if (vnn->iface) {
128                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
129                                    "still assigned to iface '%s'\n",
130                                    ctdb_addr_to_str(&vnn->public_address),
131                                    ctdb_vnn_iface_string(vnn)));
132                 return 0;
133         }
134
135         best = ctdb_vnn_best_iface(ctdb, vnn);
136         if (best == NULL) {
137                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
138                                   "cannot assign to iface any iface\n",
139                                   ctdb_addr_to_str(&vnn->public_address)));
140                 return -1;
141         }
142
143         vnn->iface = best;
144         best->references++;
145         vnn->pnn = ctdb->pnn;
146
147         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
148                            "now assigned to iface '%s' refs[%d]\n",
149                            ctdb_addr_to_str(&vnn->public_address),
150                            ctdb_vnn_iface_string(vnn),
151                            best->references));
152         return 0;
153 }
154
155 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
156                                     struct ctdb_vnn *vnn)
157 {
158         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
159                            "now unassigned (old iface '%s' refs[%d])\n",
160                            ctdb_addr_to_str(&vnn->public_address),
161                            ctdb_vnn_iface_string(vnn),
162                            vnn->iface?vnn->iface->references:0));
163         if (vnn->iface) {
164                 vnn->iface->references--;
165         }
166         vnn->iface = NULL;
167         if (vnn->pnn == ctdb->pnn) {
168                 vnn->pnn = -1;
169         }
170 }
171
172 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
173                                struct ctdb_vnn *vnn)
174 {
175         int i;
176
177         if (vnn->iface && vnn->iface->link_up) {
178                 return true;
179         }
180
181         for (i=0; vnn->ifaces[i]; i++) {
182                 struct ctdb_iface *cur;
183
184                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
185                 if (cur == NULL) {
186                         continue;
187                 }
188
189                 if (cur->link_up) {
190                         return true;
191                 }
192         }
193
194         return false;
195 }
196
197 struct ctdb_takeover_arp {
198         struct ctdb_context *ctdb;
199         uint32_t count;
200         ctdb_sock_addr addr;
201         struct ctdb_tcp_array *tcparray;
202         struct ctdb_vnn *vnn;
203 };
204
205
206 /*
207   lists of tcp endpoints
208  */
209 struct ctdb_tcp_list {
210         struct ctdb_tcp_list *prev, *next;
211         struct ctdb_tcp_connection connection;
212 };
213
214 /*
215   list of clients to kill on IP release
216  */
217 struct ctdb_client_ip {
218         struct ctdb_client_ip *prev, *next;
219         struct ctdb_context *ctdb;
220         ctdb_sock_addr addr;
221         uint32_t client_id;
222 };
223
224
225 /*
226   send a gratuitous arp
227  */
228 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te, 
229                                   struct timeval t, void *private_data)
230 {
231         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
232                                                         struct ctdb_takeover_arp);
233         int i, ret;
234         struct ctdb_tcp_array *tcparray;
235         const char *iface = ctdb_vnn_iface_string(arp->vnn);
236
237         ret = ctdb_sys_send_arp(&arp->addr, iface);
238         if (ret != 0) {
239                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
240                                   iface, strerror(errno)));
241         }
242
243         tcparray = arp->tcparray;
244         if (tcparray) {
245                 for (i=0;i<tcparray->num;i++) {
246                         struct ctdb_tcp_connection *tcon;
247
248                         tcon = &tcparray->connections[i];
249                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
250                                 (unsigned)ntohs(tcon->dst_addr.ip.sin_port), 
251                                 ctdb_addr_to_str(&tcon->src_addr),
252                                 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
253                         ret = ctdb_sys_send_tcp(
254                                 &tcon->src_addr, 
255                                 &tcon->dst_addr,
256                                 0, 0, 0);
257                         if (ret != 0) {
258                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
259                                         ctdb_addr_to_str(&tcon->src_addr)));
260                         }
261                 }
262         }
263
264         arp->count++;
265
266         if (arp->count == CTDB_ARP_REPEAT) {
267                 talloc_free(arp);
268                 return;
269         }
270
271         event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx, 
272                         timeval_current_ofs(CTDB_ARP_INTERVAL, 100000), 
273                         ctdb_control_send_arp, arp);
274 }
275
276 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
277                                        struct ctdb_vnn *vnn)
278 {
279         struct ctdb_takeover_arp *arp;
280         struct ctdb_tcp_array *tcparray;
281
282         if (!vnn->takeover_ctx) {
283                 vnn->takeover_ctx = talloc_new(vnn);
284                 if (!vnn->takeover_ctx) {
285                         return -1;
286                 }
287         }
288
289         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
290         if (!arp) {
291                 return -1;
292         }
293
294         arp->ctdb = ctdb;
295         arp->addr = vnn->public_address;
296         arp->vnn  = vnn;
297
298         tcparray = vnn->tcp_array;
299         if (tcparray) {
300                 /* add all of the known tcp connections for this IP to the
301                    list of tcp connections to send tickle acks for */
302                 arp->tcparray = talloc_steal(arp, tcparray);
303
304                 vnn->tcp_array = NULL;
305                 vnn->tcp_update_needed = true;
306         }
307
308         event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
309                         timeval_zero(), ctdb_control_send_arp, arp);
310
311         return 0;
312 }
313
314 struct takeover_callback_state {
315         struct ctdb_req_control *c;
316         ctdb_sock_addr *addr;
317         struct ctdb_vnn *vnn;
318 };
319
/*
  state carried from ctdb_do_takeip to ctdb_do_takeip_callback
 */
struct ctdb_do_takeip_state {
        /* the control to answer once the event script has finished */
        struct ctdb_req_control *c;
        /* the public address being taken over */
        struct ctdb_vnn *vnn;
};
324
325 /*
326   called when takeip event finishes
327  */
328 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
329                                     void *private_data)
330 {
331         struct ctdb_do_takeip_state *state =
332                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
333         int32_t ret;
334         TDB_DATA data;
335
336         if (status != 0) {
337                 if (status == -ETIME) {
338                         ctdb_ban_self(ctdb);
339                 }
340                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
341                                  ctdb_addr_to_str(&state->vnn->public_address),
342                                  ctdb_vnn_iface_string(state->vnn)));
343                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
344                 talloc_free(state);
345                 return;
346         }
347
348         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
349         if (ret != 0) {
350                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
351                 talloc_free(state);
352                 return;
353         }
354
355         data.dptr  = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
356         data.dsize = strlen((char *)data.dptr) + 1;
357         DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
358
359         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
360
361
362         /* the control succeeded */
363         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
364         talloc_free(state);
365         return;
366 }
367
368 /*
369   take over an ip address
370  */
371 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
372                               struct ctdb_req_control *c,
373                               struct ctdb_vnn *vnn)
374 {
375         int ret;
376         struct ctdb_do_takeip_state *state;
377
378         ret = ctdb_vnn_assign_iface(ctdb, vnn);
379         if (ret != 0) {
380                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
381                                  "assin a usable interface\n",
382                                  ctdb_addr_to_str(&vnn->public_address),
383                                  vnn->public_netmask_bits));
384                 return -1;
385         }
386
387         state = talloc(vnn, struct ctdb_do_takeip_state);
388         CTDB_NO_MEMORY(ctdb, state);
389
390         state->c = talloc_steal(ctdb, c);
391         state->vnn   = vnn;
392
393         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
394                             ctdb_addr_to_str(&vnn->public_address),
395                             vnn->public_netmask_bits,
396                             ctdb_vnn_iface_string(vnn)));
397
398         ret = ctdb_event_script_callback(ctdb,
399                                          state,
400                                          ctdb_do_takeip_callback,
401                                          state,
402                                          false,
403                                          CTDB_EVENT_TAKE_IP,
404                                          "%s %s %u",
405                                          ctdb_vnn_iface_string(vnn),
406                                          ctdb_addr_to_str(&vnn->public_address),
407                                          vnn->public_netmask_bits);
408
409         if (ret != 0) {
410                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
411                         ctdb_addr_to_str(&vnn->public_address),
412                         ctdb_vnn_iface_string(vnn)));
413                 talloc_free(state);
414                 return -1;
415         }
416
417         return 0;
418 }
419
/*
  state carried from ctdb_do_updateip to ctdb_do_updateip_callback
 */
struct ctdb_do_updateip_state {
        /* the control to answer once the event script has finished */
        struct ctdb_req_control *c;
        /* interface the address was on before the move; restored on failure */
        struct ctdb_iface *old;
        /* the public address being moved */
        struct ctdb_vnn *vnn;
};
425
426 /*
427   called when updateip event finishes
428  */
429 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
430                                       void *private_data)
431 {
432         struct ctdb_do_updateip_state *state =
433                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
434         int32_t ret;
435
436         if (status != 0) {
437                 if (status == -ETIME) {
438                         ctdb_ban_self(ctdb);
439                 }
440                 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
441                         ctdb_addr_to_str(&state->vnn->public_address),
442                         state->old->name,
443                         ctdb_vnn_iface_string(state->vnn)));
444
445                 /*
446                  * All we can do is reset the old interface
447                  * and let the next run fix it
448                  */
449                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
450                 state->vnn->iface = state->old;
451                 state->vnn->iface->references++;
452
453                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
454                 talloc_free(state);
455                 return;
456         }
457
458         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
459         if (ret != 0) {
460                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
461                 talloc_free(state);
462                 return;
463         }
464
465         /* the control succeeded */
466         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
467         talloc_free(state);
468         return;
469 }
470
471 /*
472   update (move) an ip address
473  */
474 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
475                                 struct ctdb_req_control *c,
476                                 struct ctdb_vnn *vnn)
477 {
478         int ret;
479         struct ctdb_do_updateip_state *state;
480         struct ctdb_iface *old = vnn->iface;
481
482         ctdb_vnn_unassign_iface(ctdb, vnn);
483         ret = ctdb_vnn_assign_iface(ctdb, vnn);
484         if (ret != 0) {
485                 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
486                                  "assin a usable interface (old iface '%s')\n",
487                                  ctdb_addr_to_str(&vnn->public_address),
488                                  vnn->public_netmask_bits,
489                                  old->name));
490                 return -1;
491         }
492
493         if (vnn->iface == old) {
494                 DEBUG(DEBUG_ERR,("update of IP %s/%u trying to "
495                                  "assin a same interface '%s'\n",
496                                  ctdb_addr_to_str(&vnn->public_address),
497                                  vnn->public_netmask_bits,
498                                  old->name));
499                 return -1;
500         }
501
502         state = talloc(vnn, struct ctdb_do_updateip_state);
503         CTDB_NO_MEMORY(ctdb, state);
504
505         state->c = talloc_steal(ctdb, c);
506         state->old = old;
507         state->vnn = vnn;
508
509         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
510                             "interface %s to %s\n",
511                             ctdb_addr_to_str(&vnn->public_address),
512                             vnn->public_netmask_bits,
513                             old->name,
514                             ctdb_vnn_iface_string(vnn)));
515
516         ret = ctdb_event_script_callback(ctdb,
517                                          state,
518                                          ctdb_do_updateip_callback,
519                                          state,
520                                          false,
521                                          CTDB_EVENT_UPDATE_IP,
522                                          "%s %s %s %u",
523                                          state->old->name,
524                                          ctdb_vnn_iface_string(vnn),
525                                          ctdb_addr_to_str(&vnn->public_address),
526                                          vnn->public_netmask_bits);
527         if (ret != 0) {
528                 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
529                                  ctdb_addr_to_str(&vnn->public_address),
530                                  old->name, ctdb_vnn_iface_string(vnn)));
531                 talloc_free(state);
532                 return -1;
533         }
534
535         return 0;
536 }
537
538 /*
539   Find the vnn of the node that has a public ip address
540   returns -1 if the address is not known as a public address
541  */
542 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
543 {
544         struct ctdb_vnn *vnn;
545
546         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
547                 if (ctdb_same_ip(&vnn->public_address, addr)) {
548                         return vnn;
549                 }
550         }
551
552         return NULL;
553 }
554
555 /*
556   take over an ip address
557  */
558 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
559                                  struct ctdb_req_control *c,
560                                  TDB_DATA indata,
561                                  bool *async_reply)
562 {
563         int ret;
564         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
565         struct ctdb_vnn *vnn;
566         bool have_ip = false;
567         bool do_updateip = false;
568         bool do_takeip = false;
569         struct ctdb_iface *best_iface = NULL;
570
571         if (pip->pnn != ctdb->pnn) {
572                 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
573                                  "with pnn %d, but we're node %d\n",
574                                  ctdb_addr_to_str(&pip->addr),
575                                  pip->pnn, ctdb->pnn));
576                 return -1;
577         }
578
579         /* update out vnn list */
580         vnn = find_public_ip_vnn(ctdb, &pip->addr);
581         if (vnn == NULL) {
582                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
583                         ctdb_addr_to_str(&pip->addr)));
584                 return 0;
585         }
586
587         have_ip = ctdb_sys_have_ip(&pip->addr);
588         best_iface = ctdb_vnn_best_iface(ctdb, vnn);
589         if (best_iface == NULL) {
590                 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
591                                  "a usable interface (old %s, have_ip %d)\n",
592                                  ctdb_addr_to_str(&vnn->public_address),
593                                  vnn->public_netmask_bits,
594                                  ctdb_vnn_iface_string(vnn),
595                                  have_ip));
596                 return -1;
597         }
598
599         if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
600                 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
601                 have_ip = false;
602         }
603
604         if (vnn->iface == NULL && have_ip) {
605                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
606                                   "but we have no interface assigned, has someone manually configured it?"
607                                   "banning ourself\n",
608                                  ctdb_addr_to_str(&vnn->public_address)));
609                 ctdb_ban_self(ctdb);
610                 return -1;
611         }
612
613         if (vnn->pnn != ctdb->pnn && have_ip) {
614                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
615                                   "and we have it on iface[%s], but it was assigned to node %d"
616                                   "and we are node %d, banning ourself\n",
617                                  ctdb_addr_to_str(&vnn->public_address),
618                                  ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
619                 ctdb_ban_self(ctdb);
620                 return -1;
621         }
622
623         if (vnn->iface) {
624                 if (vnn->iface->link_up) {
625                         /* only move when the rebalance gains something */
626                         if (vnn->iface->references > (best_iface->references + 1)) {
627                                 do_updateip = true;
628                         }
629                 } else if (vnn->iface != best_iface) {
630                         do_updateip = true;
631                 }
632         }
633
634         if (!have_ip) {
635                 if (do_updateip) {
636                         ctdb_vnn_unassign_iface(ctdb, vnn);
637                         do_updateip = false;
638                 }
639                 do_takeip = true;
640         }
641
642         if (do_takeip) {
643                 ret = ctdb_do_takeip(ctdb, c, vnn);
644                 if (ret != 0) {
645                         return -1;
646                 }
647         } else if (do_updateip) {
648                 ret = ctdb_do_updateip(ctdb, c, vnn);
649                 if (ret != 0) {
650                         return -1;
651                 }
652         } else {
653                 /*
654                  * The interface is up and the kernel known the ip
655                  * => do nothing
656                  */
657                 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
658                         ctdb_addr_to_str(&pip->addr),
659                         vnn->public_netmask_bits,
660                         ctdb_vnn_iface_string(vnn)));
661                 return 0;
662         }
663
664         /* tell ctdb_control.c that we will be replying asynchronously */
665         *async_reply = true;
666
667         return 0;
668 }
669
670 /*
671   takeover an ip address old v4 style
672  */
673 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb, 
674                                 struct ctdb_req_control *c,
675                                 TDB_DATA indata, 
676                                 bool *async_reply)
677 {
678         TDB_DATA data;
679         
680         data.dsize = sizeof(struct ctdb_public_ip);
681         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
682         CTDB_NO_MEMORY(ctdb, data.dptr);
683         
684         memcpy(data.dptr, indata.dptr, indata.dsize);
685         return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
686 }
687
688 /*
689   kill any clients that are registered with a IP that is being released
690  */
691 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
692 {
693         struct ctdb_client_ip *ip;
694
695         DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
696                 ctdb_addr_to_str(addr)));
697
698         for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
699                 ctdb_sock_addr tmp_addr;
700
701                 tmp_addr = ip->addr;
702                 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n", 
703                         ip->client_id,
704                         ctdb_addr_to_str(&ip->addr)));
705
706                 if (ctdb_same_ip(&tmp_addr, addr)) {
707                         struct ctdb_client *client = ctdb_reqid_find(ctdb, 
708                                                                      ip->client_id, 
709                                                                      struct ctdb_client);
710                         DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n", 
711                                 ip->client_id,
712                                 ctdb_addr_to_str(&ip->addr),
713                                 client->pid));
714
715                         if (client->pid != 0) {
716                                 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
717                                         (unsigned)client->pid,
718                                         ctdb_addr_to_str(addr),
719                                         ip->client_id));
720                                 kill(client->pid, SIGKILL);
721                         }
722                 }
723         }
724 }
725
726 /*
727   called when releaseip event finishes
728  */
729 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
730                                 void *private_data)
731 {
732         struct takeover_callback_state *state = 
733                 talloc_get_type(private_data, struct takeover_callback_state);
734         TDB_DATA data;
735
736         if (status == -ETIME) {
737                 ctdb_ban_self(ctdb);
738         }
739
740         /* send a message to all clients of this node telling them
741            that the cluster has been reconfigured and they should
742            release any sockets on this IP */
743         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
744         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
745         data.dsize = strlen((char *)data.dptr)+1;
746
747         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
748
749         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
750
751         /* kill clients that have registered with this IP */
752         release_kill_clients(ctdb, state->addr);
753
754         ctdb_vnn_unassign_iface(ctdb, state->vnn);
755
756         /* the control succeeded */
757         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
758         talloc_free(state);
759 }
760
761 /*
762   release an ip address
763  */
764 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
765                                 struct ctdb_req_control *c,
766                                 TDB_DATA indata, 
767                                 bool *async_reply)
768 {
769         int ret;
770         struct takeover_callback_state *state;
771         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
772         struct ctdb_vnn *vnn;
773
774         /* update our vnn list */
775         vnn = find_public_ip_vnn(ctdb, &pip->addr);
776         if (vnn == NULL) {
777                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
778                         ctdb_addr_to_str(&pip->addr)));
779                 return 0;
780         }
781         vnn->pnn = pip->pnn;
782
783         /* stop any previous arps */
784         talloc_free(vnn->takeover_ctx);
785         vnn->takeover_ctx = NULL;
786
787         if (!ctdb_sys_have_ip(&pip->addr)) {
788                 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n", 
789                         ctdb_addr_to_str(&pip->addr),
790                         vnn->public_netmask_bits, 
791                         ctdb_vnn_iface_string(vnn)));
792                 ctdb_vnn_unassign_iface(ctdb, vnn);
793                 return 0;
794         }
795
796         if (vnn->iface == NULL) {
797                 DEBUG(DEBUG_CRIT,(__location__ " release_ip of IP %s is known to the kernel, "
798                                   "but we have no interface assigned, has someone manually configured it?"
799                                   "banning ourself\n",
800                                  ctdb_addr_to_str(&vnn->public_address)));
801                 ctdb_ban_self(ctdb);
802                 return -1;
803         }
804
805         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
806                 ctdb_addr_to_str(&pip->addr),
807                 vnn->public_netmask_bits, 
808                 ctdb_vnn_iface_string(vnn),
809                 pip->pnn));
810
811         state = talloc(ctdb, struct takeover_callback_state);
812         CTDB_NO_MEMORY(ctdb, state);
813
814         state->c = talloc_steal(state, c);
815         state->addr = talloc(state, ctdb_sock_addr);       
816         CTDB_NO_MEMORY(ctdb, state->addr);
817         *state->addr = pip->addr;
818         state->vnn   = vnn;
819
820         ret = ctdb_event_script_callback(ctdb, 
821                                          state, release_ip_callback, state,
822                                          false,
823                                          CTDB_EVENT_RELEASE_IP,
824                                          "%s %s %u",
825                                          ctdb_vnn_iface_string(vnn),
826                                          ctdb_addr_to_str(&pip->addr),
827                                          vnn->public_netmask_bits);
828         if (ret != 0) {
829                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
830                         ctdb_addr_to_str(&pip->addr),
831                         ctdb_vnn_iface_string(vnn)));
832                 talloc_free(state);
833                 return -1;
834         }
835
836         /* tell the control that we will be reply asynchronously */
837         *async_reply = true;
838         return 0;
839 }
840
841 /*
842   release an ip address old v4 style
843  */
844 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb, 
845                                 struct ctdb_req_control *c,
846                                 TDB_DATA indata, 
847                                 bool *async_reply)
848 {
849         TDB_DATA data;
850         
851         data.dsize = sizeof(struct ctdb_public_ip);
852         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
853         CTDB_NO_MEMORY(ctdb, data.dptr);
854         
855         memcpy(data.dptr, indata.dptr, indata.dsize);
856         return ctdb_control_release_ip(ctdb, c, data, async_reply);
857 }
858
859
860 static int ctdb_add_public_address(struct ctdb_context *ctdb,
861                                    ctdb_sock_addr *addr,
862                                    unsigned mask, const char *ifaces)
863 {
864         struct ctdb_vnn      *vnn;
865         uint32_t num = 0;
866         char *tmp;
867         const char *iface;
868         int i;
869         int ret;
870
871         /* Verify that we dont have an entry for this ip yet */
872         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
873                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
874                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
875                                 ctdb_addr_to_str(addr)));
876                         return -1;
877                 }               
878         }
879
880         /* create a new vnn structure for this ip address */
881         vnn = talloc_zero(ctdb, struct ctdb_vnn);
882         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
883         vnn->ifaces = talloc_array(vnn, const char *, num + 2);
884         tmp = talloc_strdup(vnn, ifaces);
885         CTDB_NO_MEMORY_FATAL(ctdb, tmp);
886         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
887                 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
888                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
889                 vnn->ifaces[num] = talloc_strdup(vnn, iface);
890                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
891                 num++;
892         }
893         talloc_free(tmp);
894         vnn->ifaces[num] = NULL;
895         vnn->public_address      = *addr;
896         vnn->public_netmask_bits = mask;
897         vnn->pnn                 = -1;
898
899         for (i=0; vnn->ifaces[i]; i++) {
900                 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
901                 if (ret != 0) {
902                         DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
903                                            "for public_address[%s]\n",
904                                            vnn->ifaces[i], ctdb_addr_to_str(addr)));
905                         talloc_free(vnn);
906                         return -1;
907                 }
908                 if (i == 0) {
909                         vnn->iface = ctdb_find_iface(ctdb, vnn->ifaces[i]);
910                 }
911         }
912
913         DLIST_ADD(ctdb->vnn, vnn);
914
915         return 0;
916 }
917
918 /*
919   setup the event script directory
920 */
921 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
922 {
923         ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
924         CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
925         return 0;
926 }
927
928 /*
929   setup the public address lists from a file
930 */
931 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
932 {
933         char **lines;
934         int nlines;
935         int i;
936
937         lines = file_lines_load(alist, &nlines, ctdb);
938         if (lines == NULL) {
939                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
940                 return -1;
941         }
942         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
943                 nlines--;
944         }
945
946         for (i=0;i<nlines;i++) {
947                 unsigned mask;
948                 ctdb_sock_addr addr;
949                 const char *addrstr;
950                 const char *ifaces;
951                 char *tok, *line;
952
953                 line = lines[i];
954                 while ((*line == ' ') || (*line == '\t')) {
955                         line++;
956                 }
957                 if (*line == '#') {
958                         continue;
959                 }
960                 if (strcmp(line, "") == 0) {
961                         continue;
962                 }
963                 tok = strtok(line, " \t");
964                 addrstr = tok;
965                 tok = strtok(NULL, " \t");
966                 if (tok == NULL) {
967                         if (NULL == ctdb->default_public_interface) {
968                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
969                                          i+1));
970                                 talloc_free(lines);
971                                 return -1;
972                         }
973                         ifaces = ctdb->default_public_interface;
974                 } else {
975                         ifaces = tok;
976                 }
977
978                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
979                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
980                         talloc_free(lines);
981                         return -1;
982                 }
983                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
984                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
985                         talloc_free(lines);
986                         return -1;
987                 }
988         }
989
990         talloc_free(lines);
991         return 0;
992 }
993
994 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
995                               const char *iface,
996                               const char *ip)
997 {
998         struct ctdb_vnn *svnn;
999         bool ok;
1000         int ret;
1001
1002         svnn = talloc_zero(ctdb, struct ctdb_vnn);
1003         CTDB_NO_MEMORY(ctdb, svnn);
1004
1005         svnn->ifaces = talloc_array(svnn, const char *, 2);
1006         CTDB_NO_MEMORY(ctdb, svnn->ifaces);
1007         svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
1008         CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
1009         svnn->ifaces[1] = NULL;
1010
1011         ok = parse_ip(ip, iface, 0, &svnn->public_address);
1012         if (!ok) {
1013                 talloc_free(svnn);
1014                 return -1;
1015         }
1016
1017         ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1018         if (ret != 0) {
1019                 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1020                                    "for single_ip[%s]\n",
1021                                    svnn->ifaces[0],
1022                                    ctdb_addr_to_str(&svnn->public_address)));
1023                 talloc_free(svnn);
1024                 return -1;
1025         }
1026
1027         ret = ctdb_vnn_assign_iface(ctdb, svnn);
1028         if (ret != 0) {
1029                 talloc_free(svnn);
1030                 return -1;
1031         }
1032
1033         ctdb->single_ip_vnn = svnn;
1034         return 0;
1035 }
1036
/* One public address together with the node it is currently assigned
   to. Entries are chained into a singly linked list through 'next'. */
1037 struct ctdb_public_ip_list {
1038         struct ctdb_public_ip_list *next; /* next entry, NULL ends the list */
1039         uint32_t pnn; /* node the address is assigned to; compared against -1 in this file for "no node" */
1040         ctdb_sock_addr addr; /* the public address */
1041 };
1042
1043
1044 /* Given a physical node, return the number of
1045    public addresses that is currently assigned to this node.
1046 */
1047 static int node_ip_coverage(struct ctdb_context *ctdb, 
1048         int32_t pnn,
1049         struct ctdb_public_ip_list *ips)
1050 {
1051         int num=0;
1052
1053         for (;ips;ips=ips->next) {
1054                 if (ips->pnn == pnn) {
1055                         num++;
1056                 }
1057         }
1058         return num;
1059 }
1060
1061
1062 /* Check if this is a public ip known to the node, i.e. can that
1063    node takeover this ip ?
1064 */
1065 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn, 
1066                 struct ctdb_public_ip_list *ip)
1067 {
1068         struct ctdb_all_public_ips *public_ips;
1069         int i;
1070
1071         public_ips = ctdb->nodes[pnn]->available_public_ips;
1072
1073         if (public_ips == NULL) {
1074                 return -1;
1075         }
1076
1077         for (i=0;i<public_ips->num;i++) {
1078                 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1079                         /* yes, this node can serve this public ip */
1080                         return 0;
1081                 }
1082         }
1083
1084         return -1;
1085 }
1086
1087
1088 /* search the node lists list for a node to takeover this ip.
1089    pick the node that currently are serving the least number of ips
1090    so that the ips get spread out evenly.
1091 */
1092 static int find_takeover_node(struct ctdb_context *ctdb, 
1093                 struct ctdb_node_map *nodemap, uint32_t mask, 
1094                 struct ctdb_public_ip_list *ip,
1095                 struct ctdb_public_ip_list *all_ips)
1096 {
1097         int pnn, min=0, num;
1098         int i;
1099
1100         pnn    = -1;
1101         for (i=0;i<nodemap->num;i++) {
1102                 if (nodemap->nodes[i].flags & mask) {
1103                         /* This node is not healty and can not be used to serve
1104                            a public address 
1105                         */
1106                         continue;
1107                 }
1108
1109                 /* verify that this node can serve this ip */
1110                 if (can_node_serve_ip(ctdb, i, ip)) {
1111                         /* no it couldnt   so skip to the next node */
1112                         continue;
1113                 }
1114
1115                 num = node_ip_coverage(ctdb, i, all_ips);
1116                 /* was this the first node we checked ? */
1117                 if (pnn == -1) {
1118                         pnn = i;
1119                         min  = num;
1120                 } else {
1121                         if (num < min) {
1122                                 pnn = i;
1123                                 min  = num;
1124                         }
1125                 }
1126         }       
1127         if (pnn == -1) {
1128                 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1129                         ctdb_addr_to_str(&ip->addr)));
1130
1131                 return -1;
1132         }
1133
1134         ip->pnn = pnn;
1135         return 0;
1136 }
1137
1138 #define IP_KEYLEN       4
1139 static uint32_t *ip_key(ctdb_sock_addr *ip)
1140 {
1141         static uint32_t key[IP_KEYLEN];
1142
1143         bzero(key, sizeof(key));
1144
1145         switch (ip->sa.sa_family) {
1146         case AF_INET:
1147                 key[3]  = htonl(ip->ip.sin_addr.s_addr);
1148                 break;
1149         case AF_INET6:
1150                 key[0]  = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
1151                 key[1]  = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
1152                 key[2]  = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
1153                 key[3]  = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
1154                 break;
1155         default:
1156                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
1157                 return key;
1158         }
1159
1160         return key;
1161 }
1162
/* Callback passed to trbt_insertarray32_callback() when the merged ip
   tree is built in this file. It ignores 'data' and returns 'parm'
   unchanged.
   NOTE(review): presumably the returned pointer is what the tree stores
   for the key, so a later insert for the same key replaces the earlier
   entry - confirm against the rb_tree implementation. */
1163 static void *add_ip_callback(void *parm, void *data)
1164 {
1165         return parm;
1166 }
1167
1168 void getips_count_callback(void *param, void *data)
1169 {
1170         struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1171         struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1172
1173         new_ip->next = *ip_list;
1174         *ip_list     = new_ip;
1175 }
1176
1177 static struct ctdb_public_ip_list *
1178 create_merged_ip_list(struct ctdb_context *ctdb)
1179 {
1180         int i, j;
1181         struct ctdb_public_ip_list *ip_list;
1182         struct ctdb_all_public_ips *public_ips;
1183
1184         if (ctdb->ip_tree != NULL) {
1185                 talloc_free(ctdb->ip_tree);
1186                 ctdb->ip_tree = NULL;
1187         }
1188         ctdb->ip_tree = trbt_create(ctdb, 0);
1189
1190         for (i=0;i<ctdb->num_nodes;i++) {
1191                 public_ips = ctdb->nodes[i]->known_public_ips;
1192
1193                 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1194                         continue;
1195                 }
1196
1197                 /* there were no public ips for this node */
1198                 if (public_ips == NULL) {
1199                         continue;
1200                 }               
1201
1202                 for (j=0;j<public_ips->num;j++) {
1203                         struct ctdb_public_ip_list *tmp_ip; 
1204
1205                         tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
1206                         CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1207                         tmp_ip->pnn  = public_ips->ips[j].pnn;
1208                         tmp_ip->addr = public_ips->ips[j].addr;
1209                         tmp_ip->next = NULL;
1210
1211                         trbt_insertarray32_callback(ctdb->ip_tree,
1212                                 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1213                                 add_ip_callback,
1214                                 tmp_ip);
1215                 }
1216         }
1217
1218         ip_list = NULL;
1219         trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1220
1221         return ip_list;
1222 }
1223
1224 /*
1225   make any IP alias changes for public addresses that are necessary 

  Steps, in order:
   - build the merged list of all public addresses (create_merged_ip_list)
   - decide which node (->pnn) should hold each address, or -1 for none
   - send a release control for every address to every connected node
     that should not hold it, and wait for the replies
   - send a takeover control for every assigned address to its new
     owner, and wait for the replies
   - run the "ipreallocated" event on the connected nodes

  Returns 0 on success and -1 if a release/takeover control could not be
  sent or one of the two async waits failed. A failure of the final
  "ipreallocated" control is only logged.
1226  */
1227 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
1228 {
1229         int i, num_healthy, retries;
1230         struct ctdb_public_ip ip;
1231         struct ctdb_public_ipv4 ipv4;
1232         uint32_t mask, *nodes;
1233         struct ctdb_public_ip_list *all_ips, *tmp_ip;
1234         int maxnode, maxnum=0, minnode, minnum=0, num;
1235         TDB_DATA data;
1236         struct timeval timeout;
1237         struct client_async_data *async_data;
1238         struct ctdb_client_control_state *state;
             /* scratch context, freed on every return path below */
1239         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1240
1241
1242         ZERO_STRUCT(ip);
1243
1244         /* Count how many completely healthy nodes we have */
1245         num_healthy = 0;
1246         for (i=0;i<nodemap->num;i++) {
1247                 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1248                         num_healthy++;
1249                 }
1250         }
1251
1252         if (num_healthy > 0) {
1253                 /* We have healthy nodes, so only consider them for 
1254                    serving public addresses
1255                 */
1256                 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
1257         } else {
1258                 /* We didnt have any completely healthy nodes so
1259                    use "disabled" nodes as a fallback
1260                 */
1261                 mask = NODE_FLAGS_INACTIVE;
1262         }
1263
1264         /* since nodes only know about those public addresses that
1265            can be served by that particular node, no single node has
1266            a full list of all public addresses that exist in the cluster.
1267            Walk over all node structures and create a merged list of
1268            all public addresses that exist in the cluster.
1269
1270            keep the tree of ips around as ctdb->ip_tree
1271         */
1272         all_ips = create_merged_ip_list(ctdb);
1273
1274         /* If we want deterministic ip allocations, i.e. that the ip addresses
1275            will always be allocated the same way for a specific set of
1276            available/unavailable nodes.
1277         */
1278         if (1 == ctdb->tunable.deterministic_public_ips) {              
1279                 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
                     /* NOTE(review): divides by nodemap->num, which is not
                        checked for zero here - confirm callers never pass
                        an empty nodemap */
1280                 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
1281                         tmp_ip->pnn = i%nodemap->num;
1282                 }
1283         }
1284
1285
1286         /* mark all public addresses with a masked node as being served by
1287            node -1
1288         */
1289         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1290                 if (tmp_ip->pnn == -1) {
1291                         continue;
1292                 }
                     /* NOTE(review): tmp_ip->pnn is used as an index into
                        nodemap->nodes without a range check - confirm it
                        is always below nodemap->num */
1293                 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
1294                         tmp_ip->pnn = -1;
1295                 }
1296         }
1297
1298         /* verify that the assigned nodes can serve that public ip
1299            and set it to -1 if not
1300         */
1301         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1302                 if (tmp_ip->pnn == -1) {
1303                         continue;
1304                 }
1305                 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
1306                         /* this node can not serve this ip. */
1307                         tmp_ip->pnn = -1;
1308                 }
1309         }
1310
1311
1312         /* now we must redistribute all public addresses with takeover node
1313            -1 among the nodes available
1314         */
1315         retries = 0;
1316 try_again:
1317         /* loop over all ip's and find a physical node to cover for 
1318            each unassigned ip.
1319         */
1320         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1321                 if (tmp_ip->pnn == -1) {
1322                         if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
1323                                 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1324                                         ctdb_addr_to_str(&tmp_ip->addr)));
1325                         }
1326                 }
1327         }
1328
1329         /* If we dont want ips to fail back after a node becomes healthy
1330            again, we wont even try to reallocate the ip addresses so that
1331            they are evenly spread out.
1332            This can NOT be used at the same time as DeterministicIPs !
1333         */
1334         if (1 == ctdb->tunable.no_ip_failback) {
1335                 if (1 == ctdb->tunable.deterministic_public_ips) {
1336                         DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
1337                 }
1338                 goto finished;
1339         }
1340
1341
1342         /* now, try to make sure the ip addresses are evenly distributed
1343            across the node.
1344            for each ip address, loop over all nodes that can serve this
1345            ip and make sure that the difference between the node
1346            serving the most and the node serving the least ip's are not greater
1347            than 1.
1348         */
1349         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1350                 if (tmp_ip->pnn == -1) {
1351                         continue;
1352                 }
1353
1354                 /* Get the highest and lowest number of ips's served by any 
1355                    valid node which can serve this ip.
1356                 */
1357                 maxnode = -1;
1358                 minnode = -1;
1359                 for (i=0;i<nodemap->num;i++) {
1360                         if (nodemap->nodes[i].flags & mask) {
1361                                 continue;
1362                         }
1363
1364                         /* only check nodes that can actually serve this ip */
1365                         if (can_node_serve_ip(ctdb, i, tmp_ip)) {
1366                                 /* no it couldnt   so skip to the next node */
1367                                 continue;
1368                         }
1369
1370                         num = node_ip_coverage(ctdb, i, all_ips);
1371                         if (maxnode == -1) {
1372                                 maxnode = i;
1373                                 maxnum  = num;
1374                         } else {
1375                                 if (num > maxnum) {
1376                                         maxnode = i;
1377                                         maxnum  = num;
1378                                 }
1379                         }
1380                         if (minnode == -1) {
1381                                 minnode = i;
1382                                 minnum  = num;
1383                         } else {
1384                                 if (num < minnum) {
1385                                         minnode = i;
1386                                         minnum  = num;
1387                                 }
1388                         }
1389                 }
1390                 if (maxnode == -1) {
1391                         DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
1392                                 ctdb_addr_to_str(&tmp_ip->addr)));
1393
1394                         continue;
1395                 }
1396
1397                 /* If we want deterministic IPs then dont try to reallocate 
1398                    them to spread out the load.
1399                 */
1400                 if (1 == ctdb->tunable.deterministic_public_ips) {
1401                         continue;
1402                 }
1403
1404                 /* if the spread between the smallest and largest coverage by
1405                    a node is >=2 we steal one of the ips from the node with
1406                    most coverage to even things out a bit.
1407                    try to do this at most 5 times  since we dont want to spend
1408                    too much time balancing the ip coverage.
1409                 */
1410                 if ( (maxnum > minnum+1)
1411                   && (retries < 5) ){
1412                         struct ctdb_public_ip_list *tmp;
1413
1414                         /* mark one of maxnode's vnn's as unassigned and try
1415                            again
1416                         */
1417                         for (tmp=all_ips;tmp;tmp=tmp->next) {
1418                                 if (tmp->pnn == maxnode) {
1419                                         tmp->pnn = -1;
1420                                         retries++;
                                             /* restart the assignment pass so the
                                                freed ip is handed out again */
1421                                         goto try_again;
1422                                 }
1423                         }
1424                 }
1425         }
1426
1427
1428         /* finished distributing the public addresses, now just send the 
1429            info out to the nodes
1430         */
1431 finished:
1432
1433         /* at this point ->pnn is the node which will own each IP
1434            or -1 if there is no node that can cover this ip
1435         */
1436
1437         /* now tell all nodes to delete any alias that they should not
1438            have.  This will be a NOOP on nodes that don't currently
1439            hold the given alias */
1440         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1441         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1442
1443         for (i=0;i<nodemap->num;i++) {
1444                 /* don't talk to unconnected nodes, but do talk to banned nodes */
1445                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1446                         continue;
1447                 }
1448
1449                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1450                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1451                                 /* This node should be serving this
1452                                    vnn so dont tell it to release the ip
1453                                 */
1454                                 continue;
1455                         }
                             /* ipv4 addresses go out with the old v4 control
                                and struct, everything else with the new one */
1456                         if (tmp_ip->addr.sa.sa_family == AF_INET) {
1457                                 ipv4.pnn = tmp_ip->pnn;
1458                                 ipv4.sin = tmp_ip->addr.ip;
1459
1460                                 timeout = TAKEOVER_TIMEOUT();
1461                                 data.dsize = sizeof(ipv4);
1462                                 data.dptr  = (uint8_t *)&ipv4;
1463                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1464                                                 0, CTDB_CONTROL_RELEASE_IPv4, 0,
1465                                                 data, async_data,
1466                                                 &timeout, NULL);
1467                         } else {
1468                                 ip.pnn  = tmp_ip->pnn;
1469                                 ip.addr = tmp_ip->addr;
1470
1471                                 timeout = TAKEOVER_TIMEOUT();
1472                                 data.dsize = sizeof(ip);
1473                                 data.dptr  = (uint8_t *)&ip;
1474                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1475                                                 0, CTDB_CONTROL_RELEASE_IP, 0,
1476                                                 data, async_data,
1477                                                 &timeout, NULL);
1478                         }
1479
1480                         if (state == NULL) {
1481                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1482                                 talloc_free(tmp_ctx);
1483                                 return -1;
1484                         }
1485                 
1486                         ctdb_client_async_add(async_data, state);
1487                 }
1488         }
             /* all releases must have completed before any takeover is sent */
1489         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1490                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1491                 talloc_free(tmp_ctx);
1492                 return -1;
1493         }
1494         talloc_free(async_data);
1495
1496
1497         /* tell all nodes to get their own IPs */
1498         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1499         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1500         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1501                 if (tmp_ip->pnn == -1) {
1502                         /* this IP won't be taken over */
1503                         continue;
1504                 }
1505
1506                 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1507                         ipv4.pnn = tmp_ip->pnn;
1508                         ipv4.sin = tmp_ip->addr.ip;
1509
1510                         timeout = TAKEOVER_TIMEOUT();
1511                         data.dsize = sizeof(ipv4);
1512                         data.dptr  = (uint8_t *)&ipv4;
1513                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
1514                                         0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1515                                         data, async_data,
1516                                         &timeout, NULL);
1517                 } else {
1518                         ip.pnn  = tmp_ip->pnn;
1519                         ip.addr = tmp_ip->addr;
1520
1521                         timeout = TAKEOVER_TIMEOUT();
1522                         data.dsize = sizeof(ip);
1523                         data.dptr  = (uint8_t *)&ip;
1524                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
1525                                         0, CTDB_CONTROL_TAKEOVER_IP, 0,
1526                                         data, async_data,
1527                                         &timeout, NULL);
1528                 }
1529                 if (state == NULL) {
1530                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1531                         talloc_free(tmp_ctx);
1532                         return -1;
1533                 }
1534                 
1535                 ctdb_client_async_add(async_data, state);
1536         }
1537         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1538                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1539                 talloc_free(tmp_ctx);
1540                 return -1;
1541         }
1542
1543         /* tell all nodes to update natgw */
1544         /* send the flags update natgw on all connected nodes */
1545         data.dptr  = discard_const("ipreallocated");
1546         data.dsize = strlen((char *)data.dptr) + 1; 
1547         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
             /* a failure here is logged but does not fail the takeover run */
1548         if (ctdb_client_async_control(ctdb, CTDB_CONTROL_RUN_EVENTSCRIPTS,
1549                                       nodes, 0, TAKEOVER_TIMEOUT(),
1550                                       false, data,
1551                                       NULL, NULL,
1552                                       NULL) != 0) {
1553                 DEBUG(DEBUG_ERR, (__location__ " ctdb_control to updatenatgw failed\n"));
1554         }
1555
1556         talloc_free(tmp_ctx);
1557         return 0;
1558 }
1559
1560
1561 /*
1562   destroy a ctdb_client_ip structure
1563  */
1564 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1565 {
1566         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1567                 ctdb_addr_to_str(&ip->addr),
1568                 ntohs(ip->addr.ip.sin_port),
1569                 ip->client_id));
1570
1571         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1572         return 0;
1573 }
1574
1575 /*
1576   called by a client to inform us of a TCP connection that it is managing
1577   that should tickled with an ACK when IP takeover is done
1578   we handle both the old ipv4 style of packets as well as the new ipv4/6
1579   pdus.
1580  */
1581 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1582                                 TDB_DATA indata)
1583 {
1584         struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1585         struct ctdb_control_tcp *old_addr = NULL;
1586         struct ctdb_control_tcp_addr new_addr;
1587         struct ctdb_control_tcp_addr *tcp_sock = NULL;
1588         struct ctdb_tcp_list *tcp;
1589         struct ctdb_tcp_connection t;
1590         int ret;
1591         TDB_DATA data;
1592         struct ctdb_client_ip *ip;
1593         struct ctdb_vnn *vnn;
1594         ctdb_sock_addr addr;
1595
1596         switch (indata.dsize) {
1597         case sizeof(struct ctdb_control_tcp):
1598                 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1599                 ZERO_STRUCT(new_addr);
1600                 tcp_sock = &new_addr;
1601                 tcp_sock->src.ip  = old_addr->src;
1602                 tcp_sock->dest.ip = old_addr->dest;
1603                 break;
1604         case sizeof(struct ctdb_control_tcp_addr):
1605                 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1606                 break;
1607         default:
1608                 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1609                                  "to ctdb_control_tcp_client. size was %d but "
1610                                  "only allowed sizes are %lu and %lu\n",
1611                                  (int)indata.dsize,
1612                                  (long unsigned)sizeof(struct ctdb_control_tcp),
1613                                  (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1614                 return -1;
1615         }
1616
1617         addr = tcp_sock->src;
1618         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1619         addr = tcp_sock->dest;
1620         ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1621
1622         ZERO_STRUCT(addr);
1623         memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1624         vnn = find_public_ip_vnn(ctdb, &addr);
1625         if (vnn == NULL) {
1626                 switch (addr.sa.sa_family) {
1627                 case AF_INET:
1628                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1629                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1630                                         ctdb_addr_to_str(&addr)));
1631                         }
1632                         break;
1633                 case AF_INET6:
1634                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1635                                 ctdb_addr_to_str(&addr)));
1636                         break;
1637                 default:
1638                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1639                 }
1640
1641                 return 0;
1642         }
1643
1644         if (vnn->pnn != ctdb->pnn) {
1645                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1646                         ctdb_addr_to_str(&addr),
1647                         client_id, client->pid));
1648                 /* failing this call will tell smbd to die */
1649                 return -1;
1650         }
1651
1652         ip = talloc(client, struct ctdb_client_ip);
1653         CTDB_NO_MEMORY(ctdb, ip);
1654
1655         ip->ctdb      = ctdb;
1656         ip->addr      = addr;
1657         ip->client_id = client_id;
1658         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1659         DLIST_ADD(ctdb->client_ip_list, ip);
1660
1661         tcp = talloc(client, struct ctdb_tcp_list);
1662         CTDB_NO_MEMORY(ctdb, tcp);
1663
1664         tcp->connection.src_addr = tcp_sock->src;
1665         tcp->connection.dst_addr = tcp_sock->dest;
1666
1667         DLIST_ADD(client->tcp_list, tcp);
1668
1669         t.src_addr = tcp_sock->src;
1670         t.dst_addr = tcp_sock->dest;
1671
1672         data.dptr = (uint8_t *)&t;
1673         data.dsize = sizeof(t);
1674
1675         switch (addr.sa.sa_family) {
1676         case AF_INET:
1677                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1678                         (unsigned)ntohs(tcp_sock->dest.ip.sin_port), 
1679                         ctdb_addr_to_str(&tcp_sock->src),
1680                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1681                 break;
1682         case AF_INET6:
1683                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1684                         (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port), 
1685                         ctdb_addr_to_str(&tcp_sock->src),
1686                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1687                 break;
1688         default:
1689                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1690         }
1691
1692
1693         /* tell all nodes about this tcp connection */
1694         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1695                                        CTDB_CONTROL_TCP_ADD,
1696                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1697         if (ret != 0) {
1698                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1699                 return -1;
1700         }
1701
1702         return 0;
1703 }
1704
1705 /*
1706   find a tcp address on a list
1707  */
1708 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array, 
1709                                            struct ctdb_tcp_connection *tcp)
1710 {
1711         int i;
1712
1713         if (array == NULL) {
1714                 return NULL;
1715         }
1716
1717         for (i=0;i<array->num;i++) {
1718                 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1719                     ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1720                         return &array->connections[i];
1721                 }
1722         }
1723         return NULL;
1724 }
1725
1726
1727
1728 /*
1729   called by a daemon to inform us of a TCP connection that one of its
1730   clients managing that should tickled with an ACK when IP takeover is
1731   done
1732  */
1733 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1734 {
1735         struct ctdb_tcp_connection *p = (struct ctdb_tcp_connection *)indata.dptr;
1736         struct ctdb_tcp_array *tcparray;
1737         struct ctdb_tcp_connection tcp;
1738         struct ctdb_vnn *vnn;
1739
1740         vnn = find_public_ip_vnn(ctdb, &p->dst_addr);
1741         if (vnn == NULL) {
1742                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1743                         ctdb_addr_to_str(&p->dst_addr)));
1744
1745                 return -1;
1746         }
1747
1748
1749         tcparray = vnn->tcp_array;
1750
1751         /* If this is the first tickle */
1752         if (tcparray == NULL) {
1753                 tcparray = talloc_size(ctdb->nodes, 
1754                         offsetof(struct ctdb_tcp_array, connections) +
1755                         sizeof(struct ctdb_tcp_connection) * 1);
1756                 CTDB_NO_MEMORY(ctdb, tcparray);
1757                 vnn->tcp_array = tcparray;
1758
1759                 tcparray->num = 0;
1760                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1761                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1762
1763                 tcparray->connections[tcparray->num].src_addr = p->src_addr;
1764                 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1765                 tcparray->num++;
1766
1767                 if (tcp_update_needed) {
1768                         vnn->tcp_update_needed = true;
1769                 }
1770                 return 0;
1771         }
1772
1773
1774         /* Do we already have this tickle ?*/
1775         tcp.src_addr = p->src_addr;
1776         tcp.dst_addr = p->dst_addr;
1777         if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1778                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1779                         ctdb_addr_to_str(&tcp.dst_addr),
1780                         ntohs(tcp.dst_addr.ip.sin_port),
1781                         vnn->pnn));
1782                 return 0;
1783         }
1784
1785         /* A new tickle, we must add it to the array */
1786         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1787                                         struct ctdb_tcp_connection,
1788                                         tcparray->num+1);
1789         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1790
1791         vnn->tcp_array = tcparray;
1792         tcparray->connections[tcparray->num].src_addr = p->src_addr;
1793         tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1794         tcparray->num++;
1795                                 
1796         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1797                 ctdb_addr_to_str(&tcp.dst_addr),
1798                 ntohs(tcp.dst_addr.ip.sin_port),
1799                 vnn->pnn));
1800
1801         if (tcp_update_needed) {
1802                 vnn->tcp_update_needed = true;
1803         }
1804
1805         return 0;
1806 }
1807
1808
1809 /*
1810   called by a daemon to inform us of a TCP connection that one of its
1811   clients managing that should tickled with an ACK when IP takeover is
1812   done
1813  */
1814 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1815 {
1816         struct ctdb_tcp_connection *tcpp;
1817         struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1818
1819         if (vnn == NULL) {
1820                 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1821                         ctdb_addr_to_str(&conn->dst_addr)));
1822                 return;
1823         }
1824
1825         /* if the array is empty we cant remove it
1826            and we dont need to do anything
1827          */
1828         if (vnn->tcp_array == NULL) {
1829                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1830                         ctdb_addr_to_str(&conn->dst_addr),
1831                         ntohs(conn->dst_addr.ip.sin_port)));
1832                 return;
1833         }
1834
1835
1836         /* See if we know this connection
1837            if we dont know this connection  then we dont need to do anything
1838          */
1839         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1840         if (tcpp == NULL) {
1841                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1842                         ctdb_addr_to_str(&conn->dst_addr),
1843                         ntohs(conn->dst_addr.ip.sin_port)));
1844                 return;
1845         }
1846
1847
1848         /* We need to remove this entry from the array.
1849            Instead of allocating a new array and copying data to it
1850            we cheat and just copy the last entry in the existing array
1851            to the entry that is to be removed and just shring the 
1852            ->num field
1853          */
1854         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1855         vnn->tcp_array->num--;
1856
1857         /* If we deleted the last entry we also need to remove the entire array
1858          */
1859         if (vnn->tcp_array->num == 0) {
1860                 talloc_free(vnn->tcp_array);
1861                 vnn->tcp_array = NULL;
1862         }               
1863
1864         vnn->tcp_update_needed = true;
1865
1866         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1867                 ctdb_addr_to_str(&conn->src_addr),
1868                 ntohs(conn->src_addr.ip.sin_port)));
1869 }
1870
1871
1872 /*
1873   called by a daemon to inform us of a TCP connection that one of its
1874   clients used are no longer needed in the tickle database
1875  */
1876 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1877 {
1878         struct ctdb_tcp_connection *conn = (struct ctdb_tcp_connection *)indata.dptr;
1879
1880         ctdb_remove_tcp_connection(ctdb, conn);
1881
1882         return 0;
1883 }
1884
1885
/*
  called when a daemon restarts.

  The intent is to send all tickles for all public addresses we are
  serving to the new node immediately; that is not implemented yet, so
  this currently does nothing and returns 0.
 */
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
{
        /* XXX here we should send all tickles we are serving to the new node */
        return 0;
}
1895
1896
1897 /*
1898   called when a client structure goes away - hook to remove
1899   elements from the tcp_list in all daemons
1900  */
1901 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1902 {
1903         while (client->tcp_list) {
1904                 struct ctdb_tcp_list *tcp = client->tcp_list;
1905                 DLIST_REMOVE(client->tcp_list, tcp);
1906                 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1907         }
1908 }
1909
1910
1911 /*
1912   release all IPs on shutdown
1913  */
1914 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1915 {
1916         struct ctdb_vnn *vnn;
1917
1918         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1919                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1920                         ctdb_vnn_unassign_iface(ctdb, vnn);
1921                         continue;
1922                 }
1923                 if (!vnn->iface) {
1924                         continue;
1925                 }
1926                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1927                                   ctdb_vnn_iface_string(vnn),
1928                                   ctdb_addr_to_str(&vnn->public_address),
1929                                   vnn->public_netmask_bits);
1930                 release_kill_clients(ctdb, &vnn->public_address);
1931                 ctdb_vnn_unassign_iface(ctdb, vnn);
1932         }
1933 }
1934
1935
1936 /*
1937   get list of public IPs
1938  */
1939 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
1940                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1941 {
1942         int i, num, len;
1943         struct ctdb_all_public_ips *ips;
1944         struct ctdb_vnn *vnn;
1945         bool only_available = false;
1946
1947         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1948                 only_available = true;
1949         }
1950
1951         /* count how many public ip structures we have */
1952         num = 0;
1953         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1954                 num++;
1955         }
1956
1957         len = offsetof(struct ctdb_all_public_ips, ips) + 
1958                 num*sizeof(struct ctdb_public_ip);
1959         ips = talloc_zero_size(outdata, len);
1960         CTDB_NO_MEMORY(ctdb, ips);
1961
1962         i = 0;
1963         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1964                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1965                         continue;
1966                 }
1967                 ips->ips[i].pnn  = vnn->pnn;
1968                 ips->ips[i].addr = vnn->public_address;
1969                 i++;
1970         }
1971         ips->num = i;
1972         len = offsetof(struct ctdb_all_public_ips, ips) +
1973                 i*sizeof(struct ctdb_public_ip);
1974
1975         outdata->dsize = len;
1976         outdata->dptr  = (uint8_t *)ips;
1977
1978         return 0;
1979 }
1980
1981
1982 /*
1983   get list of public IPs, old ipv4 style.  only returns ipv4 addresses
1984  */
1985 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb, 
1986                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1987 {
1988         int i, num, len;
1989         struct ctdb_all_public_ipsv4 *ips;
1990         struct ctdb_vnn *vnn;
1991
1992         /* count how many public ip structures we have */
1993         num = 0;
1994         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1995                 if (vnn->public_address.sa.sa_family != AF_INET) {
1996                         continue;
1997                 }
1998                 num++;
1999         }
2000
2001         len = offsetof(struct ctdb_all_public_ipsv4, ips) + 
2002                 num*sizeof(struct ctdb_public_ipv4);
2003         ips = talloc_zero_size(outdata, len);
2004         CTDB_NO_MEMORY(ctdb, ips);
2005
2006         outdata->dsize = len;
2007         outdata->dptr  = (uint8_t *)ips;
2008
2009         ips->num = num;
2010         i = 0;
2011         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2012                 if (vnn->public_address.sa.sa_family != AF_INET) {
2013                         continue;
2014                 }
2015                 ips->ips[i].pnn = vnn->pnn;
2016                 ips->ips[i].sin = vnn->public_address.ip;
2017                 i++;
2018         }
2019
2020         return 0;
2021 }
2022
2023 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2024                                         struct ctdb_req_control *c,
2025                                         TDB_DATA indata,
2026                                         TDB_DATA *outdata)
2027 {
2028         int i, num, len;
2029         ctdb_sock_addr *addr;
2030         struct ctdb_control_public_ip_info *info;
2031         struct ctdb_vnn *vnn;
2032
2033         addr = (ctdb_sock_addr *)indata.dptr;
2034
2035         vnn = find_public_ip_vnn(ctdb, addr);
2036         if (vnn == NULL) {
2037                 /* if it is not a public ip   it could be our 'single ip' */
2038                 if (ctdb->single_ip_vnn) {
2039                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
2040                                 vnn = ctdb->single_ip_vnn;
2041                         }
2042                 }
2043         }
2044         if (vnn == NULL) {
2045                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2046                                  "'%s'not a public address\n",
2047                                  ctdb_addr_to_str(addr)));
2048                 return -1;
2049         }
2050
2051         /* count how many public ip structures we have */
2052         num = 0;
2053         for (;vnn->ifaces[num];) {
2054                 num++;
2055         }
2056
2057         len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2058                 num*sizeof(struct ctdb_control_iface_info);
2059         info = talloc_zero_size(outdata, len);
2060         CTDB_NO_MEMORY(ctdb, info);
2061
2062         info->ip.addr = vnn->public_address;
2063         info->ip.pnn = vnn->pnn;
2064         info->active_idx = 0xFFFFFFFF;
2065
2066         for (i=0; vnn->ifaces[i]; i++) {
2067                 struct ctdb_iface *cur;
2068
2069                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2070                 if (cur == NULL) {
2071                         DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2072                                            vnn->ifaces[i]));
2073                         return -1;
2074                 }
2075                 if (vnn->iface == cur) {
2076                         info->active_idx = i;
2077                 }
2078                 strcpy(info->ifaces[i].name, cur->name);
2079                 info->ifaces[i].link_state = cur->link_up;
2080                 info->ifaces[i].references = cur->references;
2081         }
2082         info->num = i;
2083         len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2084                 i*sizeof(struct ctdb_control_iface_info);
2085
2086         outdata->dsize = len;
2087         outdata->dptr  = (uint8_t *)info;
2088
2089         return 0;
2090 }
2091
2092 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2093                                 struct ctdb_req_control *c,
2094                                 TDB_DATA *outdata)
2095 {
2096         int i, num, len;
2097         struct ctdb_control_get_ifaces *ifaces;
2098         struct ctdb_iface *cur;
2099
2100         /* count how many public ip structures we have */
2101         num = 0;
2102         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2103                 num++;
2104         }
2105
2106         len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2107                 num*sizeof(struct ctdb_control_iface_info);
2108         ifaces = talloc_zero_size(outdata, len);
2109         CTDB_NO_MEMORY(ctdb, ifaces);
2110
2111         i = 0;
2112         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2113                 strcpy(ifaces->ifaces[i].name, cur->name);
2114                 ifaces->ifaces[i].link_state = cur->link_up;
2115                 ifaces->ifaces[i].references = cur->references;
2116                 i++;
2117         }
2118         ifaces->num = i;
2119         len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2120                 i*sizeof(struct ctdb_control_iface_info);
2121
2122         outdata->dsize = len;
2123         outdata->dptr  = (uint8_t *)ifaces;
2124
2125         return 0;
2126 }
2127
2128 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2129                                     struct ctdb_req_control *c,
2130                                     TDB_DATA indata)
2131 {
2132         struct ctdb_control_iface_info *info;
2133         struct ctdb_iface *iface;
2134         bool link_up = false;
2135
2136         info = (struct ctdb_control_iface_info *)indata.dptr;
2137
2138         if (info->name[CTDB_IFACE_SIZE] != '\0') {
2139                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2140                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2141                                   len, len, info->name));
2142                 return -1;
2143         }
2144
2145         switch (info->link_state) {
2146         case 0:
2147                 link_up = false;
2148                 break;
2149         case 1:
2150                 link_up = true;
2151                 break;
2152         default:
2153                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2154                                   (unsigned int)info->link_state));
2155                 return -1;
2156         }
2157
2158         if (info->references != 0) {
2159                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2160                                   (unsigned int)info->references));
2161                 return -1;
2162         }
2163
2164         iface = ctdb_find_iface(ctdb, info->name);
2165         if (iface == NULL) {
2166                 DEBUG(DEBUG_ERR, (__location__ "iface[%s] is unknown\n",
2167                                   info->name));
2168                 return -1;
2169         }
2170
2171         if (link_up == iface->link_up) {
2172                 return 0;
2173         }
2174
2175         DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2176               ("iface[%s] has changed it's link status %s => %s\n",
2177                iface->name,
2178                iface->link_up?"up":"down",
2179                link_up?"up":"down"));
2180
2181         iface->link_up = link_up;
2182         return 0;
2183 }
2184
2185
2186 /* 
2187    structure containing the listening socket and the list of tcp connections
2188    that the ctdb daemon is to kill
2189 */
2190 struct ctdb_kill_tcp {
2191         struct ctdb_vnn *vnn;
2192         struct ctdb_context *ctdb;
2193         int capture_fd;
2194         struct fd_event *fde;
2195         trbt_tree_t *connections;
2196         void *private_data;
2197 };
2198
2199 /*
2200   a tcp connection that is to be killed
2201  */
2202 struct ctdb_killtcp_con {
2203         ctdb_sock_addr src_addr;
2204         ctdb_sock_addr dst_addr;
2205         int count;
2206         struct ctdb_kill_tcp *killtcp;
2207 };
2208
2209 /* this function is used to create a key to represent this socketpair
2210    in the killtcp tree.
2211    this key is used to insert and lookup matching socketpairs that are
2212    to be tickled and RST
2213 */
2214 #define KILLTCP_KEYLEN  10
2215 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2216 {
2217         static uint32_t key[KILLTCP_KEYLEN];
2218
2219         bzero(key, sizeof(key));
2220
2221         if (src->sa.sa_family != dst->sa.sa_family) {
2222                 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2223                 return key;
2224         }
2225         
2226         switch (src->sa.sa_family) {
2227         case AF_INET:
2228                 key[0]  = dst->ip.sin_addr.s_addr;
2229                 key[1]  = src->ip.sin_addr.s_addr;
2230                 key[2]  = dst->ip.sin_port;
2231                 key[3]  = src->ip.sin_port;
2232                 break;
2233         case AF_INET6:
2234                 key[0]  = dst->ip6.sin6_addr.s6_addr32[3];
2235                 key[1]  = src->ip6.sin6_addr.s6_addr32[3];
2236                 key[2]  = dst->ip6.sin6_addr.s6_addr32[2];
2237                 key[3]  = src->ip6.sin6_addr.s6_addr32[2];
2238                 key[4]  = dst->ip6.sin6_addr.s6_addr32[1];
2239                 key[5]  = src->ip6.sin6_addr.s6_addr32[1];
2240                 key[6]  = dst->ip6.sin6_addr.s6_addr32[0];
2241                 key[7]  = src->ip6.sin6_addr.s6_addr32[0];
2242                 key[8]  = dst->ip6.sin6_port;
2243                 key[9]  = src->ip6.sin6_port;
2244                 break;
2245         default:
2246                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
2247                 return key;
2248         }
2249
2250         return key;
2251 }
2252
2253 /*
2254   called when we get a read event on the raw socket
2255  */
2256 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde, 
2257                                 uint16_t flags, void *private_data)
2258 {
2259         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2260         struct ctdb_killtcp_con *con;
2261         ctdb_sock_addr src, dst;
2262         uint32_t ack_seq, seq;
2263
2264         if (!(flags & EVENT_FD_READ)) {
2265                 return;
2266         }
2267
2268         if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2269                                 killtcp->private_data,
2270                                 &src, &dst,
2271                                 &ack_seq, &seq) != 0) {
2272                 /* probably a non-tcp ACK packet */
2273                 return;
2274         }
2275
2276         /* check if we have this guy in our list of connections
2277            to kill
2278         */
2279         con = trbt_lookuparray32(killtcp->connections, 
2280                         KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2281         if (con == NULL) {
2282                 /* no this was some other packet we can just ignore */
2283                 return;
2284         }
2285
2286         /* This one has been tickled !
2287            now reset him and remove him from the list.
2288          */
2289         DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2290                 ntohs(con->dst_addr.ip.sin_port),
2291                 ctdb_addr_to_str(&con->src_addr),
2292                 ntohs(con->src_addr.ip.sin_port)));
2293
2294         ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
2295         talloc_free(con);
2296 }
2297
2298
2299 /* when traversing the list of all tcp connections to send tickle acks to
2300    (so that we can capture the ack coming back and kill the connection
2301     by a RST)
2302    this callback is called for each connection we are currently trying to kill
2303 */
2304 static void tickle_connection_traverse(void *param, void *data)
2305 {
2306         struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2307
2308         /* have tried too many times, just give up */
2309         if (con->count >= 5) {
2310                 /* can't delete in traverse: reparent to delete_cons */
2311                 talloc_steal(param, con);
2312                 return;
2313         }
2314
2315         /* othervise, try tickling it again */
2316         con->count++;
2317         ctdb_sys_send_tcp(
2318                 (ctdb_sock_addr *)&con->dst_addr,
2319                 (ctdb_sock_addr *)&con->src_addr,
2320                 0, 0, 0);
2321 }
2322
2323
2324 /* 
2325    called every second until all sentenced connections have been reset
2326  */
2327 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te, 
2328                                               struct timeval t, void *private_data)
2329 {
2330         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2331         void *delete_cons = talloc_new(NULL);
2332
2333         /* loop over all connections sending tickle ACKs */
2334         trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
2335
2336         /* now we've finished traverse, it's safe to do deletion. */
2337         talloc_free(delete_cons);
2338
2339         /* If there are no more connections to kill we can remove the
2340            entire killtcp structure
2341          */
2342         if ( (killtcp->connections == NULL) || 
2343              (killtcp->connections->root == NULL) ) {
2344                 talloc_free(killtcp);
2345                 return;
2346         }
2347
2348         /* try tickling them again in a seconds time
2349          */
2350         event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
2351                         ctdb_tickle_sentenced_connections, killtcp);
2352 }
2353
2354 /*
2355   destroy the killtcp structure
2356  */
2357 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2358 {
2359         if (killtcp->vnn) {
2360                 killtcp->vnn->killtcp = NULL;
2361         }
2362         return 0;
2363 }
2364
2365
/* insert callback for the killtcp tree.

   Nothing fancy here: whatever connection structure is already stored
   under the key ('data') is unconditionally replaced by the new one
   ('parm').

   The old one is not even freed if it did exist; it is talloc_stolen by
   the same node in the tree anyway and will be deleted when the new data
   is deleted.
*/
static void *add_killtcp_callback(void *parm, void *data)
{
        void *replacement = parm;

        return replacement;
}
2377
2378 /*
2379   add a tcp socket to the list of connections we want to RST
2380  */
2381 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, 
2382                                        ctdb_sock_addr *s,
2383                                        ctdb_sock_addr *d)
2384 {
2385         ctdb_sock_addr src, dst;
2386         struct ctdb_kill_tcp *killtcp;
2387         struct ctdb_killtcp_con *con;
2388         struct ctdb_vnn *vnn;
2389
2390         ctdb_canonicalize_ip(s, &src);
2391         ctdb_canonicalize_ip(d, &dst);
2392
2393         vnn = find_public_ip_vnn(ctdb, &dst);
2394         if (vnn == NULL) {
2395                 vnn = find_public_ip_vnn(ctdb, &src);
2396         }
2397         if (vnn == NULL) {
2398                 /* if it is not a public ip   it could be our 'single ip' */
2399                 if (ctdb->single_ip_vnn) {
2400                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2401                                 vnn = ctdb->single_ip_vnn;
2402                         }
2403                 }
2404         }
2405         if (vnn == NULL) {
2406                 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n")); 
2407                 return -1;
2408         }
2409
2410         killtcp = vnn->killtcp;
2411         
2412         /* If this is the first connection to kill we must allocate
2413            a new structure
2414          */
2415         if (killtcp == NULL) {
2416                 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
2417                 CTDB_NO_MEMORY(ctdb, killtcp);
2418
2419                 killtcp->vnn         = vnn;
2420                 killtcp->ctdb        = ctdb;
2421                 killtcp->capture_fd  = -1;
2422                 killtcp->connections = trbt_create(killtcp, 0);
2423
2424                 vnn->killtcp         = killtcp;
2425                 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2426         }
2427
2428
2429
2430         /* create a structure that describes this connection we want to
2431            RST and store it in killtcp->connections
2432         */
2433         con = talloc(killtcp, struct ctdb_killtcp_con);
2434         CTDB_NO_MEMORY(ctdb, con);
2435         con->src_addr = src;
2436         con->dst_addr = dst;
2437         con->count    = 0;
2438         con->killtcp  = killtcp;
2439
2440
2441         trbt_insertarray32_callback(killtcp->connections,
2442                         KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2443                         add_killtcp_callback, con);
2444
2445         /* 
2446            If we dont have a socket to listen on yet we must create it
2447          */
2448         if (killtcp->capture_fd == -1) {
2449                 const char *iface = ctdb_vnn_iface_string(vnn);
2450                 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2451                 if (killtcp->capture_fd == -1) {
2452                         DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2453                                           "socket on iface '%s' for killtcp (%s)\n",
2454                                           iface, strerror(errno)));
2455                         goto failed;
2456                 }
2457         }
2458
2459
2460         if (killtcp->fde == NULL) {
2461                 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd, 
2462                                             EVENT_FD_READ,
2463                                             capture_tcp_handler, killtcp);
2464                 tevent_fd_set_auto_close(killtcp->fde);
2465
2466                 /* We also need to set up some events to tickle all these connections
2467                    until they are all reset
2468                 */
2469                 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
2470                                 ctdb_tickle_sentenced_connections, killtcp);
2471         }
2472
2473         /* tickle him once now */
2474         ctdb_sys_send_tcp(
2475                 &con->dst_addr,
2476                 &con->src_addr,
2477                 0, 0, 0);
2478
2479         return 0;
2480
2481 failed:
2482         talloc_free(vnn->killtcp);
2483         vnn->killtcp = NULL;
2484         return -1;
2485 }
2486
2487 /*
2488   kill a TCP connection.
2489  */
2490 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
2491 {
2492         struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
2493
2494         return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
2495 }
2496
2497 /*
2498   called by a daemon to inform us of the entire list of TCP tickles for
2499   a particular public address.
2500   this control should only be sent by the node that is currently serving
2501   that public address.
2502  */
2503 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2504 {
2505         struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
2506         struct ctdb_tcp_array *tcparray;
2507         struct ctdb_vnn *vnn;
2508
2509         /* We must at least have tickles.num or else we cant verify the size
2510            of the received data blob
2511          */
2512         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
2513                                         tickles.connections)) {
2514                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
2515                 return -1;
2516         }
2517
2518         /* verify that the size of data matches what we expect */
2519         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
2520                                 tickles.connections)
2521                          + sizeof(struct ctdb_tcp_connection)
2522                                  * list->tickles.num) {
2523                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
2524                 return -1;
2525         }       
2526
2527         vnn = find_public_ip_vnn(ctdb, &list->addr);
2528         if (vnn == NULL) {
2529                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n", 
2530                         ctdb_addr_to_str(&list->addr)));
2531
2532                 return 1;
2533         }
2534
2535         /* remove any old ticklelist we might have */
2536         talloc_free(vnn->tcp_array);
2537         vnn->tcp_array = NULL;
2538
2539         tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
2540         CTDB_NO_MEMORY(ctdb, tcparray);
2541
2542         tcparray->num = list->tickles.num;
2543
2544         tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
2545         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2546
2547         memcpy(tcparray->connections, &list->tickles.connections[0], 
2548                sizeof(struct ctdb_tcp_connection)*tcparray->num);
2549
2550         /* We now have a new fresh tickle list array for this vnn */
2551         vnn->tcp_array = talloc_steal(vnn, tcparray);
2552         
2553         return 0;
2554 }
2555
2556 /*
2557   called to return the full list of tickles for the puclic address associated 
2558   with the provided vnn
2559  */
2560 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2561 {
2562         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2563         struct ctdb_control_tcp_tickle_list *list;
2564         struct ctdb_tcp_array *tcparray;
2565         int num;
2566         struct ctdb_vnn *vnn;
2567
2568         vnn = find_public_ip_vnn(ctdb, addr);
2569         if (vnn == NULL) {
2570                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n", 
2571                         ctdb_addr_to_str(addr)));
2572
2573                 return 1;
2574         }
2575
2576         tcparray = vnn->tcp_array;
2577         if (tcparray) {
2578                 num = tcparray->num;
2579         } else {
2580                 num = 0;
2581         }
2582
2583         outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
2584                                 tickles.connections)
2585                         + sizeof(struct ctdb_tcp_connection) * num;
2586
2587         outdata->dptr  = talloc_size(outdata, outdata->dsize);
2588         CTDB_NO_MEMORY(ctdb, outdata->dptr);
2589         list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
2590
2591         list->addr = *addr;
2592         list->tickles.num = num;
2593         if (num) {
2594                 memcpy(&list->tickles.connections[0], tcparray->connections, 
2595                         sizeof(struct ctdb_tcp_connection) * num);
2596         }
2597
2598         return 0;
2599 }
2600
2601
2602 /*
2603   set the list of all tcp tickles for a public address
2604  */
2605 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, 
2606                               struct timeval timeout, uint32_t destnode, 
2607                               ctdb_sock_addr *addr,
2608                               struct ctdb_tcp_array *tcparray)
2609 {
2610         int ret, num;
2611         TDB_DATA data;
2612         struct ctdb_control_tcp_tickle_list *list;
2613
2614         if (tcparray) {
2615                 num = tcparray->num;
2616         } else {
2617                 num = 0;
2618         }
2619
2620         data.dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
2621                                 tickles.connections) +
2622                         sizeof(struct ctdb_tcp_connection) * num;
2623         data.dptr = talloc_size(ctdb, data.dsize);
2624         CTDB_NO_MEMORY(ctdb, data.dptr);
2625
2626         list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
2627         list->addr = *addr;
2628         list->tickles.num = num;
2629         if (tcparray) {
2630                 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
2631         }
2632
2633         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
2634                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2635                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2636         if (ret != 0) {
2637                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2638                 return -1;
2639         }
2640
2641         talloc_free(data.dptr);
2642
2643         return ret;
2644 }
2645
2646
2647 /*
2648   perform tickle updates if required
2649  */
2650 static void ctdb_update_tcp_tickles(struct event_context *ev, 
2651                                 struct timed_event *te, 
2652                                 struct timeval t, void *private_data)
2653 {
2654         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2655         int ret;
2656         struct ctdb_vnn *vnn;
2657
2658         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2659                 /* we only send out updates for public addresses that 
2660                    we have taken over
2661                  */
2662                 if (ctdb->pnn != vnn->pnn) {
2663                         continue;
2664                 }
2665                 /* We only send out the updates if we need to */
2666                 if (!vnn->tcp_update_needed) {
2667                         continue;
2668                 }
2669                 ret = ctdb_ctrl_set_tcp_tickles(ctdb, 
2670                                 TAKEOVER_TIMEOUT(),
2671                                 CTDB_BROADCAST_CONNECTED,
2672                                 &vnn->public_address,
2673                                 vnn->tcp_array);
2674                 if (ret != 0) {
2675                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2676                                 ctdb_addr_to_str(&vnn->public_address)));
2677                 }
2678         }
2679
2680         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2681                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
2682                              ctdb_update_tcp_tickles, ctdb);
2683 }               
2684         
2685
2686 /*
2687   start periodic update of tcp tickles
2688  */
2689 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2690 {
2691         ctdb->tickle_update_context = talloc_new(ctdb);
2692
2693         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2694                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
2695                              ctdb_update_tcp_tickles, ctdb);
2696 }
2697
2698
2699
2700
2701 struct control_gratious_arp {
2702         struct ctdb_context *ctdb;
2703         ctdb_sock_addr addr;
2704         const char *iface;
2705         int count;
2706 };
2707
2708 /*
2709   send a control_gratuitous arp
2710  */
2711 static void send_gratious_arp(struct event_context *ev, struct timed_event *te, 
2712                                   struct timeval t, void *private_data)
2713 {
2714         int ret;
2715         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2716                                                         struct control_gratious_arp);
2717
2718         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2719         if (ret != 0) {
2720                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2721                                  arp->iface, strerror(errno)));
2722         }
2723
2724
2725         arp->count++;
2726         if (arp->count == CTDB_ARP_REPEAT) {
2727                 talloc_free(arp);
2728                 return;
2729         }
2730
2731         event_add_timed(arp->ctdb->ev, arp, 
2732                         timeval_current_ofs(CTDB_ARP_INTERVAL, 0), 
2733                         send_gratious_arp, arp);
2734 }
2735
2736
2737 /*
2738   send a gratious arp 
2739  */
2740 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2741 {
2742         struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2743         struct control_gratious_arp *arp;
2744
2745         /* verify the size of indata */
2746         if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2747                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2748                                  (unsigned)indata.dsize, 
2749                                  (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2750                 return -1;
2751         }
2752         if (indata.dsize != 
2753                 ( offsetof(struct ctdb_control_gratious_arp, iface)
2754                 + gratious_arp->len ) ){
2755
2756                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2757                         "but should be %u bytes\n", 
2758                          (unsigned)indata.dsize, 
2759                          (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2760                 return -1;
2761         }
2762
2763
2764         arp = talloc(ctdb, struct control_gratious_arp);
2765         CTDB_NO_MEMORY(ctdb, arp);
2766
2767         arp->ctdb  = ctdb;
2768         arp->addr   = gratious_arp->addr;
2769         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2770         CTDB_NO_MEMORY(ctdb, arp->iface);
2771         arp->count = 0;
2772         
2773         event_add_timed(arp->ctdb->ev, arp, 
2774                         timeval_zero(), send_gratious_arp, arp);
2775
2776         return 0;
2777 }
2778
2779 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2780 {
2781         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2782         int ret;
2783
2784         /* verify the size of indata */
2785         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2786                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2787                 return -1;
2788         }
2789         if (indata.dsize != 
2790                 ( offsetof(struct ctdb_control_ip_iface, iface)
2791                 + pub->len ) ){
2792
2793                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2794                         "but should be %u bytes\n", 
2795                          (unsigned)indata.dsize, 
2796                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2797                 return -1;
2798         }
2799
2800         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2801
2802         if (ret != 0) {
2803                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2804                 return -1;
2805         }
2806
2807         return 0;
2808 }
2809
/*
  called when releaseip event finishes for del_public_address

  private_data is the talloc context that was created for the event
  script run; all that is left to do is to free it. The script status
  is not looked at.
 */
static void delete_ip_callback(struct ctdb_context *ctdb, int status, 
                                void *private_data)
{
        (void)ctdb;
        (void)status;

        talloc_free(private_data);
}
2818
2819 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2820 {
2821         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2822         struct ctdb_vnn *vnn;
2823         int ret;
2824
2825         /* verify the size of indata */
2826         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2827                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2828                 return -1;
2829         }
2830         if (indata.dsize != 
2831                 ( offsetof(struct ctdb_control_ip_iface, iface)
2832                 + pub->len ) ){
2833
2834                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2835                         "but should be %u bytes\n", 
2836                          (unsigned)indata.dsize, 
2837                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2838                 return -1;
2839         }
2840
2841         /* walk over all public addresses until we find a match */
2842         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2843                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2844                         TALLOC_CTX *mem_ctx;
2845
2846                         DLIST_REMOVE(ctdb->vnn, vnn);
2847                         if (vnn->iface == NULL) {
2848                                 talloc_free(vnn);
2849                                 return 0;
2850                         }
2851
2852                         mem_ctx = talloc_new(ctdb);
2853                         ret = ctdb_event_script_callback(ctdb, 
2854                                          mem_ctx, delete_ip_callback, mem_ctx,
2855                                          false,
2856                                          CTDB_EVENT_RELEASE_IP,
2857                                          "%s %s %u",
2858                                          ctdb_vnn_iface_string(vnn),
2859                                          ctdb_addr_to_str(&vnn->public_address),
2860                                          vnn->public_netmask_bits);
2861                         ctdb_vnn_unassign_iface(ctdb, vnn);
2862                         talloc_free(vnn);
2863                         if (ret != 0) {
2864                                 return -1;
2865                         }
2866                         return 0;
2867                 }
2868         }
2869
2870         return -1;
2871 }
2872
2873 /* This function is called from the recovery daemon to verify that a remote
2874    node has the expected ip allocation.
2875    This is verified against ctdb->ip_tree
2876 */
2877 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
2878 {
2879         struct ctdb_public_ip_list *tmp_ip; 
2880         int i;
2881
2882         if (ctdb->ip_tree == NULL) {
2883                 /* dont know the expected allocation yet, assume remote node
2884                    is correct. */
2885                 return 0;
2886         }
2887
2888         if (ips == NULL) {
2889                 return 0;
2890         }
2891
2892         for (i=0; i<ips->num; i++) {
2893                 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
2894                 if (tmp_ip == NULL) {
2895                         DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
2896                         return -1;
2897                 }
2898
2899                 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
2900                         continue;
2901                 }
2902
2903                 if (tmp_ip->pnn != ips->ips[i].pnn) {
2904                         DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
2905                         return -1;
2906                 }
2907         }
2908
2909         return 0;
2910 }
2911
2912 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
2913 {
2914         struct ctdb_public_ip_list *tmp_ip; 
2915
2916         if (ctdb->ip_tree == NULL) {
2917                 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
2918                 return -1;
2919         }
2920
2921         tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
2922         if (tmp_ip == NULL) {
2923                 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
2924                 return -1;
2925         }
2926
2927         DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
2928         tmp_ip->pnn = ip->pnn;
2929
2930         return 0;
2931 }