add missing checks on so far ignored return values
[samba.git] / ctdb / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20 #include "includes.h"
21 #include "lib/events/events.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
29
30
/* timeout value built from the takeover_timeout tunable; expands to an
   expression that needs a variable named 'ctdb' in scope where it is used */
#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout, 0)

/* offset handed to timeval_current_ofs() between two arp/tickle rounds */
#define CTDB_ARP_INTERVAL 1
/* number of arp/tickle rounds sent before the arp state is freed */
#define CTDB_ARP_REPEAT   3
35
36 struct ctdb_takeover_arp {
37         struct ctdb_context *ctdb;
38         uint32_t count;
39         ctdb_sock_addr addr;
40         struct ctdb_tcp_array *tcparray;
41         struct ctdb_vnn *vnn;
42 };
43
44
45 /*
46   lists of tcp endpoints
47  */
48 struct ctdb_tcp_list {
49         struct ctdb_tcp_list *prev, *next;
50         struct ctdb_tcp_connection connection;
51 };
52
53 /*
54   list of clients to kill on IP release
55  */
56 struct ctdb_client_ip {
57         struct ctdb_client_ip *prev, *next;
58         struct ctdb_context *ctdb;
59         ctdb_sock_addr addr;
60         uint32_t client_id;
61 };
62
63
64 /*
65   send a gratuitous arp
66  */
67 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te, 
68                                   struct timeval t, void *private_data)
69 {
70         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
71                                                         struct ctdb_takeover_arp);
72         int i, ret;
73         struct ctdb_tcp_array *tcparray;
74
75         ret = ctdb_sys_send_arp(&arp->addr, arp->vnn->iface);
76         if (ret != 0) {
77                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed (%s)\n", strerror(errno)));
78         }
79
80         tcparray = arp->tcparray;
81         if (tcparray) {
82                 for (i=0;i<tcparray->num;i++) {
83                         struct ctdb_tcp_connection *tcon;
84
85                         tcon = &tcparray->connections[i];
86                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
87                                 (unsigned)ntohs(tcon->dst_addr.ip.sin_port), 
88                                 ctdb_addr_to_str(&tcon->src_addr),
89                                 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
90                         ret = ctdb_sys_send_tcp(
91                                 &tcon->src_addr, 
92                                 &tcon->dst_addr,
93                                 0, 0, 0);
94                         if (ret != 0) {
95                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
96                                         ctdb_addr_to_str(&tcon->src_addr)));
97                         }
98                 }
99         }
100
101         arp->count++;
102
103         if (arp->count == CTDB_ARP_REPEAT) {
104                 talloc_free(arp);
105                 return;
106         }
107
108         event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx, 
109                         timeval_current_ofs(CTDB_ARP_INTERVAL, 0), 
110                         ctdb_control_send_arp, arp);
111 }
112
113 struct takeover_callback_state {
114         struct ctdb_req_control *c;
115         ctdb_sock_addr *addr;
116         struct ctdb_vnn *vnn;
117 };
118
119 /*
120   called when takeip event finishes
121  */
122 static void takeover_ip_callback(struct ctdb_context *ctdb, int status, 
123                                  void *private_data)
124 {
125         struct takeover_callback_state *state = 
126                 talloc_get_type(private_data, struct takeover_callback_state);
127         struct ctdb_takeover_arp *arp;
128         struct ctdb_tcp_array *tcparray;
129
130         if (status != 0) {
131                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
132                         ctdb_addr_to_str(state->addr),
133                         state->vnn->iface));
134                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
135                 talloc_free(state);
136                 return;
137         }
138
139         if (!state->vnn->takeover_ctx) {
140                 state->vnn->takeover_ctx = talloc_new(ctdb);
141                 if (!state->vnn->takeover_ctx) {
142                         goto failed;
143                 }
144         }
145
146         arp = talloc_zero(state->vnn->takeover_ctx, struct ctdb_takeover_arp);
147         if (!arp) goto failed;
148         
149         arp->ctdb = ctdb;
150         arp->addr = *state->addr;
151         arp->vnn  = state->vnn;
152
153         tcparray = state->vnn->tcp_array;
154         if (tcparray) {
155                 /* add all of the known tcp connections for this IP to the
156                    list of tcp connections to send tickle acks for */
157                 arp->tcparray = talloc_steal(arp, tcparray);
158
159                 state->vnn->tcp_array = NULL;
160                 state->vnn->tcp_update_needed = true;
161         }
162
163         event_add_timed(arp->ctdb->ev, state->vnn->takeover_ctx, 
164                         timeval_zero(), ctdb_control_send_arp, arp);
165
166         /* the control succeeded */
167         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
168         talloc_free(state);
169         return;
170
171 failed:
172         ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
173         talloc_free(state);
174         return;
175 }
176
177 /*
178   Find the vnn of the node that has a public ip address
179   returns -1 if the address is not known as a public address
180  */
181 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
182 {
183         struct ctdb_vnn *vnn;
184
185         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
186                 if (ctdb_same_ip(&vnn->public_address, addr)) {
187                         return vnn;
188                 }
189         }
190
191         return NULL;
192 }
193
194
195 /*
196   take over an ip address
197  */
198 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, 
199                                  struct ctdb_req_control *c,
200                                  TDB_DATA indata, 
201                                  bool *async_reply)
202 {
203         int ret;
204         struct takeover_callback_state *state;
205         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
206         struct ctdb_vnn *vnn;
207
208         /* update out vnn list */
209         vnn = find_public_ip_vnn(ctdb, &pip->addr);
210         if (vnn == NULL) {
211                 DEBUG(DEBUG_ERR,("takeoverip called for an ip '%s' that is not a public address\n", 
212                         ctdb_addr_to_str(&pip->addr)));
213                 return 0;
214         }
215         vnn->pnn = pip->pnn;
216
217         /* if our kernel already has this IP, do nothing */
218         if (ctdb_sys_have_ip(&pip->addr)) {
219                 return 0;
220         }
221
222         state = talloc(ctdb, struct takeover_callback_state);
223         CTDB_NO_MEMORY(ctdb, state);
224
225         state->c = talloc_steal(ctdb, c);
226         state->addr = talloc(ctdb, ctdb_sock_addr);
227         CTDB_NO_MEMORY(ctdb, state->addr);
228
229         *state->addr = pip->addr;
230         state->vnn   = vnn;
231
232         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n", 
233                 ctdb_addr_to_str(&pip->addr),
234                 vnn->public_netmask_bits, 
235                 vnn->iface));
236
237         ret = ctdb_event_script_callback(ctdb, 
238                                          timeval_current_ofs(ctdb->tunable.script_timeout, 0),
239                                          state, takeover_ip_callback, state,
240                                          "takeip %s %s %u",
241                                          vnn->iface, 
242                                          talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
243                                          vnn->public_netmask_bits);
244
245         if (ret != 0) {
246                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
247                         ctdb_addr_to_str(&pip->addr),
248                         vnn->iface));
249                 talloc_free(state);
250                 return -1;
251         }
252
253         /* tell ctdb_control.c that we will be replying asynchronously */
254         *async_reply = true;
255
256         return 0;
257 }
258
259 /*
260   takeover an ip address old v4 style
261  */
262 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb, 
263                                 struct ctdb_req_control *c,
264                                 TDB_DATA indata, 
265                                 bool *async_reply)
266 {
267         TDB_DATA data;
268         
269         data.dsize = sizeof(struct ctdb_public_ip);
270         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
271         CTDB_NO_MEMORY(ctdb, data.dptr);
272         
273         memcpy(data.dptr, indata.dptr, indata.dsize);
274         return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
275 }
276
277 /*
278   kill any clients that are registered with a IP that is being released
279  */
280 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
281 {
282         struct ctdb_client_ip *ip;
283
284         DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
285                 ctdb_addr_to_str(addr)));
286
287         for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
288                 ctdb_sock_addr tmp_addr;
289
290                 tmp_addr = ip->addr;
291                 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n", 
292                         ip->client_id,
293                         ctdb_addr_to_str(&ip->addr)));
294
295                 if (ctdb_same_ip(&tmp_addr, addr)) {
296                         struct ctdb_client *client = ctdb_reqid_find(ctdb, 
297                                                                      ip->client_id, 
298                                                                      struct ctdb_client);
299                         DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n", 
300                                 ip->client_id,
301                                 ctdb_addr_to_str(&ip->addr),
302                                 client->pid));
303
304                         if (client->pid != 0) {
305                                 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
306                                         (unsigned)client->pid,
307                                         ctdb_addr_to_str(addr),
308                                         ip->client_id));
309                                 kill(client->pid, SIGKILL);
310                         }
311                 }
312         }
313 }
314
315 /*
316   called when releaseip event finishes
317  */
318 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
319                                 void *private_data)
320 {
321         struct takeover_callback_state *state = 
322                 talloc_get_type(private_data, struct takeover_callback_state);
323         TDB_DATA data;
324
325         /* send a message to all clients of this node telling them
326            that the cluster has been reconfigured and they should
327            release any sockets on this IP */
328         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
329         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
330         data.dsize = strlen((char *)data.dptr)+1;
331
332         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
333
334         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
335
336         /* kill clients that have registered with this IP */
337         release_kill_clients(ctdb, state->addr);
338         
339         /* the control succeeded */
340         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
341         talloc_free(state);
342 }
343
344 /*
345   release an ip address
346  */
347 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
348                                 struct ctdb_req_control *c,
349                                 TDB_DATA indata, 
350                                 bool *async_reply)
351 {
352         int ret;
353         struct takeover_callback_state *state;
354         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
355         struct ctdb_vnn *vnn;
356
357         /* update our vnn list */
358         vnn = find_public_ip_vnn(ctdb, &pip->addr);
359         if (vnn == NULL) {
360                 DEBUG(DEBUG_ERR,("takeoverip called for an ip '%s' that is not a public address\n",
361                         ctdb_addr_to_str(&pip->addr)));
362                 return 0;
363         }
364         vnn->pnn = pip->pnn;
365
366         /* stop any previous arps */
367         talloc_free(vnn->takeover_ctx);
368         vnn->takeover_ctx = NULL;
369
370         if (!ctdb_sys_have_ip(&pip->addr)) {
371                 DEBUG(DEBUG_INFO,("Redundant release of IP %s/%u on interface %s (ip not held)\n", 
372                         ctdb_addr_to_str(&pip->addr),
373                         vnn->public_netmask_bits, 
374                         vnn->iface));
375                 return 0;
376         }
377
378         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s\n", 
379                 ctdb_addr_to_str(&pip->addr),
380                 vnn->public_netmask_bits, 
381                 vnn->iface));
382
383         state = talloc(ctdb, struct takeover_callback_state);
384         CTDB_NO_MEMORY(ctdb, state);
385
386         state->c = talloc_steal(state, c);
387         state->addr = talloc(state, ctdb_sock_addr);       
388         CTDB_NO_MEMORY(ctdb, state->addr);
389         *state->addr = pip->addr;
390         state->vnn   = vnn;
391
392         ret = ctdb_event_script_callback(ctdb, 
393                                          timeval_current_ofs(ctdb->tunable.script_timeout, 0),
394                                          state, release_ip_callback, state,
395                                          "releaseip %s %s %u",
396                                          vnn->iface, 
397                                          talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
398                                          vnn->public_netmask_bits);
399         if (ret != 0) {
400                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
401                         ctdb_addr_to_str(&pip->addr),
402                         vnn->iface));
403                 talloc_free(state);
404                 return -1;
405         }
406
407         /* tell the control that we will be reply asynchronously */
408         *async_reply = true;
409         return 0;
410 }
411
412 /*
413   release an ip address old v4 style
414  */
415 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb, 
416                                 struct ctdb_req_control *c,
417                                 TDB_DATA indata, 
418                                 bool *async_reply)
419 {
420         TDB_DATA data;
421         
422         data.dsize = sizeof(struct ctdb_public_ip);
423         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
424         CTDB_NO_MEMORY(ctdb, data.dptr);
425         
426         memcpy(data.dptr, indata.dptr, indata.dsize);
427         return ctdb_control_release_ip(ctdb, c, data, async_reply);
428 }
429
430
431 static int ctdb_add_public_address(struct ctdb_context *ctdb, ctdb_sock_addr *addr, unsigned mask, const char *iface)
432 {
433         struct ctdb_vnn      *vnn;
434
435         /* Verify that we dont have an entry for this ip yet */
436         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
437                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
438                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
439                                 ctdb_addr_to_str(addr)));
440                         return -1;
441                 }               
442         }
443
444         /* create a new vnn structure for this ip address */
445         vnn = talloc_zero(ctdb, struct ctdb_vnn);
446         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
447         vnn->iface = talloc_strdup(vnn, iface);
448         CTDB_NO_MEMORY(ctdb, vnn->iface);
449         vnn->public_address      = *addr;
450         vnn->public_netmask_bits = mask;
451         vnn->pnn                 = -1;
452         
453         DLIST_ADD(ctdb->vnn, vnn);
454
455         return 0;
456 }
457
458
459 /*
460   setup the event script directory
461 */
462 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
463 {
464         ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
465         CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
466         return 0;
467 }
468
469 /*
470   setup the public address lists from a file
471 */
472 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
473 {
474         char **lines;
475         int nlines;
476         int i;
477
478         lines = file_lines_load(alist, &nlines, ctdb);
479         if (lines == NULL) {
480                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
481                 return -1;
482         }
483         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
484                 nlines--;
485         }
486
487         for (i=0;i<nlines;i++) {
488                 unsigned mask;
489                 ctdb_sock_addr addr;
490                 const char *addrstr;
491                 const char *iface;
492                 char *tok, *line;
493
494                 line = lines[i];
495                 while ((*line == ' ') || (*line == '\t')) {
496                         line++;
497                 }
498                 if (*line == '#') {
499                         continue;
500                 }
501                 if (strcmp(line, "") == 0) {
502                         continue;
503                 }
504                 tok = strtok(line, " \t");
505                 addrstr = tok;
506                 tok = strtok(NULL, " \t");
507                 if (tok == NULL) {
508                         if (NULL == ctdb->default_public_interface) {
509                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
510                                          i+1));
511                                 talloc_free(lines);
512                                 return -1;
513                         }
514                         iface = ctdb->default_public_interface;
515                 } else {
516                         iface = tok;
517                 }
518
519                 if (!addrstr || !parse_ip_mask(addrstr, iface, &addr, &mask)) {
520                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
521                         talloc_free(lines);
522                         return -1;
523                 }
524                 if (ctdb_add_public_address(ctdb, &addr, mask, iface)) {
525                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
526                         talloc_free(lines);
527                         return -1;
528                 }
529         }
530
531         talloc_free(lines);
532         return 0;
533 }
534
535
536
537
538 struct ctdb_public_ip_list {
539         struct ctdb_public_ip_list *next;
540         uint32_t pnn;
541         ctdb_sock_addr addr;
542 };
543
544
545 /* Given a physical node, return the number of
546    public addresses that is currently assigned to this node.
547 */
548 static int node_ip_coverage(struct ctdb_context *ctdb, 
549         int32_t pnn,
550         struct ctdb_public_ip_list *ips)
551 {
552         int num=0;
553
554         for (;ips;ips=ips->next) {
555                 if (ips->pnn == pnn) {
556                         num++;
557                 }
558         }
559         return num;
560 }
561
562
563 /* Check if this is a public ip known to the node, i.e. can that
564    node takeover this ip ?
565 */
566 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn, 
567                 struct ctdb_public_ip_list *ip)
568 {
569         struct ctdb_all_public_ips *public_ips;
570         int i;
571
572         public_ips = ctdb->nodes[pnn]->public_ips;
573
574         if (public_ips == NULL) {
575                 return -1;
576         }
577
578         for (i=0;i<public_ips->num;i++) {
579                 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
580                         /* yes, this node can serve this public ip */
581                         return 0;
582                 }
583         }
584
585         return -1;
586 }
587
588
589 /* search the node lists list for a node to takeover this ip.
590    pick the node that currently are serving the least number of ips
591    so that the ips get spread out evenly.
592 */
593 static int find_takeover_node(struct ctdb_context *ctdb, 
594                 struct ctdb_node_map *nodemap, uint32_t mask, 
595                 struct ctdb_public_ip_list *ip,
596                 struct ctdb_public_ip_list *all_ips)
597 {
598         int pnn, min=0, num;
599         int i;
600
601         pnn    = -1;
602         for (i=0;i<nodemap->num;i++) {
603                 if (nodemap->nodes[i].flags & mask) {
604                         /* This node is not healty and can not be used to serve
605                            a public address 
606                         */
607                         continue;
608                 }
609
610                 /* verify that this node can serve this ip */
611                 if (can_node_serve_ip(ctdb, i, ip)) {
612                         /* no it couldnt   so skip to the next node */
613                         continue;
614                 }
615
616                 num = node_ip_coverage(ctdb, i, all_ips);
617                 /* was this the first node we checked ? */
618                 if (pnn == -1) {
619                         pnn = i;
620                         min  = num;
621                 } else {
622                         if (num < min) {
623                                 pnn = i;
624                                 min  = num;
625                         }
626                 }
627         }       
628         if (pnn == -1) {
629                 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
630                         ctdb_addr_to_str(&ip->addr)));
631
632                 return -1;
633         }
634
635         ip->pnn = pnn;
636         return 0;
637 }
638
639 struct ctdb_public_ip_list *
640 add_ip_to_merged_list(struct ctdb_context *ctdb,
641                         TALLOC_CTX *tmp_ctx, 
642                         struct ctdb_public_ip_list *ip_list, 
643                         struct ctdb_public_ip *ip)
644 {
645         struct ctdb_public_ip_list *tmp_ip; 
646
647         /* do we already have this ip in our merged list ?*/
648         for (tmp_ip=ip_list;tmp_ip;tmp_ip=tmp_ip->next) {
649
650                 /* we already have this public ip in the list */
651                 if (ctdb_same_ip(&tmp_ip->addr, &ip->addr)) {
652                         return ip_list;
653                 }
654         }
655
656         /* this is a new public ip, we must add it to the list */
657         tmp_ip = talloc_zero(tmp_ctx, struct ctdb_public_ip_list);
658         CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
659         tmp_ip->pnn  = ip->pnn;
660         tmp_ip->addr = ip->addr;
661         tmp_ip->next = ip_list;
662
663         return tmp_ip;
664 }
665
666 struct ctdb_public_ip_list *
667 create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
668 {
669         int i, j;
670         struct ctdb_public_ip_list *ip_list = NULL;
671         struct ctdb_all_public_ips *public_ips;
672
673         for (i=0;i<ctdb->num_nodes;i++) {
674                 public_ips = ctdb->nodes[i]->public_ips;
675
676                 /* there were no public ips for this node */
677                 if (public_ips == NULL) {
678                         continue;
679                 }               
680
681                 for (j=0;j<public_ips->num;j++) {
682                         ip_list = add_ip_to_merged_list(ctdb, tmp_ctx,
683                                         ip_list, &public_ips->ips[j]);
684                 }
685         }
686
687         return ip_list;
688 }
689
690 /*
691   make any IP alias changes for public addresses that are necessary 
692  */
693 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
694 {
695         int i, num_healthy, retries;
696         struct ctdb_public_ip ip;
697         struct ctdb_public_ipv4 ipv4;
698         uint32_t mask;
699         struct ctdb_public_ip_list *all_ips, *tmp_ip;
700         int maxnode, maxnum=0, minnode, minnum=0, num;
701         TDB_DATA data;
702         struct timeval timeout;
703         struct client_async_data *async_data;
704         struct ctdb_client_control_state *state;
705         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
706
707
708         ZERO_STRUCT(ip);
709
710         /* Count how many completely healthy nodes we have */
711         num_healthy = 0;
712         for (i=0;i<nodemap->num;i++) {
713                 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
714                         num_healthy++;
715                 }
716         }
717
718         if (num_healthy > 0) {
719                 /* We have healthy nodes, so only consider them for 
720                    serving public addresses
721                 */
722                 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
723         } else {
724                 /* We didnt have any completely healthy nodes so
725                    use "disabled" nodes as a fallback
726                 */
727                 mask = NODE_FLAGS_INACTIVE;
728         }
729
730         /* since nodes only know about those public addresses that
731            can be served by that particular node, no single node has
732            a full list of all public addresses that exist in the cluster.
733            Walk over all node structures and create a merged list of
734            all public addresses that exist in the cluster.
735         */
736         all_ips = create_merged_ip_list(ctdb, tmp_ctx);
737
738         /* If we want deterministic ip allocations, i.e. that the ip addresses
739            will always be allocated the same way for a specific set of
740            available/unavailable nodes.
741         */
742         if (1 == ctdb->tunable.deterministic_public_ips) {              
743                 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
744                 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
745                         tmp_ip->pnn = i%nodemap->num;
746                 }
747         }
748
749
750         /* mark all public addresses with a masked node as being served by
751            node -1
752         */
753         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
754                 if (tmp_ip->pnn == -1) {
755                         continue;
756                 }
757                 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
758                         tmp_ip->pnn = -1;
759                 }
760         }
761
762         /* verify that the assigned nodes can serve that public ip
763            and set it to -1 if not
764         */
765         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
766                 if (tmp_ip->pnn == -1) {
767                         continue;
768                 }
769                 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
770                         /* this node can not serve this ip. */
771                         tmp_ip->pnn = -1;
772                 }
773         }
774
775
776         /* now we must redistribute all public addresses with takeover node
777            -1 among the nodes available
778         */
779         retries = 0;
780 try_again:
781         /* loop over all ip's and find a physical node to cover for 
782            each unassigned ip.
783         */
784         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
785                 if (tmp_ip->pnn == -1) {
786                         if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
787                                 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
788                                         ctdb_addr_to_str(&tmp_ip->addr)));
789                         }
790                 }
791         }
792
793         /* If we dont want ips to fail back after a node becomes healthy
794            again, we wont even try to reallocat the ip addresses so that
795            they are evenly spread out.
796            This can NOT be used at the same time as DeterministicIPs !
797         */
798         if (1 == ctdb->tunable.no_ip_failback) {
799                 if (1 == ctdb->tunable.deterministic_public_ips) {
800                         DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
801                 }
802                 goto finished;
803         }
804
805
806         /* now, try to make sure the ip adresses are evenly distributed
807            across the node.
808            for each ip address, loop over all nodes that can serve this
809            ip and make sure that the difference between the node
810            serving the most and the node serving the least ip's are not greater
811            than 1.
812         */
813         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
814                 if (tmp_ip->pnn == -1) {
815                         continue;
816                 }
817
818                 /* Get the highest and lowest number of ips's served by any 
819                    valid node which can serve this ip.
820                 */
821                 maxnode = -1;
822                 minnode = -1;
823                 for (i=0;i<nodemap->num;i++) {
824                         if (nodemap->nodes[i].flags & mask) {
825                                 continue;
826                         }
827
828                         /* only check nodes that can actually serve this ip */
829                         if (can_node_serve_ip(ctdb, i, tmp_ip)) {
830                                 /* no it couldnt   so skip to the next node */
831                                 continue;
832                         }
833
834                         num = node_ip_coverage(ctdb, i, all_ips);
835                         if (maxnode == -1) {
836                                 maxnode = i;
837                                 maxnum  = num;
838                         } else {
839                                 if (num > maxnum) {
840                                         maxnode = i;
841                                         maxnum  = num;
842                                 }
843                         }
844                         if (minnode == -1) {
845                                 minnode = i;
846                                 minnum  = num;
847                         } else {
848                                 if (num < minnum) {
849                                         minnode = i;
850                                         minnum  = num;
851                                 }
852                         }
853                 }
854                 if (maxnode == -1) {
855                         DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
856                                 ctdb_addr_to_str(&tmp_ip->addr)));
857
858                         continue;
859                 }
860
861                 /* If we want deterministic IPs then dont try to reallocate 
862                    them to spread out the load.
863                 */
864                 if (1 == ctdb->tunable.deterministic_public_ips) {
865                         continue;
866                 }
867
868                 /* if the spread between the smallest and largest coverage by
869                    a node is >=2 we steal one of the ips from the node with
870                    most coverage to even things out a bit.
871                    try to do this at most 5 times  since we dont want to spend
872                    too much time balancing the ip coverage.
873                 */
874                 if ( (maxnum > minnum+1)
875                   && (retries < 5) ){
876                         struct ctdb_public_ip_list *tmp;
877
878                         /* mark one of maxnode's vnn's as unassigned and try
879                            again
880                         */
881                         for (tmp=all_ips;tmp;tmp=tmp->next) {
882                                 if (tmp->pnn == maxnode) {
883                                         tmp->pnn = -1;
884                                         retries++;
885                                         goto try_again;
886                                 }
887                         }
888                 }
889         }
890
891
892         /* finished distributing the public addresses, now just send the 
893            info out to the nodes
894         */
895 finished:
896
897         /* at this point ->pnn is the node which will own each IP
898            or -1 if there is no node that can cover this ip
899         */
900
901         /* now tell all nodes to delete any alias that they should not
902            have.  This will be a NOOP on nodes that don't currently
903            hold the given alias */
904         async_data = talloc_zero(tmp_ctx, struct client_async_data);
905         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
906
907         for (i=0;i<nodemap->num;i++) {
908                 /* don't talk to unconnected nodes, but do talk to banned nodes */
909                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
910                         continue;
911                 }
912
913                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
914                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
915                                 /* This node should be serving this
916                                    vnn so dont tell it to release the ip
917                                 */
918                                 continue;
919                         }
920                         if (tmp_ip->addr.sa.sa_family == AF_INET) {
921                                 ipv4.pnn = tmp_ip->pnn;
922                                 ipv4.sin = tmp_ip->addr.ip;
923
924                                 timeout = TAKEOVER_TIMEOUT();
925                                 data.dsize = sizeof(ipv4);
926                                 data.dptr  = (uint8_t *)&ipv4;
927                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
928                                                 0, CTDB_CONTROL_RELEASE_IPv4, 0,
929                                                 data, async_data,
930                                                 &timeout, NULL);
931                         } else {
932                                 ip.pnn  = tmp_ip->pnn;
933                                 ip.addr = tmp_ip->addr;
934
935                                 timeout = TAKEOVER_TIMEOUT();
936                                 data.dsize = sizeof(ip);
937                                 data.dptr  = (uint8_t *)&ip;
938                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
939                                                 0, CTDB_CONTROL_RELEASE_IP, 0,
940                                                 data, async_data,
941                                                 &timeout, NULL);
942                         }
943
944                         if (state == NULL) {
945                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
946                                 talloc_free(tmp_ctx);
947                                 return -1;
948                         }
949                 
950                         ctdb_client_async_add(async_data, state);
951                 }
952         }
953         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
954                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
955                 talloc_free(tmp_ctx);
956                 return -1;
957         }
958         talloc_free(async_data);
959
960
961         /* tell all nodes to get their own IPs */
962         async_data = talloc_zero(tmp_ctx, struct client_async_data);
963         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
964         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
965                 if (tmp_ip->pnn == -1) {
966                         /* this IP won't be taken over */
967                         continue;
968                 }
969
970                 if (tmp_ip->addr.sa.sa_family == AF_INET) {
971                         ipv4.pnn = tmp_ip->pnn;
972                         ipv4.sin = tmp_ip->addr.ip;
973
974                         timeout = TAKEOVER_TIMEOUT();
975                         data.dsize = sizeof(ipv4);
976                         data.dptr  = (uint8_t *)&ipv4;
977                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
978                                         0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
979                                         data, async_data,
980                                         &timeout, NULL);
981                 } else {
982                         ip.pnn  = tmp_ip->pnn;
983                         ip.addr = tmp_ip->addr;
984
985                         timeout = TAKEOVER_TIMEOUT();
986                         data.dsize = sizeof(ip);
987                         data.dptr  = (uint8_t *)&ip;
988                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
989                                         0, CTDB_CONTROL_TAKEOVER_IP, 0,
990                                         data, async_data,
991                                         &timeout, NULL);
992                 }
993                 if (state == NULL) {
994                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
995                         talloc_free(tmp_ctx);
996                         return -1;
997                 }
998                 
999                 ctdb_client_async_add(async_data, state);
1000         }
1001         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1002                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1003                 talloc_free(tmp_ctx);
1004                 return -1;
1005         }
1006
1007         talloc_free(tmp_ctx);
1008         return 0;
1009 }
1010
1011
1012 /*
1013   destroy a ctdb_client_ip structure
1014  */
1015 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1016 {
1017         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1018                 ctdb_addr_to_str(&ip->addr),
1019                 ntohs(ip->addr.ip.sin_port),
1020                 ip->client_id));
1021
1022         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1023         return 0;
1024 }
1025
1026 /*
1027   called by a client to inform us of a TCP connection that it is managing
1028   that should tickled with an ACK when IP takeover is done
1029   we handle both the old ipv4 style of packets as well as the new ipv4/6
1030   pdus.
1031  */
1032 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1033                                 TDB_DATA indata)
1034 {
1035         struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1036         struct ctdb_control_tcp *old_addr = NULL;
1037         struct ctdb_control_tcp_addr new_addr;
1038         struct ctdb_control_tcp_addr *tcp_sock = NULL;
1039         struct ctdb_tcp_list *tcp;
1040         struct ctdb_control_tcp_vnn t;
1041         int ret;
1042         TDB_DATA data;
1043         struct ctdb_client_ip *ip;
1044         struct ctdb_vnn *vnn;
1045         ctdb_sock_addr addr;
1046
1047         switch (indata.dsize) {
1048         case sizeof(struct ctdb_control_tcp):
1049                 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1050                 ZERO_STRUCT(new_addr);
1051                 tcp_sock = &new_addr;
1052                 tcp_sock->src.ip  = old_addr->src;
1053                 tcp_sock->dest.ip = old_addr->dest;
1054                 break;
1055         case sizeof(struct ctdb_control_tcp_addr):
1056                 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1057                 break;
1058         default:
1059                 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed to ctdb_control_tcp_client. size was %d but only allowed sizes are %lu and %lu\n", (int)indata.dsize, sizeof(struct ctdb_control_tcp), sizeof(struct ctdb_control_tcp_addr)));
1060                 return -1;
1061         }
1062
1063         addr = tcp_sock->src;
1064         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1065         addr = tcp_sock->dest;
1066         ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1067
1068         ZERO_STRUCT(addr);
1069         memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1070         vnn = find_public_ip_vnn(ctdb, &addr);
1071         if (vnn == NULL) {
1072                 switch (addr.sa.sa_family) {
1073                 case AF_INET:
1074                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1075                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1076                                         ctdb_addr_to_str(&addr)));
1077                         }
1078                         break;
1079                 case AF_INET6:
1080                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1081                                 ctdb_addr_to_str(&addr)));
1082                         break;
1083                 default:
1084                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1085                 }
1086
1087                 return 0;
1088         }
1089
1090         if (vnn->pnn != ctdb->pnn) {
1091                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1092                         ctdb_addr_to_str(&addr),
1093                         client_id, client->pid));
1094                 /* failing this call will tell smbd to die */
1095                 return -1;
1096         }
1097
1098         ip = talloc(client, struct ctdb_client_ip);
1099         CTDB_NO_MEMORY(ctdb, ip);
1100
1101         ip->ctdb      = ctdb;
1102         ip->addr      = addr;
1103         ip->client_id = client_id;
1104         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1105         DLIST_ADD(ctdb->client_ip_list, ip);
1106
1107         tcp = talloc(client, struct ctdb_tcp_list);
1108         CTDB_NO_MEMORY(ctdb, tcp);
1109
1110         tcp->connection.src_addr = tcp_sock->src;
1111         tcp->connection.dst_addr = tcp_sock->dest;
1112
1113         DLIST_ADD(client->tcp_list, tcp);
1114
1115         t.src  = tcp_sock->src;
1116         t.dest = tcp_sock->dest;
1117
1118         data.dptr = (uint8_t *)&t;
1119         data.dsize = sizeof(t);
1120
1121         switch (addr.sa.sa_family) {
1122         case AF_INET:
1123                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1124                         (unsigned)ntohs(tcp_sock->dest.ip.sin_port), 
1125                         ctdb_addr_to_str(&tcp_sock->src),
1126                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1127                 break;
1128         case AF_INET6:
1129                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1130                         (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port), 
1131                         ctdb_addr_to_str(&tcp_sock->src),
1132                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1133                 break;
1134         default:
1135                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1136         }
1137
1138
1139         /* tell all nodes about this tcp connection */
1140         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1141                                        CTDB_CONTROL_TCP_ADD,
1142                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1143         if (ret != 0) {
1144                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1145                 return -1;
1146         }
1147
1148         return 0;
1149 }
1150
1151 /*
1152   find a tcp address on a list
1153  */
1154 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array, 
1155                                            struct ctdb_tcp_connection *tcp)
1156 {
1157         int i;
1158
1159         if (array == NULL) {
1160                 return NULL;
1161         }
1162
1163         for (i=0;i<array->num;i++) {
1164                 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1165                     ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1166                         return &array->connections[i];
1167                 }
1168         }
1169         return NULL;
1170 }
1171
1172 /*
1173   called by a daemon to inform us of a TCP connection that one of its
1174   clients managing that should tickled with an ACK when IP takeover is
1175   done
1176  */
1177 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
1178 {
1179         struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
1180         struct ctdb_tcp_array *tcparray;
1181         struct ctdb_tcp_connection tcp;
1182         struct ctdb_vnn *vnn;
1183
1184         vnn = find_public_ip_vnn(ctdb, &p->dest);
1185         if (vnn == NULL) {
1186                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1187                         ctdb_addr_to_str(&p->dest)));
1188
1189                 return -1;
1190         }
1191
1192
1193         tcparray = vnn->tcp_array;
1194
1195         /* If this is the first tickle */
1196         if (tcparray == NULL) {
1197                 tcparray = talloc_size(ctdb->nodes, 
1198                         offsetof(struct ctdb_tcp_array, connections) +
1199                         sizeof(struct ctdb_tcp_connection) * 1);
1200                 CTDB_NO_MEMORY(ctdb, tcparray);
1201                 vnn->tcp_array = tcparray;
1202
1203                 tcparray->num = 0;
1204                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1205                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1206
1207                 tcparray->connections[tcparray->num].src_addr = p->src;
1208                 tcparray->connections[tcparray->num].dst_addr = p->dest;
1209                 tcparray->num++;
1210                 return 0;
1211         }
1212
1213
1214         /* Do we already have this tickle ?*/
1215         tcp.src_addr = p->src;
1216         tcp.dst_addr = p->dest;
1217         if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1218                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1219                         ctdb_addr_to_str(&tcp.dst_addr),
1220                         ntohs(tcp.dst_addr.ip.sin_port),
1221                         vnn->pnn));
1222                 return 0;
1223         }
1224
1225         /* A new tickle, we must add it to the array */
1226         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1227                                         struct ctdb_tcp_connection,
1228                                         tcparray->num+1);
1229         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1230
1231         vnn->tcp_array = tcparray;
1232         tcparray->connections[tcparray->num].src_addr = p->src;
1233         tcparray->connections[tcparray->num].dst_addr = p->dest;
1234         tcparray->num++;
1235                                 
1236         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1237                 ctdb_addr_to_str(&tcp.dst_addr),
1238                 ntohs(tcp.dst_addr.ip.sin_port),
1239                 vnn->pnn));
1240
1241         return 0;
1242 }
1243
1244
1245 /*
1246   called by a daemon to inform us of a TCP connection that one of its
1247   clients managing that should tickled with an ACK when IP takeover is
1248   done
1249  */
1250 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1251 {
1252         struct ctdb_tcp_connection *tcpp;
1253         struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1254
1255         if (vnn == NULL) {
1256                 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1257                         ctdb_addr_to_str(&conn->dst_addr)));
1258                 return;
1259         }
1260
1261         /* if the array is empty we cant remove it
1262            and we dont need to do anything
1263          */
1264         if (vnn->tcp_array == NULL) {
1265                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1266                         ctdb_addr_to_str(&conn->dst_addr),
1267                         ntohs(conn->dst_addr.ip.sin_port)));
1268                 return;
1269         }
1270
1271
1272         /* See if we know this connection
1273            if we dont know this connection  then we dont need to do anything
1274          */
1275         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1276         if (tcpp == NULL) {
1277                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1278                         ctdb_addr_to_str(&conn->dst_addr),
1279                         ntohs(conn->dst_addr.ip.sin_port)));
1280                 return;
1281         }
1282
1283
1284         /* We need to remove this entry from the array.
1285            Instead of allocating a new array and copying data to it
1286            we cheat and just copy the last entry in the existing array
1287            to the entry that is to be removed and just shring the 
1288            ->num field
1289          */
1290         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1291         vnn->tcp_array->num--;
1292
1293         /* If we deleted the last entry we also need to remove the entire array
1294          */
1295         if (vnn->tcp_array->num == 0) {
1296                 talloc_free(vnn->tcp_array);
1297                 vnn->tcp_array = NULL;
1298         }               
1299
1300         vnn->tcp_update_needed = true;
1301
1302         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1303                 ctdb_addr_to_str(&conn->src_addr),
1304                 ntohs(conn->src_addr.ip.sin_port)));
1305 }
1306
1307
/*
  called when a daemon restarts.
  Intended to send all tickles for all public addresses we are serving
  to the new node; currently a stub that does nothing and reports success.
 */
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
{
        /* XXX here we should send all tickles we are serving to the new node */
        (void)ctdb;
        (void)vnn;

        return 0;
}
1317
1318
1319 /*
1320   called when a client structure goes away - hook to remove
1321   elements from the tcp_list in all daemons
1322  */
1323 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1324 {
1325         while (client->tcp_list) {
1326                 struct ctdb_tcp_list *tcp = client->tcp_list;
1327                 DLIST_REMOVE(client->tcp_list, tcp);
1328                 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1329         }
1330 }
1331
1332
1333 /*
1334   release all IPs on shutdown
1335  */
1336 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1337 {
1338         struct ctdb_vnn *vnn;
1339
1340         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1341                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1342                         continue;
1343                 }
1344                 if (vnn->pnn == ctdb->pnn) {
1345                         vnn->pnn = -1;
1346                 }
1347                 ctdb_event_script(ctdb, "releaseip %s %s %u",
1348                                   vnn->iface, 
1349                                   talloc_strdup(ctdb, ctdb_addr_to_str(&vnn->public_address)),
1350                                   vnn->public_netmask_bits);
1351                 release_kill_clients(ctdb, &vnn->public_address);
1352         }
1353 }
1354
1355
1356 /*
1357   get list of public IPs
1358  */
1359 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
1360                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1361 {
1362         int i, num, len;
1363         struct ctdb_all_public_ips *ips;
1364         struct ctdb_vnn *vnn;
1365
1366         /* count how many public ip structures we have */
1367         num = 0;
1368         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1369                 num++;
1370         }
1371
1372         len = offsetof(struct ctdb_all_public_ips, ips) + 
1373                 num*sizeof(struct ctdb_public_ip);
1374         ips = talloc_zero_size(outdata, len);
1375         CTDB_NO_MEMORY(ctdb, ips);
1376
1377         outdata->dsize = len;
1378         outdata->dptr  = (uint8_t *)ips;
1379
1380         ips->num = num;
1381         i = 0;
1382         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1383                 ips->ips[i].pnn  = vnn->pnn;
1384                 ips->ips[i].addr = vnn->public_address;
1385                 i++;
1386         }
1387
1388         return 0;
1389 }
1390
1391
1392 /*
1393   get list of public IPs, old ipv4 style.  only returns ipv4 addresses
1394  */
1395 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb, 
1396                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1397 {
1398         int i, num, len;
1399         struct ctdb_all_public_ipsv4 *ips;
1400         struct ctdb_vnn *vnn;
1401
1402         /* count how many public ip structures we have */
1403         num = 0;
1404         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1405                 if (vnn->public_address.sa.sa_family != AF_INET) {
1406                         continue;
1407                 }
1408                 num++;
1409         }
1410
1411         len = offsetof(struct ctdb_all_public_ipsv4, ips) + 
1412                 num*sizeof(struct ctdb_public_ipv4);
1413         ips = talloc_zero_size(outdata, len);
1414         CTDB_NO_MEMORY(ctdb, ips);
1415
1416         outdata->dsize = len;
1417         outdata->dptr  = (uint8_t *)ips;
1418
1419         ips->num = num;
1420         i = 0;
1421         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1422                 if (vnn->public_address.sa.sa_family != AF_INET) {
1423                         continue;
1424                 }
1425                 ips->ips[i].pnn = vnn->pnn;
1426                 ips->ips[i].sin = vnn->public_address.ip;
1427                 i++;
1428         }
1429
1430         return 0;
1431 }
1432
1433
1434 /* 
1435    structure containing the listening socket and the list of tcp connections
1436    that the ctdb daemon is to kill
1437 */
1438 struct ctdb_kill_tcp {
1439         struct ctdb_vnn *vnn;
1440         struct ctdb_context *ctdb;
1441         int capture_fd;
1442         struct fd_event *fde;
1443         trbt_tree_t *connections;
1444         void *private_data;
1445 };
1446
1447 /*
1448   a tcp connection that is to be killed
1449  */
1450 struct ctdb_killtcp_con {
1451         ctdb_sock_addr src_addr;
1452         ctdb_sock_addr dst_addr;
1453         int count;
1454         struct ctdb_kill_tcp *killtcp;
1455 };
1456
1457 /* this function is used to create a key to represent this socketpair
1458    in the killtcp tree.
1459    this key is used to insert and lookup matching socketpairs that are
1460    to be tickled and RST
1461 */
1462 #define KILLTCP_KEYLEN  10
1463 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
1464 {
1465         static uint32_t key[KILLTCP_KEYLEN];
1466
1467         bzero(key, sizeof(key));
1468
1469         if (src->sa.sa_family != dst->sa.sa_family) {
1470                 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
1471                 return key;
1472         }
1473         
1474         switch (src->sa.sa_family) {
1475         case AF_INET:
1476                 key[0]  = dst->ip.sin_addr.s_addr;
1477                 key[1]  = src->ip.sin_addr.s_addr;
1478                 key[2]  = dst->ip.sin_port;
1479                 key[3]  = src->ip.sin_port;
1480                 break;
1481         case AF_INET6:
1482                 key[0]  = dst->ip6.sin6_addr.s6_addr32[3];
1483                 key[1]  = src->ip6.sin6_addr.s6_addr32[3];
1484                 key[2]  = dst->ip6.sin6_addr.s6_addr32[2];
1485                 key[3]  = src->ip6.sin6_addr.s6_addr32[2];
1486                 key[4]  = dst->ip6.sin6_addr.s6_addr32[1];
1487                 key[5]  = src->ip6.sin6_addr.s6_addr32[1];
1488                 key[6]  = dst->ip6.sin6_addr.s6_addr32[0];
1489                 key[7]  = src->ip6.sin6_addr.s6_addr32[0];
1490                 key[8]  = dst->ip6.sin6_port;
1491                 key[9]  = src->ip6.sin6_port;
1492                 break;
1493         default:
1494                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
1495                 return key;
1496         }
1497
1498         return key;
1499 }
1500
1501 /*
1502   called when we get a read event on the raw socket
1503  */
1504 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde, 
1505                                 uint16_t flags, void *private_data)
1506 {
1507         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1508         struct ctdb_killtcp_con *con;
1509         ctdb_sock_addr src, dst;
1510         uint32_t ack_seq, seq;
1511
1512         if (!(flags & EVENT_FD_READ)) {
1513                 return;
1514         }
1515
1516         if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
1517                                 killtcp->private_data,
1518                                 &src, &dst,
1519                                 &ack_seq, &seq) != 0) {
1520                 /* probably a non-tcp ACK packet */
1521                 return;
1522         }
1523
1524         /* check if we have this guy in our list of connections
1525            to kill
1526         */
1527         con = trbt_lookuparray32(killtcp->connections, 
1528                         KILLTCP_KEYLEN, killtcp_key(&src, &dst));
1529         if (con == NULL) {
1530                 /* no this was some other packet we can just ignore */
1531                 return;
1532         }
1533
1534         /* This one has been tickled !
1535            now reset him and remove him from the list.
1536          */
1537         DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
1538                 ntohs(con->dst_addr.ip.sin_port),
1539                 ctdb_addr_to_str(&con->src_addr),
1540                 ntohs(con->src_addr.ip.sin_port)));
1541
1542         ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
1543         talloc_free(con);
1544 }
1545
1546
1547 /* when traversing the list of all tcp connections to send tickle acks to
1548    (so that we can capture the ack coming back and kill the connection
1549     by a RST)
1550    this callback is called for each connection we are currently trying to kill
1551 */
1552 static void tickle_connection_traverse(void *param, void *data)
1553 {
1554         struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
1555
1556         /* have tried too many times, just give up */
1557         if (con->count >= 5) {
1558                 talloc_free(con);
1559                 return;
1560         }
1561
1562         /* othervise, try tickling it again */
1563         con->count++;
1564         ctdb_sys_send_tcp(
1565                 (ctdb_sock_addr *)&con->dst_addr,
1566                 (ctdb_sock_addr *)&con->src_addr,
1567                 0, 0, 0);
1568 }
1569
1570
1571 /* 
1572    called every second until all sentenced connections have been reset
1573  */
1574 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te, 
1575                                               struct timeval t, void *private_data)
1576 {
1577         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1578
1579
1580         /* loop over all connections sending tickle ACKs */
1581         trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, NULL);
1582
1583
1584         /* If there are no more connections to kill we can remove the
1585            entire killtcp structure
1586          */
1587         if ( (killtcp->connections == NULL) || 
1588              (killtcp->connections->root == NULL) ) {
1589                 talloc_free(killtcp);
1590                 return;
1591         }
1592
1593         /* try tickling them again in a seconds time
1594          */
1595         event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
1596                         ctdb_tickle_sentenced_connections, killtcp);
1597 }
1598
1599 /*
1600   destroy the killtcp structure
1601  */
1602 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
1603 {
1604         killtcp->vnn->killtcp = NULL;
1605         return 0;
1606 }
1607
1608
/* insert callback for the killtcp tree: always pick the new entry.

   'parm' is the new connection structure and is returned as is;
   'data' is whatever was stored before and is ignored.
   the old entry is deliberately not freed here: according to the
   original author it is talloc_stolen by the same tree node and goes
   away together with the new data.
*/
static void *add_killtcp_callback(void *parm, void *data)
{
        (void)data;

        return parm;
}
1620
1621 /*
1622   add a tcp socket to the list of connections we want to RST
1623  */
1624 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, 
1625                                        ctdb_sock_addr *s,
1626                                        ctdb_sock_addr *d)
1627 {
1628         ctdb_sock_addr src, dst;
1629         struct ctdb_kill_tcp *killtcp;
1630         struct ctdb_killtcp_con *con;
1631         struct ctdb_vnn *vnn;
1632
1633         ctdb_canonicalize_ip(s, &src);
1634         ctdb_canonicalize_ip(d, &dst);
1635
1636         vnn = find_public_ip_vnn(ctdb, &dst);
1637         if (vnn == NULL) {
1638                 vnn = find_public_ip_vnn(ctdb, &src);
1639         }
1640         if (vnn == NULL) {
1641                 /* if it is not a public ip   it could be our 'single ip' */
1642                 if (ctdb->single_ip_vnn) {
1643                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
1644                                 vnn = ctdb->single_ip_vnn;
1645                         }
1646                 }
1647         }
1648         if (vnn == NULL) {
1649                 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n")); 
1650                 return -1;
1651         }
1652
1653         killtcp = vnn->killtcp;
1654         
1655         /* If this is the first connection to kill we must allocate
1656            a new structure
1657          */
1658         if (killtcp == NULL) {
1659                 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
1660                 CTDB_NO_MEMORY(ctdb, killtcp);
1661
1662                 killtcp->vnn         = vnn;
1663                 killtcp->ctdb        = ctdb;
1664                 killtcp->capture_fd  = -1;
1665                 killtcp->connections = trbt_create(killtcp, 0);
1666
1667                 vnn->killtcp         = killtcp;
1668                 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
1669         }
1670
1671
1672
1673         /* create a structure that describes this connection we want to
1674            RST and store it in killtcp->connections
1675         */
1676         con = talloc(killtcp, struct ctdb_killtcp_con);
1677         CTDB_NO_MEMORY(ctdb, con);
1678         con->src_addr = src;
1679         con->dst_addr = dst;
1680         con->count    = 0;
1681         con->killtcp  = killtcp;
1682
1683
1684         trbt_insertarray32_callback(killtcp->connections,
1685                         KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
1686                         add_killtcp_callback, con);
1687
1688         /* 
1689            If we dont have a socket to listen on yet we must create it
1690          */
1691         if (killtcp->capture_fd == -1) {
1692                 killtcp->capture_fd = ctdb_sys_open_capture_socket(vnn->iface, &killtcp->private_data);
1693                 if (killtcp->capture_fd == -1) {
1694                         DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing socket for killtcp\n"));
1695                         goto failed;
1696                 }
1697         }
1698
1699
1700         if (killtcp->fde == NULL) {
1701                 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd, 
1702                                             EVENT_FD_READ | EVENT_FD_AUTOCLOSE, 
1703                                             capture_tcp_handler, killtcp);
1704
1705                 /* We also need to set up some events to tickle all these connections
1706                    until they are all reset
1707                 */
1708                 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
1709                                 ctdb_tickle_sentenced_connections, killtcp);
1710         }
1711
1712         /* tickle him once now */
1713         ctdb_sys_send_tcp(
1714                 &con->dst_addr,
1715                 &con->src_addr,
1716                 0, 0, 0);
1717
1718         return 0;
1719
1720 failed:
1721         talloc_free(vnn->killtcp);
1722         vnn->killtcp = NULL;
1723         return -1;
1724 }
1725
1726 /*
1727   kill a TCP connection.
1728  */
1729 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
1730 {
1731         struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
1732
1733         return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
1734 }
1735
1736 /*
1737   called by a daemon to inform us of the entire list of TCP tickles for
1738   a particular public address.
1739   this control should only be sent by the node that is currently serving
1740   that public address.
1741  */
1742 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1743 {
1744         struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
1745         struct ctdb_tcp_array *tcparray;
1746         struct ctdb_vnn *vnn;
1747
1748         /* We must at least have tickles.num or else we cant verify the size
1749            of the received data blob
1750          */
1751         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
1752                                         tickles.connections)) {
1753                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
1754                 return -1;
1755         }
1756
1757         /* verify that the size of data matches what we expect */
1758         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
1759                                 tickles.connections)
1760                          + sizeof(struct ctdb_tcp_connection)
1761                                  * list->tickles.num) {
1762                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
1763                 return -1;
1764         }       
1765
1766         vnn = find_public_ip_vnn(ctdb, &list->addr);
1767         if (vnn == NULL) {
1768                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n", 
1769                         ctdb_addr_to_str(&list->addr)));
1770
1771                 return 1;
1772         }
1773
1774         /* remove any old ticklelist we might have */
1775         talloc_free(vnn->tcp_array);
1776         vnn->tcp_array = NULL;
1777
1778         tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
1779         CTDB_NO_MEMORY(ctdb, tcparray);
1780
1781         tcparray->num = list->tickles.num;
1782
1783         tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
1784         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1785
1786         memcpy(tcparray->connections, &list->tickles.connections[0], 
1787                sizeof(struct ctdb_tcp_connection)*tcparray->num);
1788
1789         /* We now have a new fresh tickle list array for this vnn */
1790         vnn->tcp_array = talloc_steal(vnn, tcparray);
1791         
1792         return 0;
1793 }
1794
1795 /*
1796   called to return the full list of tickles for the puclic address associated 
1797   with the provided vnn
1798  */
1799 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1800 {
1801         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1802         struct ctdb_control_tcp_tickle_list *list;
1803         struct ctdb_tcp_array *tcparray;
1804         int num;
1805         struct ctdb_vnn *vnn;
1806
1807         vnn = find_public_ip_vnn(ctdb, addr);
1808         if (vnn == NULL) {
1809                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n", 
1810                         ctdb_addr_to_str(addr)));
1811
1812                 return 1;
1813         }
1814
1815         tcparray = vnn->tcp_array;
1816         if (tcparray) {
1817                 num = tcparray->num;
1818         } else {
1819                 num = 0;
1820         }
1821
1822         outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
1823                                 tickles.connections)
1824                         + sizeof(struct ctdb_tcp_connection) * num;
1825
1826         outdata->dptr  = talloc_size(outdata, outdata->dsize);
1827         CTDB_NO_MEMORY(ctdb, outdata->dptr);
1828         list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
1829
1830         list->addr = *addr;
1831         list->tickles.num = num;
1832         if (num) {
1833                 memcpy(&list->tickles.connections[0], tcparray->connections, 
1834                         sizeof(struct ctdb_tcp_connection) * num);
1835         }
1836
1837         return 0;
1838 }
1839
1840
1841 /*
1842   set the list of all tcp tickles for a public address
1843  */
1844 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, 
1845                               struct timeval timeout, uint32_t destnode, 
1846                               ctdb_sock_addr *addr,
1847                               struct ctdb_tcp_array *tcparray)
1848 {
1849         int ret, num;
1850         TDB_DATA data;
1851         struct ctdb_control_tcp_tickle_list *list;
1852
1853         if (tcparray) {
1854                 num = tcparray->num;
1855         } else {
1856                 num = 0;
1857         }
1858
1859         data.dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
1860                                 tickles.connections) +
1861                         sizeof(struct ctdb_tcp_connection) * num;
1862         data.dptr = talloc_size(ctdb, data.dsize);
1863         CTDB_NO_MEMORY(ctdb, data.dptr);
1864
1865         list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
1866         list->addr = *addr;
1867         list->tickles.num = num;
1868         if (tcparray) {
1869                 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
1870         }
1871
1872         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1873                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
1874                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1875         if (ret != 0) {
1876                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
1877                 return -1;
1878         }
1879
1880         talloc_free(data.dptr);
1881
1882         return ret;
1883 }
1884
1885
1886 /*
1887   perform tickle updates if required
1888  */
1889 static void ctdb_update_tcp_tickles(struct event_context *ev, 
1890                                 struct timed_event *te, 
1891                                 struct timeval t, void *private_data)
1892 {
1893         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
1894         int ret;
1895         struct ctdb_vnn *vnn;
1896
1897         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1898                 /* we only send out updates for public addresses that 
1899                    we have taken over
1900                  */
1901                 if (ctdb->pnn != vnn->pnn) {
1902                         continue;
1903                 }
1904                 /* We only send out the updates if we need to */
1905                 if (!vnn->tcp_update_needed) {
1906                         continue;
1907                 }
1908                 ret = ctdb_ctrl_set_tcp_tickles(ctdb, 
1909                                 TAKEOVER_TIMEOUT(),
1910                                 CTDB_BROADCAST_CONNECTED,
1911                                 &vnn->public_address,
1912                                 vnn->tcp_array);
1913                 if (ret != 0) {
1914                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
1915                                 ctdb_addr_to_str(&vnn->public_address)));
1916                 }
1917         }
1918
1919         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1920                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
1921                              ctdb_update_tcp_tickles, ctdb);
1922 }               
1923         
1924
1925 /*
1926   start periodic update of tcp tickles
1927  */
1928 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
1929 {
1930         ctdb->tickle_update_context = talloc_new(ctdb);
1931
1932         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1933                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
1934                              ctdb_update_tcp_tickles, ctdb);
1935 }
1936
1937
1938
1939
1940 struct control_gratious_arp {
1941         struct ctdb_context *ctdb;
1942         ctdb_sock_addr addr;
1943         const char *iface;
1944         int count;
1945 };
1946
1947 /*
1948   send a control_gratuitous arp
1949  */
1950 static void send_gratious_arp(struct event_context *ev, struct timed_event *te, 
1951                                   struct timeval t, void *private_data)
1952 {
1953         int ret;
1954         struct control_gratious_arp *arp = talloc_get_type(private_data, 
1955                                                         struct control_gratious_arp);
1956
1957         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
1958         if (ret != 0) {
1959                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp failed (%s)\n", strerror(errno)));
1960         }
1961
1962
1963         arp->count++;
1964         if (arp->count == CTDB_ARP_REPEAT) {
1965                 talloc_free(arp);
1966                 return;
1967         }
1968
1969         event_add_timed(arp->ctdb->ev, arp, 
1970                         timeval_current_ofs(CTDB_ARP_INTERVAL, 0), 
1971                         send_gratious_arp, arp);
1972 }
1973
1974
1975 /*
1976   send a gratious arp 
1977  */
1978 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
1979 {
1980         struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
1981         struct control_gratious_arp *arp;
1982
1983         /* verify the size of indata */
1984         if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
1985                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
1986                                  (unsigned)indata.dsize, 
1987                                  (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
1988                 return -1;
1989         }
1990         if (indata.dsize != 
1991                 ( offsetof(struct ctdb_control_gratious_arp, iface)
1992                 + gratious_arp->len ) ){
1993
1994                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
1995                         "but should be %u bytes\n", 
1996                          (unsigned)indata.dsize, 
1997                          (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
1998                 return -1;
1999         }
2000
2001
2002         arp = talloc(ctdb, struct control_gratious_arp);
2003         CTDB_NO_MEMORY(ctdb, arp);
2004
2005         arp->ctdb  = ctdb;
2006         arp->addr   = gratious_arp->addr;
2007         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2008         CTDB_NO_MEMORY(ctdb, arp->iface);
2009         arp->count = 0;
2010         
2011         event_add_timed(arp->ctdb->ev, arp, 
2012                         timeval_zero(), send_gratious_arp, arp);
2013
2014         return 0;
2015 }
2016
2017 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2018 {
2019         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2020
2021
2022         /* verify the size of indata */
2023         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2024                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2025                 return -1;
2026         }
2027         if (indata.dsize != 
2028                 ( offsetof(struct ctdb_control_ip_iface, iface)
2029                 + pub->len ) ){
2030
2031                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2032                         "but should be %u bytes\n", 
2033                          (unsigned)indata.dsize, 
2034                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2035                 return -1;
2036         }
2037
2038         return ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2039 }
2040
2041 /*
2042   called when releaseip event finishes for del_public_address
2043  */
2044 static void delete_ip_callback(struct ctdb_context *ctdb, int status, 
2045                                 void *private_data)
2046 {
2047         talloc_free(private_data);
2048 }
2049
2050 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2051 {
2052         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2053         struct ctdb_vnn *vnn;
2054         int ret;
2055
2056         /* verify the size of indata */
2057         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2058                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2059                 return -1;
2060         }
2061         if (indata.dsize != 
2062                 ( offsetof(struct ctdb_control_ip_iface, iface)
2063                 + pub->len ) ){
2064
2065                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2066                         "but should be %u bytes\n", 
2067                          (unsigned)indata.dsize, 
2068                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2069                 return -1;
2070         }
2071
2072         /* walk over all public addresses until we find a match */
2073         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2074                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2075                         TALLOC_CTX *mem_ctx = talloc_new(ctdb);
2076
2077                         DLIST_REMOVE(ctdb->vnn, vnn);
2078
2079                         ret = ctdb_event_script_callback(ctdb, 
2080                                          timeval_current_ofs(ctdb->tunable.script_timeout, 0),
2081                                          mem_ctx, delete_ip_callback, mem_ctx,
2082                                          "releaseip %s %s %u",
2083                                          vnn->iface, 
2084                                          talloc_strdup(mem_ctx, ctdb_addr_to_str(&vnn->public_address)),
2085                                          vnn->public_netmask_bits);
2086                         talloc_free(vnn);
2087                         if (ret != 0) {
2088                                 return -1;
2089                         }
2090                         return 0;
2091                 }
2092         }
2093
2094         return -1;
2095 }
2096