make it possible to start the daemon in STOPPED mode
[metze/ctdb/wip.git] / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20 #include "includes.h"
21 #include "lib/events/events.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
29
30
/* Timeout built from the takeover_timeout tunable. Expands to a call that
   references a variable named 'ctdb', which must be in scope at the point
   of use. Presumably an absolute time that many seconds from now - confirm
   against timeval_current_ofs(). */
#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)

/* CTDB_ARP_INTERVAL is the first argument handed to timeval_current_ofs()
   when ctdb_control_send_arp re-arms itself; CTDB_ARP_REPEAT is the run
   count at which ctdb_control_send_arp frees its state and stops. */
#define CTDB_ARP_INTERVAL 1
#define CTDB_ARP_REPEAT   3
35
/*
  state carried by the repeating ctdb_control_send_arp timed event
 */
struct ctdb_takeover_arp {
        struct ctdb_context *ctdb;        /* context whose event loop re-arms the timer */
        uint32_t count;                   /* number of times the handler has run so far */
        ctdb_sock_addr addr;              /* address the gratuitous arp is sent for */
        struct ctdb_tcp_array *tcparray;  /* connections to send tickle acks for, may be NULL */
        struct ctdb_vnn *vnn;             /* supplies the interface name and takeover_ctx */
};
43
44
/*
  lists of tcp endpoints

  doubly linked list node (prev/next) wrapping one tcp connection
 */
struct ctdb_tcp_list {
        struct ctdb_tcp_list *prev, *next;
        struct ctdb_tcp_connection connection;
};
52
/*
  list of clients to kill on IP release

  release_kill_clients walks this list from ctdb->client_ip_list, compares
  addr against the released address and resolves client_id to a
  struct ctdb_client through ctdb_reqid_find.
 */
struct ctdb_client_ip {
        struct ctdb_client_ip *prev, *next;
        struct ctdb_context *ctdb;
        ctdb_sock_addr addr;    /* address the client registered with */
        uint32_t client_id;     /* id handed to ctdb_reqid_find to locate the client */
};
62
63
64 /*
65   send a gratuitous arp
66  */
67 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te, 
68                                   struct timeval t, void *private_data)
69 {
70         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
71                                                         struct ctdb_takeover_arp);
72         int i, ret;
73         struct ctdb_tcp_array *tcparray;
74
75         ret = ctdb_sys_send_arp(&arp->addr, arp->vnn->iface);
76         if (ret != 0) {
77                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed (%s)\n", strerror(errno)));
78         }
79
80         tcparray = arp->tcparray;
81         if (tcparray) {
82                 for (i=0;i<tcparray->num;i++) {
83                         struct ctdb_tcp_connection *tcon;
84
85                         tcon = &tcparray->connections[i];
86                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
87                                 (unsigned)ntohs(tcon->dst_addr.ip.sin_port), 
88                                 ctdb_addr_to_str(&tcon->src_addr),
89                                 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
90                         ret = ctdb_sys_send_tcp(
91                                 &tcon->src_addr, 
92                                 &tcon->dst_addr,
93                                 0, 0, 0);
94                         if (ret != 0) {
95                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
96                                         ctdb_addr_to_str(&tcon->src_addr)));
97                         }
98                 }
99         }
100
101         arp->count++;
102
103         if (arp->count == CTDB_ARP_REPEAT) {
104                 talloc_free(arp);
105                 return;
106         }
107
108         event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx, 
109                         timeval_current_ofs(CTDB_ARP_INTERVAL, 100000), 
110                         ctdb_control_send_arp, arp);
111 }
112
/*
  state handed to the takeip/releaseip event script callbacks
 */
struct takeover_callback_state {
        struct ctdb_req_control *c;   /* control request the callback replies to */
        ctdb_sock_addr *addr;         /* address being taken over or released */
        struct ctdb_vnn *vnn;         /* public address entry the request refers to */
};
118
119 /*
120   called when takeip event finishes
121  */
122 static void takeover_ip_callback(struct ctdb_context *ctdb, int status, 
123                                  void *private_data)
124 {
125         struct takeover_callback_state *state = 
126                 talloc_get_type(private_data, struct takeover_callback_state);
127         struct ctdb_takeover_arp *arp;
128         struct ctdb_tcp_array *tcparray;
129
130         if (status != 0) {
131                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
132                         ctdb_addr_to_str(state->addr),
133                         state->vnn->iface));
134                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
135                 talloc_free(state);
136                 return;
137         }
138
139         if (!state->vnn->takeover_ctx) {
140                 state->vnn->takeover_ctx = talloc_new(ctdb);
141                 if (!state->vnn->takeover_ctx) {
142                         goto failed;
143                 }
144         }
145
146         arp = talloc_zero(state->vnn->takeover_ctx, struct ctdb_takeover_arp);
147         if (!arp) goto failed;
148         
149         arp->ctdb = ctdb;
150         arp->addr = *state->addr;
151         arp->vnn  = state->vnn;
152
153         tcparray = state->vnn->tcp_array;
154         if (tcparray) {
155                 /* add all of the known tcp connections for this IP to the
156                    list of tcp connections to send tickle acks for */
157                 arp->tcparray = talloc_steal(arp, tcparray);
158
159                 state->vnn->tcp_array = NULL;
160                 state->vnn->tcp_update_needed = true;
161         }
162
163         event_add_timed(arp->ctdb->ev, state->vnn->takeover_ctx, 
164                         timeval_zero(), ctdb_control_send_arp, arp);
165
166         /* the control succeeded */
167         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
168         talloc_free(state);
169         return;
170
171 failed:
172         ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
173         talloc_free(state);
174         return;
175 }
176
177 /*
178   Find the vnn of the node that has a public ip address
179   returns -1 if the address is not known as a public address
180  */
181 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
182 {
183         struct ctdb_vnn *vnn;
184
185         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
186                 if (ctdb_same_ip(&vnn->public_address, addr)) {
187                         return vnn;
188                 }
189         }
190
191         return NULL;
192 }
193
194
195 /*
196   take over an ip address
197  */
198 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, 
199                                  struct ctdb_req_control *c,
200                                  TDB_DATA indata, 
201                                  bool *async_reply)
202 {
203         int ret;
204         struct takeover_callback_state *state;
205         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
206         struct ctdb_vnn *vnn;
207
208         /* update out vnn list */
209         vnn = find_public_ip_vnn(ctdb, &pip->addr);
210         if (vnn == NULL) {
211                 DEBUG(DEBUG_ERR,("takeoverip called for an ip '%s' that is not a public address\n", 
212                         ctdb_addr_to_str(&pip->addr)));
213                 return 0;
214         }
215         vnn->pnn = pip->pnn;
216
217         /* if our kernel already has this IP, do nothing */
218         if (ctdb_sys_have_ip(&pip->addr)) {
219                 return 0;
220         }
221
222         state = talloc(ctdb, struct takeover_callback_state);
223         CTDB_NO_MEMORY(ctdb, state);
224
225         state->c = talloc_steal(ctdb, c);
226         state->addr = talloc(ctdb, ctdb_sock_addr);
227         CTDB_NO_MEMORY(ctdb, state->addr);
228
229         *state->addr = pip->addr;
230         state->vnn   = vnn;
231
232         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n", 
233                 ctdb_addr_to_str(&pip->addr),
234                 vnn->public_netmask_bits, 
235                 vnn->iface));
236
237         ret = ctdb_event_script_callback(ctdb, 
238                                          timeval_current_ofs(ctdb->tunable.script_timeout, 0),
239                                          state, takeover_ip_callback, state,
240                                          "takeip %s %s %u",
241                                          vnn->iface, 
242                                          talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
243                                          vnn->public_netmask_bits);
244
245         if (ret != 0) {
246                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
247                         ctdb_addr_to_str(&pip->addr),
248                         vnn->iface));
249                 talloc_free(state);
250                 return -1;
251         }
252
253         /* tell ctdb_control.c that we will be replying asynchronously */
254         *async_reply = true;
255
256         return 0;
257 }
258
259 /*
260   takeover an ip address old v4 style
261  */
262 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb, 
263                                 struct ctdb_req_control *c,
264                                 TDB_DATA indata, 
265                                 bool *async_reply)
266 {
267         TDB_DATA data;
268         
269         data.dsize = sizeof(struct ctdb_public_ip);
270         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
271         CTDB_NO_MEMORY(ctdb, data.dptr);
272         
273         memcpy(data.dptr, indata.dptr, indata.dsize);
274         return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
275 }
276
277 /*
278   kill any clients that are registered with a IP that is being released
279  */
280 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
281 {
282         struct ctdb_client_ip *ip;
283
284         DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
285                 ctdb_addr_to_str(addr)));
286
287         for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
288                 ctdb_sock_addr tmp_addr;
289
290                 tmp_addr = ip->addr;
291                 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n", 
292                         ip->client_id,
293                         ctdb_addr_to_str(&ip->addr)));
294
295                 if (ctdb_same_ip(&tmp_addr, addr)) {
296                         struct ctdb_client *client = ctdb_reqid_find(ctdb, 
297                                                                      ip->client_id, 
298                                                                      struct ctdb_client);
299                         DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n", 
300                                 ip->client_id,
301                                 ctdb_addr_to_str(&ip->addr),
302                                 client->pid));
303
304                         if (client->pid != 0) {
305                                 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
306                                         (unsigned)client->pid,
307                                         ctdb_addr_to_str(addr),
308                                         ip->client_id));
309                                 kill(client->pid, SIGKILL);
310                         }
311                 }
312         }
313 }
314
315 /*
316   called when releaseip event finishes
317  */
318 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
319                                 void *private_data)
320 {
321         struct takeover_callback_state *state = 
322                 talloc_get_type(private_data, struct takeover_callback_state);
323         TDB_DATA data;
324
325         /* send a message to all clients of this node telling them
326            that the cluster has been reconfigured and they should
327            release any sockets on this IP */
328         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
329         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
330         data.dsize = strlen((char *)data.dptr)+1;
331
332         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
333
334         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
335
336         /* kill clients that have registered with this IP */
337         release_kill_clients(ctdb, state->addr);
338         
339         /* the control succeeded */
340         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
341         talloc_free(state);
342 }
343
344 /*
345   release an ip address
346  */
347 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
348                                 struct ctdb_req_control *c,
349                                 TDB_DATA indata, 
350                                 bool *async_reply)
351 {
352         int ret;
353         struct takeover_callback_state *state;
354         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
355         struct ctdb_vnn *vnn;
356
357         /* update our vnn list */
358         vnn = find_public_ip_vnn(ctdb, &pip->addr);
359         if (vnn == NULL) {
360                 DEBUG(DEBUG_ERR,("takeoverip called for an ip '%s' that is not a public address\n",
361                         ctdb_addr_to_str(&pip->addr)));
362                 return 0;
363         }
364         vnn->pnn = pip->pnn;
365
366         /* stop any previous arps */
367         talloc_free(vnn->takeover_ctx);
368         vnn->takeover_ctx = NULL;
369
370         if (!ctdb_sys_have_ip(&pip->addr)) {
371                 DEBUG(DEBUG_INFO,("Redundant release of IP %s/%u on interface %s (ip not held)\n", 
372                         ctdb_addr_to_str(&pip->addr),
373                         vnn->public_netmask_bits, 
374                         vnn->iface));
375                 return 0;
376         }
377
378         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s\n", 
379                 ctdb_addr_to_str(&pip->addr),
380                 vnn->public_netmask_bits, 
381                 vnn->iface));
382
383         state = talloc(ctdb, struct takeover_callback_state);
384         CTDB_NO_MEMORY(ctdb, state);
385
386         state->c = talloc_steal(state, c);
387         state->addr = talloc(state, ctdb_sock_addr);       
388         CTDB_NO_MEMORY(ctdb, state->addr);
389         *state->addr = pip->addr;
390         state->vnn   = vnn;
391
392         ret = ctdb_event_script_callback(ctdb, 
393                                          timeval_current_ofs(ctdb->tunable.script_timeout, 0),
394                                          state, release_ip_callback, state,
395                                          "releaseip %s %s %u",
396                                          vnn->iface, 
397                                          talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
398                                          vnn->public_netmask_bits);
399         if (ret != 0) {
400                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
401                         ctdb_addr_to_str(&pip->addr),
402                         vnn->iface));
403                 talloc_free(state);
404                 return -1;
405         }
406
407         /* tell the control that we will be reply asynchronously */
408         *async_reply = true;
409         return 0;
410 }
411
412 /*
413   release an ip address old v4 style
414  */
415 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb, 
416                                 struct ctdb_req_control *c,
417                                 TDB_DATA indata, 
418                                 bool *async_reply)
419 {
420         TDB_DATA data;
421         
422         data.dsize = sizeof(struct ctdb_public_ip);
423         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
424         CTDB_NO_MEMORY(ctdb, data.dptr);
425         
426         memcpy(data.dptr, indata.dptr, indata.dsize);
427         return ctdb_control_release_ip(ctdb, c, data, async_reply);
428 }
429
430
431 static int ctdb_add_public_address(struct ctdb_context *ctdb, ctdb_sock_addr *addr, unsigned mask, const char *iface)
432 {
433         struct ctdb_vnn      *vnn;
434
435         /* Verify that we dont have an entry for this ip yet */
436         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
437                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
438                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
439                                 ctdb_addr_to_str(addr)));
440                         return -1;
441                 }               
442         }
443
444         /* create a new vnn structure for this ip address */
445         vnn = talloc_zero(ctdb, struct ctdb_vnn);
446         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
447         vnn->iface = talloc_strdup(vnn, iface);
448         CTDB_NO_MEMORY(ctdb, vnn->iface);
449         vnn->public_address      = *addr;
450         vnn->public_netmask_bits = mask;
451         vnn->pnn                 = -1;
452         
453         DLIST_ADD(ctdb->vnn, vnn);
454
455         return 0;
456 }
457
458
/*
  setup the event script directory

  Stores a talloc copy of script_dir, parented to ctdb, in
  ctdb->event_script_dir and returns 0. A failed copy is handled by
  CTDB_NO_MEMORY (presumably logs and returns -1 - confirm against the
  macro definition).
*/
int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
{
        ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
        CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
        return 0;
}
468
469 /*
470   setup the public address lists from a file
471 */
472 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
473 {
474         char **lines;
475         int nlines;
476         int i;
477
478         lines = file_lines_load(alist, &nlines, ctdb);
479         if (lines == NULL) {
480                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
481                 return -1;
482         }
483         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
484                 nlines--;
485         }
486
487         for (i=0;i<nlines;i++) {
488                 unsigned mask;
489                 ctdb_sock_addr addr;
490                 const char *addrstr;
491                 const char *iface;
492                 char *tok, *line;
493
494                 line = lines[i];
495                 while ((*line == ' ') || (*line == '\t')) {
496                         line++;
497                 }
498                 if (*line == '#') {
499                         continue;
500                 }
501                 if (strcmp(line, "") == 0) {
502                         continue;
503                 }
504                 tok = strtok(line, " \t");
505                 addrstr = tok;
506                 tok = strtok(NULL, " \t");
507                 if (tok == NULL) {
508                         if (NULL == ctdb->default_public_interface) {
509                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
510                                          i+1));
511                                 talloc_free(lines);
512                                 return -1;
513                         }
514                         iface = ctdb->default_public_interface;
515                 } else {
516                         iface = tok;
517                 }
518
519                 if (!addrstr || !parse_ip_mask(addrstr, iface, &addr, &mask)) {
520                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
521                         talloc_free(lines);
522                         return -1;
523                 }
524                 if (ctdb_add_public_address(ctdb, &addr, mask, iface)) {
525                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
526                         talloc_free(lines);
527                         return -1;
528                 }
529         }
530
531         talloc_free(lines);
532         return 0;
533 }
534
535
536
537
/*
  singly linked list of public addresses merged from all nodes
 */
struct ctdb_public_ip_list {
        struct ctdb_public_ip_list *next;
        uint32_t pnn;           /* node the address is assigned to; compared against -1 for "unassigned" */
        ctdb_sock_addr addr;    /* the public address */
};
543
544
545 /* Given a physical node, return the number of
546    public addresses that is currently assigned to this node.
547 */
548 static int node_ip_coverage(struct ctdb_context *ctdb, 
549         int32_t pnn,
550         struct ctdb_public_ip_list *ips)
551 {
552         int num=0;
553
554         for (;ips;ips=ips->next) {
555                 if (ips->pnn == pnn) {
556                         num++;
557                 }
558         }
559         return num;
560 }
561
562
563 /* Check if this is a public ip known to the node, i.e. can that
564    node takeover this ip ?
565 */
566 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn, 
567                 struct ctdb_public_ip_list *ip)
568 {
569         struct ctdb_all_public_ips *public_ips;
570         int i;
571
572         public_ips = ctdb->nodes[pnn]->public_ips;
573
574         if (public_ips == NULL) {
575                 return -1;
576         }
577
578         for (i=0;i<public_ips->num;i++) {
579                 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
580                         /* yes, this node can serve this public ip */
581                         return 0;
582                 }
583         }
584
585         return -1;
586 }
587
588
589 /* search the node lists list for a node to takeover this ip.
590    pick the node that currently are serving the least number of ips
591    so that the ips get spread out evenly.
592 */
593 static int find_takeover_node(struct ctdb_context *ctdb, 
594                 struct ctdb_node_map *nodemap, uint32_t mask, 
595                 struct ctdb_public_ip_list *ip,
596                 struct ctdb_public_ip_list *all_ips)
597 {
598         int pnn, min=0, num;
599         int i;
600
601         pnn    = -1;
602         for (i=0;i<nodemap->num;i++) {
603                 if (nodemap->nodes[i].flags & mask) {
604                         /* This node is not healty and can not be used to serve
605                            a public address 
606                         */
607                         continue;
608                 }
609
610                 /* verify that this node can serve this ip */
611                 if (can_node_serve_ip(ctdb, i, ip)) {
612                         /* no it couldnt   so skip to the next node */
613                         continue;
614                 }
615
616                 num = node_ip_coverage(ctdb, i, all_ips);
617                 /* was this the first node we checked ? */
618                 if (pnn == -1) {
619                         pnn = i;
620                         min  = num;
621                 } else {
622                         if (num < min) {
623                                 pnn = i;
624                                 min  = num;
625                         }
626                 }
627         }       
628         if (pnn == -1) {
629                 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
630                         ctdb_addr_to_str(&ip->addr)));
631
632                 return -1;
633         }
634
635         ip->pnn = pnn;
636         return 0;
637 }
638
639 struct ctdb_public_ip_list *
640 add_ip_to_merged_list(struct ctdb_context *ctdb,
641                         TALLOC_CTX *tmp_ctx, 
642                         struct ctdb_public_ip_list *ip_list, 
643                         struct ctdb_public_ip *ip)
644 {
645         struct ctdb_public_ip_list *tmp_ip; 
646
647         /* do we already have this ip in our merged list ?*/
648         for (tmp_ip=ip_list;tmp_ip;tmp_ip=tmp_ip->next) {
649
650                 /* we already have this public ip in the list */
651                 if (ctdb_same_ip(&tmp_ip->addr, &ip->addr)) {
652                         return ip_list;
653                 }
654         }
655
656         /* this is a new public ip, we must add it to the list */
657         tmp_ip = talloc_zero(tmp_ctx, struct ctdb_public_ip_list);
658         CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
659         tmp_ip->pnn  = ip->pnn;
660         tmp_ip->addr = ip->addr;
661         tmp_ip->next = ip_list;
662
663         return tmp_ip;
664 }
665
666 struct ctdb_public_ip_list *
667 create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
668 {
669         int i, j;
670         struct ctdb_public_ip_list *ip_list = NULL;
671         struct ctdb_all_public_ips *public_ips;
672
673         for (i=0;i<ctdb->num_nodes;i++) {
674                 public_ips = ctdb->nodes[i]->public_ips;
675
676                 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
677                         continue;
678                 }
679
680                 /* there were no public ips for this node */
681                 if (public_ips == NULL) {
682                         continue;
683                 }               
684
685                 for (j=0;j<public_ips->num;j++) {
686                         ip_list = add_ip_to_merged_list(ctdb, tmp_ctx,
687                                         ip_list, &public_ips->ips[j]);
688                 }
689         }
690
691         return ip_list;
692 }
693
694 /*
695   make any IP alias changes for public addresses that are necessary 
696  */
697 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
698 {
699         int i, num_healthy, retries;
700         struct ctdb_public_ip ip;
701         struct ctdb_public_ipv4 ipv4;
702         uint32_t mask;
703         struct ctdb_public_ip_list *all_ips, *tmp_ip;
704         int maxnode, maxnum=0, minnode, minnum=0, num;
705         TDB_DATA data;
706         struct timeval timeout;
707         struct client_async_data *async_data;
708         struct ctdb_client_control_state *state;
709         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
710
711
712         ZERO_STRUCT(ip);
713
714         /* Count how many completely healthy nodes we have */
715         num_healthy = 0;
716         for (i=0;i<nodemap->num;i++) {
717                 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
718                         num_healthy++;
719                 }
720         }
721
722         if (num_healthy > 0) {
723                 /* We have healthy nodes, so only consider them for 
724                    serving public addresses
725                 */
726                 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
727         } else {
728                 /* We didnt have any completely healthy nodes so
729                    use "disabled" nodes as a fallback
730                 */
731                 mask = NODE_FLAGS_INACTIVE;
732         }
733
734         /* since nodes only know about those public addresses that
735            can be served by that particular node, no single node has
736            a full list of all public addresses that exist in the cluster.
737            Walk over all node structures and create a merged list of
738            all public addresses that exist in the cluster.
739         */
740         all_ips = create_merged_ip_list(ctdb, tmp_ctx);
741
742         /* If we want deterministic ip allocations, i.e. that the ip addresses
743            will always be allocated the same way for a specific set of
744            available/unavailable nodes.
745         */
746         if (1 == ctdb->tunable.deterministic_public_ips) {              
747                 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
748                 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
749                         tmp_ip->pnn = i%nodemap->num;
750                 }
751         }
752
753
754         /* mark all public addresses with a masked node as being served by
755            node -1
756         */
757         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
758                 if (tmp_ip->pnn == -1) {
759                         continue;
760                 }
761                 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
762                         tmp_ip->pnn = -1;
763                 }
764         }
765
766         /* verify that the assigned nodes can serve that public ip
767            and set it to -1 if not
768         */
769         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
770                 if (tmp_ip->pnn == -1) {
771                         continue;
772                 }
773                 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
774                         /* this node can not serve this ip. */
775                         tmp_ip->pnn = -1;
776                 }
777         }
778
779
780         /* now we must redistribute all public addresses with takeover node
781            -1 among the nodes available
782         */
783         retries = 0;
784 try_again:
785         /* loop over all ip's and find a physical node to cover for 
786            each unassigned ip.
787         */
788         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
789                 if (tmp_ip->pnn == -1) {
790                         if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
791                                 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
792                                         ctdb_addr_to_str(&tmp_ip->addr)));
793                         }
794                 }
795         }
796
797         /* If we dont want ips to fail back after a node becomes healthy
798            again, we wont even try to reallocat the ip addresses so that
799            they are evenly spread out.
800            This can NOT be used at the same time as DeterministicIPs !
801         */
802         if (1 == ctdb->tunable.no_ip_failback) {
803                 if (1 == ctdb->tunable.deterministic_public_ips) {
804                         DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
805                 }
806                 goto finished;
807         }
808
809
810         /* now, try to make sure the ip adresses are evenly distributed
811            across the node.
812            for each ip address, loop over all nodes that can serve this
813            ip and make sure that the difference between the node
814            serving the most and the node serving the least ip's are not greater
815            than 1.
816         */
817         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
818                 if (tmp_ip->pnn == -1) {
819                         continue;
820                 }
821
822                 /* Get the highest and lowest number of ips's served by any 
823                    valid node which can serve this ip.
824                 */
825                 maxnode = -1;
826                 minnode = -1;
827                 for (i=0;i<nodemap->num;i++) {
828                         if (nodemap->nodes[i].flags & mask) {
829                                 continue;
830                         }
831
832                         /* only check nodes that can actually serve this ip */
833                         if (can_node_serve_ip(ctdb, i, tmp_ip)) {
834                                 /* no it couldnt   so skip to the next node */
835                                 continue;
836                         }
837
838                         num = node_ip_coverage(ctdb, i, all_ips);
839                         if (maxnode == -1) {
840                                 maxnode = i;
841                                 maxnum  = num;
842                         } else {
843                                 if (num > maxnum) {
844                                         maxnode = i;
845                                         maxnum  = num;
846                                 }
847                         }
848                         if (minnode == -1) {
849                                 minnode = i;
850                                 minnum  = num;
851                         } else {
852                                 if (num < minnum) {
853                                         minnode = i;
854                                         minnum  = num;
855                                 }
856                         }
857                 }
858                 if (maxnode == -1) {
859                         DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
860                                 ctdb_addr_to_str(&tmp_ip->addr)));
861
862                         continue;
863                 }
864
865                 /* If we want deterministic IPs then dont try to reallocate 
866                    them to spread out the load.
867                 */
868                 if (1 == ctdb->tunable.deterministic_public_ips) {
869                         continue;
870                 }
871
872                 /* if the spread between the smallest and largest coverage by
873                    a node is >=2 we steal one of the ips from the node with
874                    most coverage to even things out a bit.
875                    try to do this at most 5 times  since we dont want to spend
876                    too much time balancing the ip coverage.
877                 */
878                 if ( (maxnum > minnum+1)
879                   && (retries < 5) ){
880                         struct ctdb_public_ip_list *tmp;
881
882                         /* mark one of maxnode's vnn's as unassigned and try
883                            again
884                         */
885                         for (tmp=all_ips;tmp;tmp=tmp->next) {
886                                 if (tmp->pnn == maxnode) {
887                                         tmp->pnn = -1;
888                                         retries++;
889                                         goto try_again;
890                                 }
891                         }
892                 }
893         }
894
895
896         /* finished distributing the public addresses, now just send the 
897            info out to the nodes
898         */
899 finished:
900
901         /* at this point ->pnn is the node which will own each IP
902            or -1 if there is no node that can cover this ip
903         */
904
905         /* now tell all nodes to delete any alias that they should not
906            have.  This will be a NOOP on nodes that don't currently
907            hold the given alias */
908         async_data = talloc_zero(tmp_ctx, struct client_async_data);
909         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
910
911         for (i=0;i<nodemap->num;i++) {
912                 /* don't talk to unconnected nodes, but do talk to banned nodes */
913                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
914                         continue;
915                 }
916
917                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
918                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
919                                 /* This node should be serving this
920                                    vnn so dont tell it to release the ip
921                                 */
922                                 continue;
923                         }
924                         if (tmp_ip->addr.sa.sa_family == AF_INET) {
925                                 ipv4.pnn = tmp_ip->pnn;
926                                 ipv4.sin = tmp_ip->addr.ip;
927
928                                 timeout = TAKEOVER_TIMEOUT();
929                                 data.dsize = sizeof(ipv4);
930                                 data.dptr  = (uint8_t *)&ipv4;
931                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
932                                                 0, CTDB_CONTROL_RELEASE_IPv4, 0,
933                                                 data, async_data,
934                                                 &timeout, NULL);
935                         } else {
936                                 ip.pnn  = tmp_ip->pnn;
937                                 ip.addr = tmp_ip->addr;
938
939                                 timeout = TAKEOVER_TIMEOUT();
940                                 data.dsize = sizeof(ip);
941                                 data.dptr  = (uint8_t *)&ip;
942                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
943                                                 0, CTDB_CONTROL_RELEASE_IP, 0,
944                                                 data, async_data,
945                                                 &timeout, NULL);
946                         }
947
948                         if (state == NULL) {
949                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
950                                 talloc_free(tmp_ctx);
951                                 return -1;
952                         }
953                 
954                         ctdb_client_async_add(async_data, state);
955                 }
956         }
957         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
958                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
959                 talloc_free(tmp_ctx);
960                 return -1;
961         }
962         talloc_free(async_data);
963
964
965         /* tell all nodes to get their own IPs */
966         async_data = talloc_zero(tmp_ctx, struct client_async_data);
967         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
968         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
969                 if (tmp_ip->pnn == -1) {
970                         /* this IP won't be taken over */
971                         continue;
972                 }
973
974                 if (tmp_ip->addr.sa.sa_family == AF_INET) {
975                         ipv4.pnn = tmp_ip->pnn;
976                         ipv4.sin = tmp_ip->addr.ip;
977
978                         timeout = TAKEOVER_TIMEOUT();
979                         data.dsize = sizeof(ipv4);
980                         data.dptr  = (uint8_t *)&ipv4;
981                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
982                                         0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
983                                         data, async_data,
984                                         &timeout, NULL);
985                 } else {
986                         ip.pnn  = tmp_ip->pnn;
987                         ip.addr = tmp_ip->addr;
988
989                         timeout = TAKEOVER_TIMEOUT();
990                         data.dsize = sizeof(ip);
991                         data.dptr  = (uint8_t *)&ip;
992                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
993                                         0, CTDB_CONTROL_TAKEOVER_IP, 0,
994                                         data, async_data,
995                                         &timeout, NULL);
996                 }
997                 if (state == NULL) {
998                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
999                         talloc_free(tmp_ctx);
1000                         return -1;
1001                 }
1002                 
1003                 ctdb_client_async_add(async_data, state);
1004         }
1005         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1006                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1007                 talloc_free(tmp_ctx);
1008                 return -1;
1009         }
1010
1011         talloc_free(tmp_ctx);
1012         return 0;
1013 }
1014
1015
1016 /*
1017   destroy a ctdb_client_ip structure
1018  */
1019 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1020 {
1021         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1022                 ctdb_addr_to_str(&ip->addr),
1023                 ntohs(ip->addr.ip.sin_port),
1024                 ip->client_id));
1025
1026         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1027         return 0;
1028 }
1029
1030 /*
1031   called by a client to inform us of a TCP connection that it is managing
1032   that should tickled with an ACK when IP takeover is done
1033   we handle both the old ipv4 style of packets as well as the new ipv4/6
1034   pdus.
1035  */
1036 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1037                                 TDB_DATA indata)
1038 {
1039         struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1040         struct ctdb_control_tcp *old_addr = NULL;
1041         struct ctdb_control_tcp_addr new_addr;
1042         struct ctdb_control_tcp_addr *tcp_sock = NULL;
1043         struct ctdb_tcp_list *tcp;
1044         struct ctdb_control_tcp_vnn t;
1045         int ret;
1046         TDB_DATA data;
1047         struct ctdb_client_ip *ip;
1048         struct ctdb_vnn *vnn;
1049         ctdb_sock_addr addr;
1050
1051         switch (indata.dsize) {
1052         case sizeof(struct ctdb_control_tcp):
1053                 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1054                 ZERO_STRUCT(new_addr);
1055                 tcp_sock = &new_addr;
1056                 tcp_sock->src.ip  = old_addr->src;
1057                 tcp_sock->dest.ip = old_addr->dest;
1058                 break;
1059         case sizeof(struct ctdb_control_tcp_addr):
1060                 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1061                 break;
1062         default:
1063                 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed to ctdb_control_tcp_client. size was %d but only allowed sizes are %lu and %lu\n", (int)indata.dsize, sizeof(struct ctdb_control_tcp), sizeof(struct ctdb_control_tcp_addr)));
1064                 return -1;
1065         }
1066
1067         addr = tcp_sock->src;
1068         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1069         addr = tcp_sock->dest;
1070         ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1071
1072         ZERO_STRUCT(addr);
1073         memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1074         vnn = find_public_ip_vnn(ctdb, &addr);
1075         if (vnn == NULL) {
1076                 switch (addr.sa.sa_family) {
1077                 case AF_INET:
1078                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1079                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1080                                         ctdb_addr_to_str(&addr)));
1081                         }
1082                         break;
1083                 case AF_INET6:
1084                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1085                                 ctdb_addr_to_str(&addr)));
1086                         break;
1087                 default:
1088                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1089                 }
1090
1091                 return 0;
1092         }
1093
1094         if (vnn->pnn != ctdb->pnn) {
1095                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1096                         ctdb_addr_to_str(&addr),
1097                         client_id, client->pid));
1098                 /* failing this call will tell smbd to die */
1099                 return -1;
1100         }
1101
1102         ip = talloc(client, struct ctdb_client_ip);
1103         CTDB_NO_MEMORY(ctdb, ip);
1104
1105         ip->ctdb      = ctdb;
1106         ip->addr      = addr;
1107         ip->client_id = client_id;
1108         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1109         DLIST_ADD(ctdb->client_ip_list, ip);
1110
1111         tcp = talloc(client, struct ctdb_tcp_list);
1112         CTDB_NO_MEMORY(ctdb, tcp);
1113
1114         tcp->connection.src_addr = tcp_sock->src;
1115         tcp->connection.dst_addr = tcp_sock->dest;
1116
1117         DLIST_ADD(client->tcp_list, tcp);
1118
1119         t.src  = tcp_sock->src;
1120         t.dest = tcp_sock->dest;
1121
1122         data.dptr = (uint8_t *)&t;
1123         data.dsize = sizeof(t);
1124
1125         switch (addr.sa.sa_family) {
1126         case AF_INET:
1127                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1128                         (unsigned)ntohs(tcp_sock->dest.ip.sin_port), 
1129                         ctdb_addr_to_str(&tcp_sock->src),
1130                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1131                 break;
1132         case AF_INET6:
1133                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1134                         (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port), 
1135                         ctdb_addr_to_str(&tcp_sock->src),
1136                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1137                 break;
1138         default:
1139                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1140         }
1141
1142
1143         /* tell all nodes about this tcp connection */
1144         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1145                                        CTDB_CONTROL_TCP_ADD,
1146                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1147         if (ret != 0) {
1148                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1149                 return -1;
1150         }
1151
1152         return 0;
1153 }
1154
1155 /*
1156   find a tcp address on a list
1157  */
1158 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array, 
1159                                            struct ctdb_tcp_connection *tcp)
1160 {
1161         int i;
1162
1163         if (array == NULL) {
1164                 return NULL;
1165         }
1166
1167         for (i=0;i<array->num;i++) {
1168                 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1169                     ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1170                         return &array->connections[i];
1171                 }
1172         }
1173         return NULL;
1174 }
1175
1176 /*
1177   called by a daemon to inform us of a TCP connection that one of its
1178   clients managing that should tickled with an ACK when IP takeover is
1179   done
1180  */
1181 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
1182 {
1183         struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
1184         struct ctdb_tcp_array *tcparray;
1185         struct ctdb_tcp_connection tcp;
1186         struct ctdb_vnn *vnn;
1187
1188         vnn = find_public_ip_vnn(ctdb, &p->dest);
1189         if (vnn == NULL) {
1190                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1191                         ctdb_addr_to_str(&p->dest)));
1192
1193                 return -1;
1194         }
1195
1196
1197         tcparray = vnn->tcp_array;
1198
1199         /* If this is the first tickle */
1200         if (tcparray == NULL) {
1201                 tcparray = talloc_size(ctdb->nodes, 
1202                         offsetof(struct ctdb_tcp_array, connections) +
1203                         sizeof(struct ctdb_tcp_connection) * 1);
1204                 CTDB_NO_MEMORY(ctdb, tcparray);
1205                 vnn->tcp_array = tcparray;
1206
1207                 tcparray->num = 0;
1208                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1209                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1210
1211                 tcparray->connections[tcparray->num].src_addr = p->src;
1212                 tcparray->connections[tcparray->num].dst_addr = p->dest;
1213                 tcparray->num++;
1214                 return 0;
1215         }
1216
1217
1218         /* Do we already have this tickle ?*/
1219         tcp.src_addr = p->src;
1220         tcp.dst_addr = p->dest;
1221         if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1222                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1223                         ctdb_addr_to_str(&tcp.dst_addr),
1224                         ntohs(tcp.dst_addr.ip.sin_port),
1225                         vnn->pnn));
1226                 return 0;
1227         }
1228
1229         /* A new tickle, we must add it to the array */
1230         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1231                                         struct ctdb_tcp_connection,
1232                                         tcparray->num+1);
1233         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1234
1235         vnn->tcp_array = tcparray;
1236         tcparray->connections[tcparray->num].src_addr = p->src;
1237         tcparray->connections[tcparray->num].dst_addr = p->dest;
1238         tcparray->num++;
1239                                 
1240         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1241                 ctdb_addr_to_str(&tcp.dst_addr),
1242                 ntohs(tcp.dst_addr.ip.sin_port),
1243                 vnn->pnn));
1244
1245         return 0;
1246 }
1247
1248
1249 /*
1250   called by a daemon to inform us of a TCP connection that one of its
1251   clients managing that should tickled with an ACK when IP takeover is
1252   done
1253  */
1254 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1255 {
1256         struct ctdb_tcp_connection *tcpp;
1257         struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1258
1259         if (vnn == NULL) {
1260                 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1261                         ctdb_addr_to_str(&conn->dst_addr)));
1262                 return;
1263         }
1264
1265         /* if the array is empty we cant remove it
1266            and we dont need to do anything
1267          */
1268         if (vnn->tcp_array == NULL) {
1269                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1270                         ctdb_addr_to_str(&conn->dst_addr),
1271                         ntohs(conn->dst_addr.ip.sin_port)));
1272                 return;
1273         }
1274
1275
1276         /* See if we know this connection
1277            if we dont know this connection  then we dont need to do anything
1278          */
1279         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1280         if (tcpp == NULL) {
1281                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1282                         ctdb_addr_to_str(&conn->dst_addr),
1283                         ntohs(conn->dst_addr.ip.sin_port)));
1284                 return;
1285         }
1286
1287
1288         /* We need to remove this entry from the array.
1289            Instead of allocating a new array and copying data to it
1290            we cheat and just copy the last entry in the existing array
1291            to the entry that is to be removed and just shring the 
1292            ->num field
1293          */
1294         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1295         vnn->tcp_array->num--;
1296
1297         /* If we deleted the last entry we also need to remove the entire array
1298          */
1299         if (vnn->tcp_array->num == 0) {
1300                 talloc_free(vnn->tcp_array);
1301                 vnn->tcp_array = NULL;
1302         }               
1303
1304         vnn->tcp_update_needed = true;
1305
1306         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1307                 ctdb_addr_to_str(&conn->src_addr),
1308                 ntohs(conn->src_addr.ip.sin_port)));
1309 }
1310
1311
/*
  called when a daemon restarts - meant to send all tickles for all public
  addresses we are serving immediately to the new node.
  currently a stub: nothing is sent and 0 is always returned.
 */
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
{
        /* XXX here we should send all tickles we are serving to the new node */
        const int32_t status = 0;

        return status;
}
1321
1322
1323 /*
1324   called when a client structure goes away - hook to remove
1325   elements from the tcp_list in all daemons
1326  */
1327 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1328 {
1329         while (client->tcp_list) {
1330                 struct ctdb_tcp_list *tcp = client->tcp_list;
1331                 DLIST_REMOVE(client->tcp_list, tcp);
1332                 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1333         }
1334 }
1335
1336
1337 /*
1338   release all IPs on shutdown
1339  */
1340 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1341 {
1342         struct ctdb_vnn *vnn;
1343
1344         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1345                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1346                         continue;
1347                 }
1348                 if (vnn->pnn == ctdb->pnn) {
1349                         vnn->pnn = -1;
1350                 }
1351                 ctdb_event_script(ctdb, "releaseip %s %s %u",
1352                                   vnn->iface, 
1353                                   talloc_strdup(ctdb, ctdb_addr_to_str(&vnn->public_address)),
1354                                   vnn->public_netmask_bits);
1355                 release_kill_clients(ctdb, &vnn->public_address);
1356         }
1357 }
1358
1359
1360 /*
1361   get list of public IPs
1362  */
1363 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
1364                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1365 {
1366         int i, num, len;
1367         struct ctdb_all_public_ips *ips;
1368         struct ctdb_vnn *vnn;
1369
1370         /* count how many public ip structures we have */
1371         num = 0;
1372         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1373                 num++;
1374         }
1375
1376         len = offsetof(struct ctdb_all_public_ips, ips) + 
1377                 num*sizeof(struct ctdb_public_ip);
1378         ips = talloc_zero_size(outdata, len);
1379         CTDB_NO_MEMORY(ctdb, ips);
1380
1381         outdata->dsize = len;
1382         outdata->dptr  = (uint8_t *)ips;
1383
1384         ips->num = num;
1385         i = 0;
1386         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1387                 ips->ips[i].pnn  = vnn->pnn;
1388                 ips->ips[i].addr = vnn->public_address;
1389                 i++;
1390         }
1391
1392         return 0;
1393 }
1394
1395
1396 /*
1397   get list of public IPs, old ipv4 style.  only returns ipv4 addresses
1398  */
1399 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb, 
1400                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1401 {
1402         int i, num, len;
1403         struct ctdb_all_public_ipsv4 *ips;
1404         struct ctdb_vnn *vnn;
1405
1406         /* count how many public ip structures we have */
1407         num = 0;
1408         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1409                 if (vnn->public_address.sa.sa_family != AF_INET) {
1410                         continue;
1411                 }
1412                 num++;
1413         }
1414
1415         len = offsetof(struct ctdb_all_public_ipsv4, ips) + 
1416                 num*sizeof(struct ctdb_public_ipv4);
1417         ips = talloc_zero_size(outdata, len);
1418         CTDB_NO_MEMORY(ctdb, ips);
1419
1420         outdata->dsize = len;
1421         outdata->dptr  = (uint8_t *)ips;
1422
1423         ips->num = num;
1424         i = 0;
1425         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1426                 if (vnn->public_address.sa.sa_family != AF_INET) {
1427                         continue;
1428                 }
1429                 ips->ips[i].pnn = vnn->pnn;
1430                 ips->ips[i].sin = vnn->public_address.ip;
1431                 i++;
1432         }
1433
1434         return 0;
1435 }
1436
1437
1438 /* 
1439    structure containing the listening socket and the list of tcp connections
1440    that the ctdb daemon is to kill
1441 */
1442 struct ctdb_kill_tcp {
1443         struct ctdb_vnn *vnn;
1444         struct ctdb_context *ctdb;
1445         int capture_fd;
1446         struct fd_event *fde;
1447         trbt_tree_t *connections;
1448         void *private_data;
1449 };
1450
1451 /*
1452   a tcp connection that is to be killed
1453  */
1454 struct ctdb_killtcp_con {
1455         ctdb_sock_addr src_addr;
1456         ctdb_sock_addr dst_addr;
1457         int count;
1458         struct ctdb_kill_tcp *killtcp;
1459 };
1460
1461 /* this function is used to create a key to represent this socketpair
1462    in the killtcp tree.
1463    this key is used to insert and lookup matching socketpairs that are
1464    to be tickled and RST
1465 */
1466 #define KILLTCP_KEYLEN  10
1467 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
1468 {
1469         static uint32_t key[KILLTCP_KEYLEN];
1470
1471         bzero(key, sizeof(key));
1472
1473         if (src->sa.sa_family != dst->sa.sa_family) {
1474                 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
1475                 return key;
1476         }
1477         
1478         switch (src->sa.sa_family) {
1479         case AF_INET:
1480                 key[0]  = dst->ip.sin_addr.s_addr;
1481                 key[1]  = src->ip.sin_addr.s_addr;
1482                 key[2]  = dst->ip.sin_port;
1483                 key[3]  = src->ip.sin_port;
1484                 break;
1485         case AF_INET6:
1486                 key[0]  = dst->ip6.sin6_addr.s6_addr32[3];
1487                 key[1]  = src->ip6.sin6_addr.s6_addr32[3];
1488                 key[2]  = dst->ip6.sin6_addr.s6_addr32[2];
1489                 key[3]  = src->ip6.sin6_addr.s6_addr32[2];
1490                 key[4]  = dst->ip6.sin6_addr.s6_addr32[1];
1491                 key[5]  = src->ip6.sin6_addr.s6_addr32[1];
1492                 key[6]  = dst->ip6.sin6_addr.s6_addr32[0];
1493                 key[7]  = src->ip6.sin6_addr.s6_addr32[0];
1494                 key[8]  = dst->ip6.sin6_port;
1495                 key[9]  = src->ip6.sin6_port;
1496                 break;
1497         default:
1498                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
1499                 return key;
1500         }
1501
1502         return key;
1503 }
1504
1505 /*
1506   called when we get a read event on the raw socket
1507  */
1508 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde, 
1509                                 uint16_t flags, void *private_data)
1510 {
1511         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1512         struct ctdb_killtcp_con *con;
1513         ctdb_sock_addr src, dst;
1514         uint32_t ack_seq, seq;
1515
1516         if (!(flags & EVENT_FD_READ)) {
1517                 return;
1518         }
1519
1520         if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
1521                                 killtcp->private_data,
1522                                 &src, &dst,
1523                                 &ack_seq, &seq) != 0) {
1524                 /* probably a non-tcp ACK packet */
1525                 return;
1526         }
1527
1528         /* check if we have this guy in our list of connections
1529            to kill
1530         */
1531         con = trbt_lookuparray32(killtcp->connections, 
1532                         KILLTCP_KEYLEN, killtcp_key(&src, &dst));
1533         if (con == NULL) {
1534                 /* no this was some other packet we can just ignore */
1535                 return;
1536         }
1537
1538         /* This one has been tickled !
1539            now reset him and remove him from the list.
1540          */
1541         DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
1542                 ntohs(con->dst_addr.ip.sin_port),
1543                 ctdb_addr_to_str(&con->src_addr),
1544                 ntohs(con->src_addr.ip.sin_port)));
1545
1546         ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
1547         talloc_free(con);
1548 }
1549
1550
1551 /* when traversing the list of all tcp connections to send tickle acks to
1552    (so that we can capture the ack coming back and kill the connection
1553     by a RST)
1554    this callback is called for each connection we are currently trying to kill
1555 */
1556 static void tickle_connection_traverse(void *param, void *data)
1557 {
1558         struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
1559
1560         /* have tried too many times, just give up */
1561         if (con->count >= 5) {
1562                 talloc_free(con);
1563                 return;
1564         }
1565
1566         /* othervise, try tickling it again */
1567         con->count++;
1568         ctdb_sys_send_tcp(
1569                 (ctdb_sock_addr *)&con->dst_addr,
1570                 (ctdb_sock_addr *)&con->src_addr,
1571                 0, 0, 0);
1572 }
1573
1574
1575 /* 
1576    called every second until all sentenced connections have been reset
1577  */
1578 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te, 
1579                                               struct timeval t, void *private_data)
1580 {
1581         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1582
1583
1584         /* loop over all connections sending tickle ACKs */
1585         trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, NULL);
1586
1587
1588         /* If there are no more connections to kill we can remove the
1589            entire killtcp structure
1590          */
1591         if ( (killtcp->connections == NULL) || 
1592              (killtcp->connections->root == NULL) ) {
1593                 talloc_free(killtcp);
1594                 return;
1595         }
1596
1597         /* try tickling them again in a seconds time
1598          */
1599         event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
1600                         ctdb_tickle_sentenced_connections, killtcp);
1601 }
1602
1603 /*
1604   destroy the killtcp structure
1605  */
1606 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
1607 {
1608         killtcp->vnn->killtcp = NULL;
1609         return 0;
1610 }
1611
1612
/* insert callback for the killtcp tree.
   nothing fancy here, just unconditionally replace any existing
   connection structure with the new one: parm is returned as is and
   data is ignored.

   dont even free the old one if it did exist, that one is talloc_stolen
   by the same node in the tree anyway and will be deleted when the new data 
   is deleted
*/
static void *add_killtcp_callback(void *parm, void *data)
{
        void *replacement = parm;

        return replacement;
}
1624
1625 /*
1626   add a tcp socket to the list of connections we want to RST
1627  */
1628 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, 
1629                                        ctdb_sock_addr *s,
1630                                        ctdb_sock_addr *d)
1631 {
1632         ctdb_sock_addr src, dst;
1633         struct ctdb_kill_tcp *killtcp;
1634         struct ctdb_killtcp_con *con;
1635         struct ctdb_vnn *vnn;
1636
1637         ctdb_canonicalize_ip(s, &src);
1638         ctdb_canonicalize_ip(d, &dst);
1639
1640         vnn = find_public_ip_vnn(ctdb, &dst);
1641         if (vnn == NULL) {
1642                 vnn = find_public_ip_vnn(ctdb, &src);
1643         }
1644         if (vnn == NULL) {
1645                 /* if it is not a public ip   it could be our 'single ip' */
1646                 if (ctdb->single_ip_vnn) {
1647                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
1648                                 vnn = ctdb->single_ip_vnn;
1649                         }
1650                 }
1651         }
1652         if (vnn == NULL) {
1653                 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n")); 
1654                 return -1;
1655         }
1656
1657         killtcp = vnn->killtcp;
1658         
1659         /* If this is the first connection to kill we must allocate
1660            a new structure
1661          */
1662         if (killtcp == NULL) {
1663                 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
1664                 CTDB_NO_MEMORY(ctdb, killtcp);
1665
1666                 killtcp->vnn         = vnn;
1667                 killtcp->ctdb        = ctdb;
1668                 killtcp->capture_fd  = -1;
1669                 killtcp->connections = trbt_create(killtcp, 0);
1670
1671                 vnn->killtcp         = killtcp;
1672                 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
1673         }
1674
1675
1676
1677         /* create a structure that describes this connection we want to
1678            RST and store it in killtcp->connections
1679         */
1680         con = talloc(killtcp, struct ctdb_killtcp_con);
1681         CTDB_NO_MEMORY(ctdb, con);
1682         con->src_addr = src;
1683         con->dst_addr = dst;
1684         con->count    = 0;
1685         con->killtcp  = killtcp;
1686
1687
1688         trbt_insertarray32_callback(killtcp->connections,
1689                         KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
1690                         add_killtcp_callback, con);
1691
1692         /* 
1693            If we dont have a socket to listen on yet we must create it
1694          */
1695         if (killtcp->capture_fd == -1) {
1696                 killtcp->capture_fd = ctdb_sys_open_capture_socket(vnn->iface, &killtcp->private_data);
1697                 if (killtcp->capture_fd == -1) {
1698                         DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing socket for killtcp\n"));
1699                         goto failed;
1700                 }
1701         }
1702
1703
1704         if (killtcp->fde == NULL) {
1705                 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd, 
1706                                             EVENT_FD_READ | EVENT_FD_AUTOCLOSE, 
1707                                             capture_tcp_handler, killtcp);
1708
1709                 /* We also need to set up some events to tickle all these connections
1710                    until they are all reset
1711                 */
1712                 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
1713                                 ctdb_tickle_sentenced_connections, killtcp);
1714         }
1715
1716         /* tickle him once now */
1717         ctdb_sys_send_tcp(
1718                 &con->dst_addr,
1719                 &con->src_addr,
1720                 0, 0, 0);
1721
1722         return 0;
1723
1724 failed:
1725         talloc_free(vnn->killtcp);
1726         vnn->killtcp = NULL;
1727         return -1;
1728 }
1729
1730 /*
1731   kill a TCP connection.
1732  */
1733 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
1734 {
1735         struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
1736
1737         return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
1738 }
1739
1740 /*
1741   called by a daemon to inform us of the entire list of TCP tickles for
1742   a particular public address.
1743   this control should only be sent by the node that is currently serving
1744   that public address.
1745  */
1746 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1747 {
1748         struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
1749         struct ctdb_tcp_array *tcparray;
1750         struct ctdb_vnn *vnn;
1751
1752         /* We must at least have tickles.num or else we cant verify the size
1753            of the received data blob
1754          */
1755         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
1756                                         tickles.connections)) {
1757                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
1758                 return -1;
1759         }
1760
1761         /* verify that the size of data matches what we expect */
1762         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
1763                                 tickles.connections)
1764                          + sizeof(struct ctdb_tcp_connection)
1765                                  * list->tickles.num) {
1766                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
1767                 return -1;
1768         }       
1769
1770         vnn = find_public_ip_vnn(ctdb, &list->addr);
1771         if (vnn == NULL) {
1772                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n", 
1773                         ctdb_addr_to_str(&list->addr)));
1774
1775                 return 1;
1776         }
1777
1778         /* remove any old ticklelist we might have */
1779         talloc_free(vnn->tcp_array);
1780         vnn->tcp_array = NULL;
1781
1782         tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
1783         CTDB_NO_MEMORY(ctdb, tcparray);
1784
1785         tcparray->num = list->tickles.num;
1786
1787         tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
1788         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1789
1790         memcpy(tcparray->connections, &list->tickles.connections[0], 
1791                sizeof(struct ctdb_tcp_connection)*tcparray->num);
1792
1793         /* We now have a new fresh tickle list array for this vnn */
1794         vnn->tcp_array = talloc_steal(vnn, tcparray);
1795         
1796         return 0;
1797 }
1798
1799 /*
1800   called to return the full list of tickles for the puclic address associated 
1801   with the provided vnn
1802  */
1803 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1804 {
1805         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1806         struct ctdb_control_tcp_tickle_list *list;
1807         struct ctdb_tcp_array *tcparray;
1808         int num;
1809         struct ctdb_vnn *vnn;
1810
1811         vnn = find_public_ip_vnn(ctdb, addr);
1812         if (vnn == NULL) {
1813                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n", 
1814                         ctdb_addr_to_str(addr)));
1815
1816                 return 1;
1817         }
1818
1819         tcparray = vnn->tcp_array;
1820         if (tcparray) {
1821                 num = tcparray->num;
1822         } else {
1823                 num = 0;
1824         }
1825
1826         outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
1827                                 tickles.connections)
1828                         + sizeof(struct ctdb_tcp_connection) * num;
1829
1830         outdata->dptr  = talloc_size(outdata, outdata->dsize);
1831         CTDB_NO_MEMORY(ctdb, outdata->dptr);
1832         list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
1833
1834         list->addr = *addr;
1835         list->tickles.num = num;
1836         if (num) {
1837                 memcpy(&list->tickles.connections[0], tcparray->connections, 
1838                         sizeof(struct ctdb_tcp_connection) * num);
1839         }
1840
1841         return 0;
1842 }
1843
1844
1845 /*
1846   set the list of all tcp tickles for a public address
1847  */
1848 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, 
1849                               struct timeval timeout, uint32_t destnode, 
1850                               ctdb_sock_addr *addr,
1851                               struct ctdb_tcp_array *tcparray)
1852 {
1853         int ret, num;
1854         TDB_DATA data;
1855         struct ctdb_control_tcp_tickle_list *list;
1856
1857         if (tcparray) {
1858                 num = tcparray->num;
1859         } else {
1860                 num = 0;
1861         }
1862
1863         data.dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
1864                                 tickles.connections) +
1865                         sizeof(struct ctdb_tcp_connection) * num;
1866         data.dptr = talloc_size(ctdb, data.dsize);
1867         CTDB_NO_MEMORY(ctdb, data.dptr);
1868
1869         list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
1870         list->addr = *addr;
1871         list->tickles.num = num;
1872         if (tcparray) {
1873                 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
1874         }
1875
1876         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1877                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
1878                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1879         if (ret != 0) {
1880                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
1881                 return -1;
1882         }
1883
1884         talloc_free(data.dptr);
1885
1886         return ret;
1887 }
1888
1889
1890 /*
1891   perform tickle updates if required
1892  */
1893 static void ctdb_update_tcp_tickles(struct event_context *ev, 
1894                                 struct timed_event *te, 
1895                                 struct timeval t, void *private_data)
1896 {
1897         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
1898         int ret;
1899         struct ctdb_vnn *vnn;
1900
1901         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1902                 /* we only send out updates for public addresses that 
1903                    we have taken over
1904                  */
1905                 if (ctdb->pnn != vnn->pnn) {
1906                         continue;
1907                 }
1908                 /* We only send out the updates if we need to */
1909                 if (!vnn->tcp_update_needed) {
1910                         continue;
1911                 }
1912                 ret = ctdb_ctrl_set_tcp_tickles(ctdb, 
1913                                 TAKEOVER_TIMEOUT(),
1914                                 CTDB_BROADCAST_CONNECTED,
1915                                 &vnn->public_address,
1916                                 vnn->tcp_array);
1917                 if (ret != 0) {
1918                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
1919                                 ctdb_addr_to_str(&vnn->public_address)));
1920                 }
1921         }
1922
1923         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1924                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
1925                              ctdb_update_tcp_tickles, ctdb);
1926 }               
1927         
1928
1929 /*
1930   start periodic update of tcp tickles
1931  */
1932 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
1933 {
1934         ctdb->tickle_update_context = talloc_new(ctdb);
1935
1936         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1937                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
1938                              ctdb_update_tcp_tickles, ctdb);
1939 }
1940
1941
1942
1943
1944 struct control_gratious_arp {
1945         struct ctdb_context *ctdb;
1946         ctdb_sock_addr addr;
1947         const char *iface;
1948         int count;
1949 };
1950
1951 /*
1952   send a control_gratuitous arp
1953  */
1954 static void send_gratious_arp(struct event_context *ev, struct timed_event *te, 
1955                                   struct timeval t, void *private_data)
1956 {
1957         int ret;
1958         struct control_gratious_arp *arp = talloc_get_type(private_data, 
1959                                                         struct control_gratious_arp);
1960
1961         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
1962         if (ret != 0) {
1963                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp failed (%s)\n", strerror(errno)));
1964         }
1965
1966
1967         arp->count++;
1968         if (arp->count == CTDB_ARP_REPEAT) {
1969                 talloc_free(arp);
1970                 return;
1971         }
1972
1973         event_add_timed(arp->ctdb->ev, arp, 
1974                         timeval_current_ofs(CTDB_ARP_INTERVAL, 0), 
1975                         send_gratious_arp, arp);
1976 }
1977
1978
1979 /*
1980   send a gratious arp 
1981  */
1982 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
1983 {
1984         struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
1985         struct control_gratious_arp *arp;
1986
1987         /* verify the size of indata */
1988         if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
1989                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
1990                                  (unsigned)indata.dsize, 
1991                                  (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
1992                 return -1;
1993         }
1994         if (indata.dsize != 
1995                 ( offsetof(struct ctdb_control_gratious_arp, iface)
1996                 + gratious_arp->len ) ){
1997
1998                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
1999                         "but should be %u bytes\n", 
2000                          (unsigned)indata.dsize, 
2001                          (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2002                 return -1;
2003         }
2004
2005
2006         arp = talloc(ctdb, struct control_gratious_arp);
2007         CTDB_NO_MEMORY(ctdb, arp);
2008
2009         arp->ctdb  = ctdb;
2010         arp->addr   = gratious_arp->addr;
2011         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2012         CTDB_NO_MEMORY(ctdb, arp->iface);
2013         arp->count = 0;
2014         
2015         event_add_timed(arp->ctdb->ev, arp, 
2016                         timeval_zero(), send_gratious_arp, arp);
2017
2018         return 0;
2019 }
2020
2021 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2022 {
2023         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2024         int ret;
2025
2026         /* verify the size of indata */
2027         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2028                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2029                 return -1;
2030         }
2031         if (indata.dsize != 
2032                 ( offsetof(struct ctdb_control_ip_iface, iface)
2033                 + pub->len ) ){
2034
2035                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2036                         "but should be %u bytes\n", 
2037                          (unsigned)indata.dsize, 
2038                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2039                 return -1;
2040         }
2041
2042         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2043
2044         if (ret != 0) {
2045                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2046                 return -1;
2047         }
2048
2049         return 0;
2050 }
2051
/*
  completion callback for the releaseip event started by
  del_public_address.  private_data is the temporary talloc context
  created for that event; it is released here.
 */
static void delete_ip_callback(struct ctdb_context *ctdb, int status, 
                                void *private_data)
{
        void *event_ctx = private_data;

        talloc_free(event_ctx);
}
2060
2061 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2062 {
2063         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2064         struct ctdb_vnn *vnn;
2065         int ret;
2066
2067         /* verify the size of indata */
2068         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2069                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2070                 return -1;
2071         }
2072         if (indata.dsize != 
2073                 ( offsetof(struct ctdb_control_ip_iface, iface)
2074                 + pub->len ) ){
2075
2076                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2077                         "but should be %u bytes\n", 
2078                          (unsigned)indata.dsize, 
2079                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2080                 return -1;
2081         }
2082
2083         /* walk over all public addresses until we find a match */
2084         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2085                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2086                         TALLOC_CTX *mem_ctx = talloc_new(ctdb);
2087
2088                         DLIST_REMOVE(ctdb->vnn, vnn);
2089
2090                         ret = ctdb_event_script_callback(ctdb, 
2091                                          timeval_current_ofs(ctdb->tunable.script_timeout, 0),
2092                                          mem_ctx, delete_ip_callback, mem_ctx,
2093                                          "releaseip %s %s %u",
2094                                          vnn->iface, 
2095                                          talloc_strdup(mem_ctx, ctdb_addr_to_str(&vnn->public_address)),
2096                                          vnn->public_netmask_bits);
2097                         talloc_free(vnn);
2098                         if (ret != 0) {
2099                                 return -1;
2100                         }
2101                         return 0;
2102                 }
2103         }
2104
2105         return -1;
2106 }
2107