4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
7 This library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 3 of the License, or (at your option) any later version.
12 This library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public
18 License along with this library; if not, see <http://www.gnu.org/licenses/>.
21 #include "lib/events/events.h"
22 #include "../tdb/include/tdb.h"
23 #include "system/network.h"
24 #include "system/filesys.h"
25 #include "system/wait.h"
26 #include "../include/ctdb_private.h"
/* Timeout handed to ctdb_ctrl_release_ip() and ctdb_ctrl_takeover_ip() in
   ctdb_takeover_run(). Presumably 5 seconds from now - confirm against
   timeval_current_ofs(). */
29 #define TAKEOVER_TIMEOUT() timeval_current_ofs(5,0)
/* First argument of timeval_current_ofs() when ctdb_control_send_arp()
   re-arms its own timed event (presumably seconds - confirm). */
31 #define CTDB_ARP_INTERVAL 1
/* Value that arp->count is compared against in ctdb_control_send_arp(). */
32 #define CTDB_ARP_REPEAT 3
/* State passed as private_data to the ctdb_control_send_arp() timed event.
   Allocated in ctdb_control_takeover_ip() under ctdb->takeover.last_ctx.
   NOTE(review): ctdb_control_send_arp() also reads arp->count, which is not
   among the members visible in this view - confirm the full definition. */
34 struct ctdb_takeover_arp {
/* daemon context; used to reach ctdb->ev and ctdb->takeover */
35 struct ctdb_context *ctdb;
/* address handed to ctdb_sys_send_arp() */
37 struct sockaddr_in sin;
/* connections that get a tickle ack after each ARP */
38 struct ctdb_tcp_list *tcp_list;
42 lists of tcp endpoints
/* One tracked TCP connection, linked with DLIST_ADD()/DLIST_REMOVE().
   NOTE(review): several functions in this file read tcp->vnn, which is not
   among the members visible in this view - confirm the full definition. */
44 struct ctdb_tcp_list {
/* list linkage */
45 struct ctdb_tcp_list *prev, *next;
/* first endpoint handed to ctdb_sys_send_ack() */
47 struct sockaddr_in saddr;
/* filled from the dest field of the control payload; its address is what
   takeover and release compare against the public IP */
48 struct sockaddr_in daddr;
/*
  Timed-event callback that announces a taken-over address.

  private_data is a struct ctdb_takeover_arp. Each run sends an ARP for
  arp->sin on the takeover interface and then a tickle ack for every entry
  on arp->tcp_list. The visible tail re-arms this same callback
  CTDB_ARP_INTERVAL later, parented on ctdb->takeover.last_ctx.
  Send failures are only logged.

  NOTE(review): the body of the arp->count == CTDB_ARP_REPEAT test is not
  visible in this view; presumably it ends the repeat cycle - confirm.
*/
56 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
57 struct timeval t, void *private_data)
59 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
60 struct ctdb_takeover_arp);
62 struct ctdb_tcp_list *tcp;
64 ret = ctdb_sys_send_arp(&arp->sin, arp->ctdb->takeover.interface);
/* NOTE(review): this message has no space after __location__, unlike the
   other __location__ messages in this file */
66 DEBUG(0,(__location__ "sending of arp failed (%s)\n", strerror(errno)));
/* tickle every known connection for this address */
69 for (tcp=arp->tcp_list;tcp;tcp=tcp->next) {
70 DEBUG(2,("sending tcp tickle ack for %u->%s:%u\n",
71 (unsigned)ntohs(tcp->daddr.sin_port),
72 inet_ntoa(tcp->saddr.sin_addr),
73 (unsigned)ntohs(tcp->saddr.sin_port)));
74 ret = ctdb_sys_send_ack(&tcp->saddr, &tcp->daddr);
76 DEBUG(0,(__location__ " Failed to send tcp tickle ack for %s\n",
77 inet_ntoa(tcp->saddr.sin_addr)));
83 if (arp->count == CTDB_ARP_REPEAT) {
/* schedule the next round with this same arp state */
88 event_add_timed(arp->ctdb->ev, arp->ctdb->takeover.last_ctx,
89 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
90 ctdb_control_send_arp, arp);
95 take over an ip address
/*
  Control handler: take over the public IP given in indata.

  indata.dptr is read as a struct sockaddr_in. The visible steps are: run
  the takeip event script, make sure ctdb->takeover.last_ctx exists,
  allocate a zeroed struct ctdb_takeover_arp under it, attach one list entry
  per known connection whose destination address equals the IP, and
  schedule ctdb_control_send_arp() with a zero timeout.

  NOTE(review): return statements and the bodies of the error checks are not
  visible in this view.
*/
97 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, TDB_DATA indata)
100 struct sockaddr_in *sin = (struct sockaddr_in *)indata.dptr;
101 struct ctdb_takeover_arp *arp;
/* inet_ntoa() returns a pointer to a static buffer, so ip is only valid
   until the next inet_ntoa() call */
102 char *ip = inet_ntoa(sin->sin_addr);
103 struct ctdb_tcp_list *tcp;
/* NOTE(review): body not visible; presumably returns early when this host
   already holds the address - confirm */
105 if (ctdb_sys_have_ip(ip)) {
/* NOTE(review): Takover in the message below looks like a typo for Takeover */
109 DEBUG(0,("Takover of IP %s/%u on interface %s\n",
110 ip, ctdb->nodes[ctdb->vnn]->public_netmask_bits,
111 ctdb->takeover.interface));
/* NOTE(review): the format has three conversions but only two arguments
   are visible here; the middle one is on a line missing from this view */
112 ret = ctdb_event_script(ctdb, "takeip %s %s %u",
113 ctdb->takeover.interface,
115 ctdb->nodes[ctdb->vnn]->public_netmask_bits);
117 DEBUG(0,(__location__ " Failed to takeover IP %s on interface %s\n",
118 ip, ctdb->takeover.interface));
/* last_ctx is created on demand; ctdb_control_release_ip() frees it to
   stop pending arps */
122 if (!ctdb->takeover.last_ctx) {
123 ctdb->takeover.last_ctx = talloc_new(ctdb);
124 CTDB_NO_MEMORY(ctdb, ctdb->takeover.last_ctx);
127 arp = talloc_zero(ctdb->takeover.last_ctx, struct ctdb_takeover_arp);
128 CTDB_NO_MEMORY(ctdb, arp);
133 /* add all of the known tcp connections for this IP to the
134 list of tcp connections to send tickle acks for */
135 for (tcp=ctdb->tcp_list;tcp;tcp=tcp->next) {
136 if (sin->sin_addr.s_addr == tcp->daddr.sin_addr.s_addr) {
/* entry is a talloc child of arp; NOTE(review): the line that fills in t2
   is not visible in this view */
137 struct ctdb_tcp_list *t2 = talloc(arp, struct ctdb_tcp_list);
138 CTDB_NO_MEMORY(ctdb, t2);
140 DLIST_ADD(arp->tcp_list, t2);
/* first ARP round is scheduled with a zero timeout */
144 event_add_timed(arp->ctdb->ev, arp->ctdb->takeover.last_ctx,
145 timeval_zero(), ctdb_control_send_arp, arp);
151 release an ip address
/*
  Control handler: release the public IP given in indata.

  indata.dptr is read as a struct sockaddr_in. The visible steps are: free
  ctdb->takeover.last_ctx, run the releaseip event script, send a
  CTDB_SRVID_RELEASE_IP message carrying the dotted-quad string to this
  node, and send a CTDB_CONTROL_TCP_ADD control for every connection of
  this node whose destination address equals the IP.

  NOTE(review): return statements and the bodies of the error checks are not
  visible in this view.
*/
153 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, TDB_DATA indata)
155 struct sockaddr_in *sin = (struct sockaddr_in *)indata.dptr;
/* inet_ntoa() returns a pointer to a static buffer, so ip is only valid
   until the next inet_ntoa() call */
157 char *ip = inet_ntoa(sin->sin_addr);
159 struct ctdb_tcp_list *tcp;
/* NOTE(review): body not visible; presumably returns early when this host
   does not hold the address - confirm */
161 if (!ctdb_sys_have_ip(ip)) {
165 DEBUG(0,("Release of IP %s/%u on interface %s\n",
166 ip, ctdb->nodes[ctdb->vnn]->public_netmask_bits,
167 ctdb->takeover.interface));
169 /* stop any previous arps */
170 talloc_free(ctdb->takeover.last_ctx);
171 ctdb->takeover.last_ctx = NULL;
/* NOTE(review): the format has three conversions but only two arguments
   are visible here; the middle one is on a line missing from this view */
173 ret = ctdb_event_script(ctdb, "releaseip %s %s %u",
174 ctdb->takeover.interface,
176 ctdb->nodes[ctdb->vnn]->public_netmask_bits);
178 DEBUG(0,(__location__ " Failed to release IP %s on interface %s\n",
179 ip, ctdb->takeover.interface));
183 /* send a message to all clients of this node telling them
184 that the cluster has been reconfigured and they should
185 release any sockets on this IP */
186 data.dptr = (uint8_t *)ip;
/* payload includes the terminating NUL */
187 data.dsize = strlen(ip)+1;
189 ctdb_daemon_send_message(ctdb, ctdb->vnn, CTDB_SRVID_RELEASE_IP, data);
191 /* tell other nodes about any tcp connections we were holding with this IP */
192 for (tcp=ctdb->tcp_list;tcp;tcp=tcp->next) {
193 if (tcp->vnn == ctdb->vnn &&
194 sin->sin_addr.s_addr == tcp->daddr.sin_addr.s_addr) {
/* NOTE(review): the lines that fill in t are not visible in this view */
195 struct ctdb_control_tcp_vnn t;
201 data.dptr = (uint8_t *)&t;
202 data.dsize = sizeof(t);
/* sent with CTDB_CTRL_FLAG_NOREPLY; the return value is not used */
204 ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_VNNMAP, 0,
205 CTDB_CONTROL_TCP_ADD,
206 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
216 setup the event script
/*
  Store a private copy of script in ctdb->takeover.event_script.

  The copy is made with talloc_strdup() using ctdb as the talloc parent and
  is checked with CTDB_NO_MEMORY().
  NOTE(review): the return statement is not visible in this view.
*/
218 int ctdb_set_event_script(struct ctdb_context *ctdb, const char *script)
220 ctdb->takeover.event_script = talloc_strdup(ctdb, script);
221 CTDB_NO_MEMORY(ctdb, ctdb->takeover.event_script);
226 setup the public address list from a file
/*
  Load the public address list from the file alist, one entry per node.

  Trailing empty lines are ignored and the remaining line count must equal
  ctdb->num_nodes. Line i becomes ctdb->nodes[i]->public_address; the text
  after the slash is parsed as the netmask length and rejected when above
  32, and the address is validated with inet_aton().

  NOTE(review): declarations, return statements and several conditions are
  not visible in this view.
*/
228 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
234 lines = file_lines_load(alist, &nlines, ctdb);
236 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
/* skip empty lines at the end of the file; loop body not visible here */
239 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
243 if (nlines != ctdb->num_nodes) {
244 DEBUG(0,("Number of lines in %s does not match number of nodes!\n", alist));
249 for (i=0;i<nlines;i++) {
/* the node owns its copy of the line */
253 ctdb->nodes[i]->public_address = talloc_strdup(ctdb->nodes[i], lines[i]);
254 CTDB_NO_MEMORY(ctdb, ctdb->nodes[i]->public_address);
/* -1 is also what ctdb_takeover_run() stores when no node can host the IP */
255 ctdb->nodes[i]->takeover_vnn = -1;
257 /* see if they supplied a netmask length */
258 p = strchr(ctdb->nodes[i]->public_address, '/');
260 DEBUG(0,("You must supply a netmask for public address %s\n",
261 ctdb->nodes[i]->public_address));
/* NOTE(review): atoi() cannot report errors, so non-numeric text after the
   slash is taken as 0 */
265 ctdb->nodes[i]->public_netmask_bits = atoi(p+1);
267 if (ctdb->nodes[i]->public_netmask_bits > 32) {
268 DEBUG(0, ("Illegal netmask for IP %s\n", ctdb->nodes[i]->public_address));
/* NOTE(review): presumably the slash has been cut off public_address on a
   line missing from this view before this parse - confirm */
272 if (inet_aton(ctdb->nodes[i]->public_address, &in) == 0) {
273 DEBUG(0,("Badly formed IP '%s' in public address list\n", ctdb->nodes[i]->public_address));
283 see if two IPs are on the same subnet
/*
  Compare two dotted-quad strings under a netmask of netmask_bits bits.

  Both addresses are parsed with inet_aton(), converted to host order and
  masked before being compared.

  NOTE(review): the declaration of mask and the return statements are not
  visible in this view.
*/
285 static bool ctdb_same_subnet(const char *ip1, const char *ip2, uint8_t netmask_bits)
287 struct in_addr in1, in2;
/* NOTE(review): the inet_aton() results are ignored, so in1 or in2 stays
   uninitialised when a string does not parse */
290 inet_aton(ip1, &in1);
291 inet_aton(ip2, &in2);
/* 1LL keeps the shift well defined for netmask_bits == 0 (shift by 32) */
293 mask = ~((1LL<<(32-netmask_bits))-1);
295 if ((ntohl(in1.s_addr) & mask) != (ntohl(in2.s_addr) & mask)) {
303 make any IP alias changes for public addresses that are necessary
/*
  Recompute which node hosts each public IP and push the result out.

  Pass 1 sets ctdb->nodes[i]->takeover_vnn: a connected node keeps its own
  IP; otherwise the nodes after i are scanned cyclically for a connected
  node on the same subnet, and -1 is stored when none is found.
  Pass 2 asks every connected node to release each IP it is not assigned.
  Pass 3 asks the assigned vnn of every IP to take it over.

  nodemap->nodes[] and ctdb->nodes[] are indexed with the same i and j
  (assumes both arrays share ordering and size - TODO confirm).

  NOTE(review): declarations, return statements, the else branch structure
  and the termination test of the inner scan are not visible in this view.
*/
305 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
310 /* work out which node will look after each public IP */
311 for (i=0;i<nodemap->num;i++) {
312 if (nodemap->nodes[i].flags & NODE_FLAGS_CONNECTED) {
313 ctdb->nodes[i]->takeover_vnn = nodemap->nodes[i].vnn;
315 /* assign the IP of this dead node to the next higher node */
316 for (j=(i+1)%nodemap->num;
318 j=(j+1)%nodemap->num) {
/* the candidate must be connected and share a subnet with the dead node,
   judged with the netmask of the candidate */
319 if ((nodemap->nodes[j].flags & NODE_FLAGS_CONNECTED) &&
320 ctdb_same_subnet(ctdb->nodes[j]->public_address,
321 ctdb->nodes[i]->public_address,
322 ctdb->nodes[j]->public_netmask_bits)) {
323 ctdb->nodes[i]->takeover_vnn = nodemap->nodes[j].vnn;
328 DEBUG(0,(__location__ " No node available on same network to take %s\n",
329 ctdb->nodes[i]->public_address));
330 ctdb->nodes[i]->takeover_vnn = -1;
335 /* at this point ctdb->nodes[i]->takeover_vnn is the vnn which will own each IP */
338 /* now tell all nodes to delete any alias that they should not
339 have. This will be a NOOP on nodes that don't currently
340 hold the given alias */
341 for (i=0;i<nodemap->num;i++) {
342 /* don't talk to unconnected nodes */
343 if (!(nodemap->nodes[i].flags & NODE_FLAGS_CONNECTED)) continue;
345 /* tell this node to delete all of the aliases that it should not have */
346 for (j=0;j<nodemap->num;j++) {
347 if (ctdb->nodes[j]->takeover_vnn != nodemap->nodes[i].vnn) {
348 ret = ctdb_ctrl_release_ip(ctdb, TAKEOVER_TIMEOUT(),
349 nodemap->nodes[i].vnn,
350 ctdb->nodes[j]->public_address);
352 DEBUG(0,("Failed to tell vnn %u to release IP %s\n",
353 nodemap->nodes[i].vnn,
354 ctdb->nodes[j]->public_address));
361 /* tell all nodes to get their own IPs */
362 for (i=0;i<nodemap->num;i++) {
/* NOTE(review): takeover_vnn can be -1 here (set above when no node is
   available) and is passed on as the destination without a check - verify
   that ctdb_ctrl_takeover_ip() copes, or skip such entries */
363 ret = ctdb_ctrl_takeover_ip(ctdb, TAKEOVER_TIMEOUT(),
364 ctdb->nodes[i]->takeover_vnn,
365 ctdb->nodes[i]->public_address);
367 DEBUG(0,("Failed asking vnn %u to take over IP %s\n",
368 ctdb->nodes[i]->takeover_vnn,
369 ctdb->nodes[i]->public_address));
379 called by a client to inform us of a TCP connection that it is managing
380 that should be tickled with an ACK when IP takeover is done
/*
  Control handler: a local client registers a TCP connection.

  The client is looked up by client_id and indata.dptr is read as a
  struct ctdb_control_tcp. A list entry is allocated as a talloc child of
  the client, linked onto client->tcp_list, and a CTDB_CONTROL_TCP_ADD
  control carrying a struct ctdb_control_tcp_vnn is sent to
  CTDB_BROADCAST_VNNMAP.

  NOTE(review): the rest of the parameter list, the lines that fill in the
  remaining fields of tcp and t, and the return statements are not visible
  in this view.
*/
382 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id, uint32_t vnn,
/* NOTE(review): no NULL check on the lookup result is visible before
   client is used below - confirm */
385 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
386 struct ctdb_control_tcp *p = (struct ctdb_control_tcp *)indata.dptr;
387 struct ctdb_tcp_list *tcp;
388 struct ctdb_control_tcp_vnn t;
392 tcp = talloc(client, struct ctdb_tcp_list);
393 CTDB_NO_MEMORY(ctdb, tcp);
397 tcp->daddr = p->dest;
399 DLIST_ADD(client->tcp_list, tcp);
405 data.dptr = (uint8_t *)&t;
406 data.dsize = sizeof(t);
408 /* tell all nodes about this tcp connection */
409 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_VNNMAP, 0,
410 CTDB_CONTROL_TCP_ADD,
411 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
413 DEBUG(0,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
421 see if two sockaddr_in are the same
/*
  Return true when in1 and in2 have the same family, port and IPv4 address.

  Only these three fields are compared; any other bytes of the structures
  do not take part in the comparison.
*/
423 static bool same_sockaddr_in(struct sockaddr_in *in1, struct sockaddr_in *in2)
425 return in1->sin_family == in2->sin_family &&
426 in1->sin_port == in2->sin_port &&
427 in1->sin_addr.s_addr == in2->sin_addr.s_addr;
431 find a tcp address on a list
/*
  Look on list for an entry with the same endpoints as tcp.

  An entry matches when both its saddr and its daddr compare equal under
  same_sockaddr_in().

  NOTE(review): the loop that advances list and the return statements are
  not visible in this view; callers compare the result against NULL.
*/
433 static struct ctdb_tcp_list *ctdb_tcp_find(struct ctdb_tcp_list *list,
434 struct ctdb_tcp_list *tcp)
437 if (same_sockaddr_in(&list->saddr, &tcp->saddr) &&
438 same_sockaddr_in(&list->daddr, &tcp->daddr)) {
447 called by a daemon to inform us of a TCP connection that one of its
448 clients is managing and that should be tickled with an ACK when IP takeover is
/*
  Control handler: record a TCP connection reported by a daemon.

  indata.dptr is read as a struct ctdb_control_tcp_vnn. A new entry is
  allocated as a talloc child of ctdb and linked onto ctdb->tcp_list unless
  ctdb_tcp_find() already finds an entry with the same endpoints.

  NOTE(review): the lines that fill in the remaining fields of tcp and the
  return statements are not visible in this view.
*/
451 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
453 struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
454 struct ctdb_tcp_list *tcp;
456 tcp = talloc(ctdb, struct ctdb_tcp_list);
457 CTDB_NO_MEMORY(ctdb, tcp);
461 tcp->daddr = p->dest;
/* only link the entry when it is not already known */
463 if (NULL == ctdb_tcp_find(ctdb->tcp_list, tcp)) {
464 DLIST_ADD(ctdb->tcp_list, tcp);
465 DEBUG(2,("Added tickle info for %s:%u from vnn %u\n",
466 inet_ntoa(tcp->daddr.sin_addr), ntohs(tcp->daddr.sin_port),
/* NOTE(review): no free of the unused tcp is visible on this duplicate
   path - verify it is not leaked */
469 DEBUG(4,("Already had tickle info for %s:%u from vnn %u\n",
470 inet_ntoa(tcp->daddr.sin_addr), ntohs(tcp->daddr.sin_port),
478 called by a daemon to inform us that a TCP connection one of its
479 clients was managing no longer needs to be tickled with an ACK when IP takeover is
/*
  Control handler: forget a TCP connection reported by a daemon.

  indata.dptr is read as a struct ctdb_control_tcp_vnn. A stack-local key t
  is looked up with ctdb_tcp_find() and the entry found is unlinked from
  ctdb->tcp_list.

  NOTE(review): the lines that fill in t, the check of the lookup result,
  any free of the entry and the return statements are not visible in this
  view.
*/
482 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
484 struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
485 struct ctdb_tcp_list t, *tcp;
491 tcp = ctdb_tcp_find(ctdb->tcp_list, &t);
493 DEBUG(2,("Removed tickle info for %s:%u from vnn %u\n",
494 inet_ntoa(tcp->daddr.sin_addr), ntohs(tcp->daddr.sin_port),
496 DLIST_REMOVE(ctdb->tcp_list, tcp);
505 called when a daemon restarts - wipes all tcp entries from that vnn
/*
  Control handler run when the daemon with the given vnn (re)starts.

  Walks ctdb->tcp_list with a saved next pointer so entries can be unlinked
  during the walk. Entries whose vnn equals the restarted vnn are removed
  from the list. For entries whose vnn equals ctdb->vnn, a
  CTDB_CONTROL_TCP_ADD control carrying a struct ctdb_control_tcp_vnn is
  sent to the restarted node with CTDB_CTRL_FLAG_NOREPLY.

  NOTE(review): the assignment of next, the lines that fill in t, any free
  of removed entries and the return statement are not visible in this view.
  NOTE(review): the comment opened inside the loop has no closing line in
  this view - check the original file.
*/
507 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
509 struct ctdb_tcp_list *tcp, *next;
510 for (tcp=ctdb->tcp_list;tcp;tcp=next) {
512 if (tcp->vnn == vnn) {
513 DLIST_REMOVE(ctdb->tcp_list, tcp);
517 /* and tell the new guy about any that he should have
519 if (tcp->vnn == ctdb->vnn) {
520 struct ctdb_control_tcp_vnn t;
527 data.dptr = (uint8_t *)&t;
528 data.dsize = sizeof(t);
530 ctdb_daemon_send_control(ctdb, vnn, 0,
531 CTDB_CONTROL_TCP_ADD,
532 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
540 called when a client structure goes away - hook to remove
541 elements from the tcp_list in all daemons
// Hook for the teardown of a client structure.
//
// Pops entries off client->tcp_list until it is empty. For each entry a
// CTDB_CONTROL_TCP_REMOVE control carrying a struct ctdb_control_tcp_vnn is
// sent to CTDB_BROADCAST_VNNMAP with CTDB_CTRL_FLAG_NOREPLY; the return
// value of the send is not used.
//
// NOTE(review): the lines that fill in p are not visible in this view.
543 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
545 while (client->tcp_list) {
547 struct ctdb_control_tcp_vnn p;
// always take the current list head, then unlink it
548 struct ctdb_tcp_list *tcp = client->tcp_list;
549 DLIST_REMOVE(client->tcp_list, tcp);
553 data.dptr = (uint8_t *)&p;
554 data.dsize = sizeof(p);
555 ctdb_daemon_send_control(client->ctdb, CTDB_BROADCAST_VNNMAP, 0,
556 CTDB_CONTROL_TCP_REMOVE,
557 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
564 release all IPs on shutdown
// Release every public IP that this host currently holds.
//
// For each node in ctdb->nodes, when ctdb_sys_have_ip() reports that this
// host has the public address of that node, the releaseip event script is
// run with the takeover interface, the address and the netmask length.
// The result of ctdb_event_script() is not checked.
//
// NOTE(review): the body of the takeover.enabled test is not visible in
// this view; presumably it returns early when takeover is disabled.
566 void ctdb_release_all_ips(struct ctdb_context *ctdb)
570 if (!ctdb->takeover.enabled) {
574 for (i=0;i<ctdb->num_nodes;i++) {
575 struct ctdb_node *node = ctdb->nodes[i];
576 if (ctdb_sys_have_ip(node->public_address)) {
577 ctdb_event_script(ctdb, "releaseip %s %s %u",
578 ctdb->takeover.interface,
579 node->public_address,
580 node->public_netmask_bits);