2 ctdb main protocol code
4 Copyright (C) Andrew Tridgell 2006
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "lib/util/dlinklist.h"
23 #include "system/network.h"
24 #include "system/filesys.h"
25 #include "../include/ctdb_private.h"
28 choose the transport we will use
/*
 * Record the transport name on the context.
 * The string is copied with talloc_strdup() using ctdb as the talloc
 * parent, and the copy is then handed to CTDB_NO_MEMORY() for the
 * allocation check.
 * NOTE(review): the exact failure behaviour of CTDB_NO_MEMORY is defined
 * outside this file - confirm against ctdb_private.h.
 */
30 int ctdb_set_transport(struct ctdb_context *ctdb, const char *transport)
32 ctdb->transport = talloc_strdup(ctdb, transport);
33 CTDB_NO_MEMORY(ctdb, ctdb->transport);
39 Check whether an ip is a valid node ip
40 Returns the node id for this ip address or -1
/*
 * Linear scan over ctdb->nodes[0 .. num_nodes-1] looking for a node whose
 * address string equals nodeip. The comparison is a plain strcmp(), i.e.
 * an exact textual match of the stored address against the argument.
 */
42 int ctdb_ip_to_nodeid(struct ctdb_context *ctdb, const char *nodeip)
46 for (nodeid=0;nodeid<ctdb->num_nodes;nodeid++) {
/* entries flagged NODE_FLAGS_DELETED are tested before the address compare;
   presumably they are skipped - TODO confirm */
47 if (ctdb->nodes[nodeid]->flags & NODE_FLAGS_DELETED) {
/* strcmp() returning 0 means the stored address text equals nodeip */
50 if (!strcmp(ctdb->nodes[nodeid]->address.address, nodeip)) {
59 choose the recovery lock file
/*
 * Replace the recovery lock file path stored on the context.
 * Any previously stored path is freed first; the new path is a talloc
 * copy of file owned by ctdb.
 */
61 int ctdb_set_recovery_lock_file(struct ctdb_context *ctdb, const char *file)
/* drop the old path so the field never points at freed memory */
63 if (ctdb->recovery_lock_file != NULL) {
64 talloc_free(ctdb->recovery_lock_file);
65 ctdb->recovery_lock_file = NULL;
/* NOTE(review): the condition guarding this branch is not visible here.
   Going by the message it covers the case where no lock file is given;
   the verify_recovery_lock tunable is switched off - confirm. */
69 DEBUG(DEBUG_ALERT,("Recovery lock file set to \"\". Disabling recovery lock checking\n"));
70 ctdb->tunable.verify_recovery_lock = 0;
/* store a private copy of the new path and check the allocation */
74 ctdb->recovery_lock_file = talloc_strdup(ctdb, file);
75 CTDB_NO_MEMORY(ctdb, ctdb->recovery_lock_file);
81 set the directory for the local databases
/*
 * Store a talloc copy of dir in ctdb->db_directory.
 * The copy is checked against NULL explicitly rather than through
 * CTDB_NO_MEMORY().
 * NOTE(review): a previously stored directory string is overwritten
 * without a visible free - confirm this setter is only called once.
 */
83 int ctdb_set_tdb_dir(struct ctdb_context *ctdb, const char *dir)
85 ctdb->db_directory = talloc_strdup(ctdb, dir);
86 if (ctdb->db_directory == NULL) {
93 set the directory for the persistent databases
/*
 * Store a talloc copy of dir in ctdb->db_directory_persistent.
 * Same shape as ctdb_set_tdb_dir(): duplicate the string under ctdb and
 * test the result against NULL.
 */
95 int ctdb_set_tdb_dir_persistent(struct ctdb_context *ctdb, const char *dir)
97 ctdb->db_directory_persistent = talloc_strdup(ctdb, dir);
98 if (ctdb->db_directory_persistent == NULL) {
105 set the directory for internal state databases
/*
 * Store a talloc copy of dir in ctdb->db_directory_state.
 * Same shape as ctdb_set_tdb_dir(): duplicate the string under ctdb and
 * test the result against NULL.
 */
107 int ctdb_set_tdb_dir_state(struct ctdb_context *ctdb, const char *dir)
109 ctdb->db_directory_state = talloc_strdup(ctdb, dir);
110 if (ctdb->db_directory_state == NULL) {
117 add a node to the list of nodes
/*
 * Append one node, described by the address string nstr, to ctdb->nodes.
 * The pointer array is grown to num_nodes+1 slots and a zero-initialised
 * struct ctdb_node is allocated as a talloc child of the array.
 * NOTE(review): no NULL check on the talloc_asprintf() result stored in
 * node->name is visible - confirm whether one is needed.
 */
119 static int ctdb_add_node(struct ctdb_context *ctdb, char *nstr)
121 struct ctdb_node *node, **nodep;
/* grow the pointer array by one entry */
123 nodep = talloc_realloc(ctdb, ctdb->nodes, struct ctdb_node *, ctdb->num_nodes+1);
124 CTDB_NO_MEMORY(ctdb, nodep);
/* point at the new last slot and allocate the zeroed node into it */
127 nodep = &ctdb->nodes[ctdb->num_nodes];
128 (*nodep) = talloc_zero(ctdb->nodes, struct ctdb_node);
129 CTDB_NO_MEMORY(ctdb, *nodep);
/* parse nstr into node->address; a non-zero result takes the error branch */
132 if (ctdb_parse_address(ctdb, node, nstr, &node->address) != 0) {
/* printable node name, built with the "%s:%u" format from the parsed address */
136 node->name = talloc_asprintf(node, "%s:%u",
137 node->address.address,
139 /* this assumes that the nodes are kept in sorted order, and no gaps */
140 node->pnn = ctdb->num_nodes;
142 /* nodes start out disconnected and unhealthy */
143 node->flags = (NODE_FLAGS_DISCONNECTED | NODE_FLAGS_UNHEALTHY);
/* if a local address is configured and equals this node's address,
   this entry is the local node: remember its pnn on the context */
145 if (ctdb->address.address &&
146 ctdb_same_address(&ctdb->address, &node->address)) {
147 /* for automatic binding to interfaces, see tcp_connect.c */
148 ctdb->pnn = node->pnn;
152 node->dead_count = 0;
158 add an entry for a "deleted" node to the list of nodes.
159 a "deleted" node is a node that is commented out in the nodes file.
160 this prevents subsequent nodes in the nodes list from
161 changing their pnn value when a node is "deleted" by commenting it out and then
162 using "ctdb reloadnodes" at runtime.
/*
 * Append a placeholder entry to ctdb->nodes so that the slot (and thus
 * the pnn numbering of later entries) is preserved.
 * The placeholder gets the address 0.0.0.0, the name "0.0.0.0:0" and the
 * flags NODE_FLAGS_DELETED|NODE_FLAGS_DISCONNECTED.
 */
164 static int ctdb_add_deleted_node(struct ctdb_context *ctdb)
166 struct ctdb_node *node, **nodep;
/* grow the pointer array by one entry */
168 nodep = talloc_realloc(ctdb, ctdb->nodes, struct ctdb_node *, ctdb->num_nodes+1);
169 CTDB_NO_MEMORY(ctdb, nodep);
/* point at the new last slot and allocate the zeroed node into it */
172 nodep = &ctdb->nodes[ctdb->num_nodes];
173 (*nodep) = talloc_zero(ctdb->nodes, struct ctdb_node);
174 CTDB_NO_MEMORY(ctdb, *nodep);
/* give the placeholder a fixed dummy address; failure is logged with the
   index the node would have received */
177 if (ctdb_parse_address(ctdb, node, "0.0.0.0", &node->address) != 0) {
178 DEBUG(DEBUG_ERR,("Failed to setup deleted node %d\n", ctdb->num_nodes));
/* NOTE(review): the talloc_strdup() result is not visibly checked - confirm */
182 node->name = talloc_strdup(node, "0.0.0.0:0");
184 /* this assumes that the nodes are kept in sorted order, and no gaps */
185 node->pnn = ctdb->num_nodes;
187 /* this node is permanently deleted/disconnected */
188 node->flags = NODE_FLAGS_DELETED|NODE_FLAGS_DISCONNECTED;
191 node->dead_count = 0;
198 setup the node list from a file
/*
 * Rebuild ctdb->nodes from the nodes file nlist, then build a fresh
 * vnn map from the resulting list.
 *
 * Steps visible in this function:
 *  - the old node array is freed and the file is read into lines
 *    (allocated with ctdb as the talloc parent);
 *  - trailing empty lines are trimmed from the end of the list;
 *  - each remaining line has leading blanks and tabs stripped, empty
 *    lines are tested for separately, and the rest are passed to
 *    ctdb_add_node();
 *  - a vnn map with INVALID_GENERATION and num_present entries is
 *    allocated and filled with the indexes of non-deleted nodes.
 *
 * NOTE(review): the loop that fills vnn_map->map is bounded by
 * vnn_map->size (which is num_present) while i indexes ctdb->nodes.
 * If deleted placeholder entries exist, nodes at index >= num_present
 * look like they are never visited and the tail of the map stays
 * unset. Confirm whether the bound should be ctdb->num_nodes.
 */
200 static int ctdb_set_nlist(struct ctdb_context *ctdb, const char *nlist)
204 int i, j, num_present;
/* discard the previous node array before reloading */
206 talloc_free(ctdb->nodes);
210 lines = file_lines_load(nlist, &nlines, ctdb);
212 ctdb_set_error(ctdb, "Failed to load nlist '%s'\n", nlist);
/* ignore empty lines at the end of the file */
215 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
220 for (i=0; i < nlines; i++) {
224 /* strip leading spaces and tabs */
225 while((*node == ' ') || (*node == '\t')) {
/* NOTE(review): the test selecting this branch is not visible here;
   presumably it detects a commented-out line - confirm */
229 if (ctdb_add_deleted_node(ctdb) != 0) {
/* lines that are empty after stripping are tested for here */
235 if (strcmp(node, "") == 0) {
238 if (ctdb_add_node(ctdb, node) != 0) {
245 /* initialize the vnn mapping table now that we have the nodes list,
246 skipping any deleted nodes
248 ctdb->vnn_map = talloc(ctdb, struct ctdb_vnn_map);
249 CTDB_NO_MEMORY(ctdb, ctdb->vnn_map);
251 ctdb->vnn_map->generation = INVALID_GENERATION;
252 ctdb->vnn_map->size = num_present;
253 ctdb->vnn_map->map = talloc_array(ctdb->vnn_map, uint32_t, ctdb->vnn_map->size);
254 CTDB_NO_MEMORY(ctdb, ctdb->vnn_map->map);
256 for(i=0, j=0; i < ctdb->vnn_map->size; i++) {
257 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
260 ctdb->vnn_map->map[j] = i;
/*
 * Load the node list from the path stored in ctdb->nodes_file by calling
 * ctdb_set_nlist(). A failure is reported at DEBUG_ALERT level together
 * with the error string kept on the context.
 */
268 void ctdb_load_nodes_file(struct ctdb_context *ctdb)
272 ret = ctdb_set_nlist(ctdb, ctdb->nodes_file);
274 DEBUG(DEBUG_ALERT,("ctdb_set_nlist failed - %s\n", ctdb_errstr(ctdb)));
280 setup the local node address
/*
 * Parse address into ctdb->address and derive the printable name
 * ctdb->name from it using the "%s:%u" format.
 * Both the parsed address and the name use ctdb as the talloc parent.
 */
282 int ctdb_set_address(struct ctdb_context *ctdb, const char *address)
/* a non-zero result from the parser takes the error branch */
284 if (ctdb_parse_address(ctdb, ctdb, address, &ctdb->address) != 0) {
288 ctdb->name = talloc_asprintf(ctdb, "%s:%u",
289 ctdb->address.address,
296 return the number of active nodes
/*
 * Walk all ctdb->num_nodes entries and test each node for the absence of
 * every bit in NODE_FLAGS_INACTIVE; such nodes are the ones this
 * function treats as active.
 */
298 uint32_t ctdb_get_num_active_nodes(struct ctdb_context *ctdb)
302 for (i=0; i < ctdb->num_nodes; i++) {
303 if (!(ctdb->nodes[i]->flags & NODE_FLAGS_INACTIVE)) {
312 called when we need to process a packet. This can be a requeued packet
313 after a lockwait, or a real packet from another node
/*
 * Entry point for every packet this node has to process.
 *
 * Flow visible in this function:
 *  1. hdr is re-parented onto a temporary talloc context so that the
 *     final talloc_free(tmp_ctx) releases it unless a handler has
 *     stolen it elsewhere.
 *  2. For the call/dmaster operations listed in the first switch the
 *     packet is checked twice: the local node must not carry
 *     NODE_FLAGS_BANNED, and hdr->generation must equal the generation
 *     of the local vnn map. Both mismatches are logged at DEBUG_DEBUG.
 *  3. The second switch bumps the per-operation statistics counter and
 *     hands the packet to the matching ctdb_request_* / ctdb_reply_*
 *     handler. Keepalive packets only bump keepalive_packets_recv here.
 *     Unknown operations are logged at DEBUG_CRIT.
 *
 * NOTE(review): ctdb->nodes[ctdb->pnn] is indexed without a visible
 * range check on ctdb->pnn - confirm pnn is always valid at this point.
 */
315 void ctdb_input_pkt(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
319 /* place the packet as a child of the tmp_ctx. We then use
320 talloc_free() below to free it. If any of the calls want
321 to keep it, then they will steal it somewhere else, and the
322 talloc_free() will only free the tmp_ctx */
323 tmp_ctx = talloc_new(ctdb);
324 talloc_steal(tmp_ctx, hdr);
326 DEBUG(DEBUG_DEBUG,(__location__ " ctdb request %u of type %u length %u from "
327 "node %u to %u\n", hdr->reqid, hdr->operation, hdr->length,
328 hdr->srcnode, hdr->destnode));
330 switch (hdr->operation) {
332 case CTDB_REPLY_CALL:
333 case CTDB_REQ_DMASTER:
334 case CTDB_REPLY_DMASTER:
335 /* we do not allow these calls when banned */
336 if (ctdb->nodes[ctdb->pnn]->flags & NODE_FLAGS_BANNED) {
337 DEBUG(DEBUG_DEBUG,(__location__ " ctdb operation %u"
339 " length %u from node %u to %u while node"
341 hdr->operation, hdr->reqid,
343 hdr->srcnode, hdr->destnode));
347 /* for ctdb_call inter-node operations verify that the
348 remote node that sent us the call is running in the
349 same generation instance as this node
351 if (ctdb->vnn_map->generation != hdr->generation) {
352 DEBUG(DEBUG_DEBUG,(__location__ " ctdb operation %u"
354 " length %u from node %u to %u had an"
355 " invalid generation id:%u while our"
356 " generation id is:%u\n",
357 hdr->operation, hdr->reqid,
359 hdr->srcnode, hdr->destnode,
360 hdr->generation, ctdb->vnn_map->generation));
365 switch (hdr->operation) {
367 CTDB_INCREMENT_STAT(ctdb, node.req_call);
368 ctdb_request_call(ctdb, hdr);
371 case CTDB_REPLY_CALL:
372 CTDB_INCREMENT_STAT(ctdb, node.reply_call);
373 ctdb_reply_call(ctdb, hdr);
376 case CTDB_REPLY_ERROR:
377 CTDB_INCREMENT_STAT(ctdb, node.reply_error);
378 ctdb_reply_error(ctdb, hdr);
381 case CTDB_REQ_DMASTER:
382 CTDB_INCREMENT_STAT(ctdb, node.req_dmaster);
383 ctdb_request_dmaster(ctdb, hdr);
386 case CTDB_REPLY_DMASTER:
387 CTDB_INCREMENT_STAT(ctdb, node.reply_dmaster);
388 ctdb_reply_dmaster(ctdb, hdr);
391 case CTDB_REQ_MESSAGE:
392 CTDB_INCREMENT_STAT(ctdb, node.req_message);
393 ctdb_request_message(ctdb, hdr);
396 case CTDB_REQ_CONTROL:
397 CTDB_INCREMENT_STAT(ctdb, node.req_control);
398 ctdb_request_control(ctdb, hdr);
401 case CTDB_REPLY_CONTROL:
402 CTDB_INCREMENT_STAT(ctdb, node.reply_control);
403 ctdb_reply_control(ctdb, hdr);
406 case CTDB_REQ_KEEPALIVE:
407 CTDB_INCREMENT_STAT(ctdb, keepalive_packets_recv);
411 DEBUG(DEBUG_CRIT,("%s: Packet with unknown operation %u\n",
412 __location__, hdr->operation));
417 talloc_free(tmp_ctx);
422 called by the transport layer when a node is dead
/*
 * Transport callback: mark node as dead.
 * A node already flagged NODE_FLAGS_DISCONNECTED only produces an INFO
 * log line. Otherwise the connected counter is decremented, the node is
 * flagged disconnected and unhealthy, its dead_count is reset, pending
 * controls towards it are cancelled, and the transport is asked to
 * restart the connection.
 */
424 void ctdb_node_dead(struct ctdb_node *node)
426 if (node->flags & NODE_FLAGS_DISCONNECTED) {
427 DEBUG(DEBUG_INFO,("%s: node %s is already marked disconnected: %u connected\n",
428 node->ctdb->name, node->name,
429 node->ctdb->num_connected));
432 node->ctdb->num_connected--;
433 node->flags |= NODE_FLAGS_DISCONNECTED | NODE_FLAGS_UNHEALTHY;
435 node->dead_count = 0;
437 DEBUG(DEBUG_NOTICE,("%s: node %s is dead: %u connected\n",
438 node->ctdb->name, node->name, node->ctdb->num_connected));
/* controls still waiting on this node are cancelled */
439 ctdb_daemon_cancel_controls(node->ctdb, node);
/* a NULL methods pointer is reported as the daemon shutting down, in
   which case the transport cannot be restarted */
441 if (node->ctdb->methods == NULL) {
442 DEBUG(DEBUG_ERR,(__location__ " Can not restart transport while shutting down daemon.\n"));
446 node->ctdb->methods->restart(node);
450 called by the transport layer when a node is connected
/*
 * Transport callback: mark node as connected.
 * A node that is not flagged NODE_FLAGS_DISCONNECTED only produces an
 * INFO log line. Otherwise the connected counter is incremented,
 * dead_count is reset and the disconnected flag is cleared.
 */
452 void ctdb_node_connected(struct ctdb_node *node)
454 if (!(node->flags & NODE_FLAGS_DISCONNECTED)) {
455 DEBUG(DEBUG_INFO,("%s: node %s is already marked connected: %u connected\n",
456 node->ctdb->name, node->name,
457 node->ctdb->num_connected));
460 node->ctdb->num_connected++;
461 node->dead_count = 0;
462 node->flags &= ~NODE_FLAGS_DISCONNECTED;
/* a freshly connected node is still flagged unhealthy; nothing in this
   function clears that flag */
463 node->flags |= NODE_FLAGS_UNHEALTHY;
464 DEBUG(DEBUG_INFO,("%s: connected to %s - %u connected\n",
465 node->ctdb->name, node->name, node->ctdb->num_connected));
/* fields of the deferred-packet record read by queue_next_trigger():
   the context to deliver to and the packet header to deliver */
469 struct ctdb_context *ctdb;
470 struct ctdb_req_header *hdr;
475 triggered when a deferred packet is due
/*
 * Timed-event handler for a deferred packet.
 * private_data is type-checked as a struct queue_next via
 * talloc_get_type() and the stored packet is fed into ctdb_input_pkt().
 * The ev, te and t arguments are not used on the visible lines.
 */
477 static void queue_next_trigger(struct event_context *ev, struct timed_event *te,
478 struct timeval t, void *private_data)
480 struct queue_next *q = talloc_get_type(private_data, struct queue_next);
481 ctdb_input_pkt(q->ctdb, q->hdr);
486 defer a packet, so it is processed on the next event loop
487 this is used for sending packets to ourselves
/*
 * Queue a packet addressed to this node for local delivery.
 * A struct queue_next record is allocated and the packet is copied
 * (hdr->length bytes) so the record does not depend on the caller's
 * buffer. Note that the copy uses ctdb, not q, as its talloc parent.
 *
 * NOTE(review): both a direct ctdb_input_pkt() call and a zero-timeout
 * timed event that ends in queue_next_trigger() are visible below.
 * Presumably they are compile-time alternatives selected by preprocessor
 * lines that are not shown - confirm, since running both would deliver
 * the same packet twice.
 */
489 static void ctdb_defer_packet(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
491 struct queue_next *q;
492 q = talloc(ctdb, struct queue_next);
494 DEBUG(DEBUG_ERR,(__location__ " Failed to allocate deferred packet\n"));
/* private copy of the whole packet, hdr->length bytes long */
498 q->hdr = talloc_memdup(ctdb, hdr, hdr->length);
499 if (q->hdr == NULL) {
500 DEBUG(DEBUG_ERR,("Error copying deferred packet to self\n"));
504 /* use this to put packets directly into our recv function */
505 ctdb_input_pkt(q->ctdb, q->hdr);
/* zero timeout: the handler fires from the event loop rather than from
   inside this call */
507 event_add_timed(ctdb->ev, q, timeval_zero(), queue_next_trigger, q);
513 broadcast a packet to all nodes
/*
 * Send hdr to every entry in ctdb->nodes, testing each entry for
 * NODE_FLAGS_DELETED first.
 * hdr->destnode is overwritten with each target pnn in turn, so after
 * the call the header no longer carries the broadcast destination.
 */
515 static void ctdb_broadcast_packet_all(struct ctdb_context *ctdb,
516 struct ctdb_req_header *hdr)
519 for (i=0; i < ctdb->num_nodes; i++) {
/* deleted placeholder entries are tested for here; presumably skipped -
   TODO confirm */
520 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
523 hdr->destnode = ctdb->nodes[i]->pnn;
524 ctdb_queue_packet(ctdb, hdr);
529 broadcast a packet to all nodes in the current vnnmap
/*
 * Send hdr to every pnn listed in the current vnn map.
 * hdr->destnode is overwritten with each map entry in turn before the
 * packet is queued.
 */
531 static void ctdb_broadcast_packet_vnnmap(struct ctdb_context *ctdb,
532 struct ctdb_req_header *hdr)
535 for (i=0;i<ctdb->vnn_map->size;i++) {
536 hdr->destnode = ctdb->vnn_map->map[i];
537 ctdb_queue_packet(ctdb, hdr);
542 broadcast a packet to all connected nodes
/*
 * Send hdr to every node that does not carry NODE_FLAGS_DISCONNECTED.
 * Entries flagged NODE_FLAGS_DELETED are tested for before that check.
 * hdr->destnode is overwritten with each target pnn in turn.
 */
544 static void ctdb_broadcast_packet_connected(struct ctdb_context *ctdb,
545 struct ctdb_req_header *hdr)
548 for (i=0; i < ctdb->num_nodes; i++) {
/* deleted placeholder entries are tested for here; presumably skipped -
   TODO confirm */
549 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
/* only nodes without the disconnected flag receive the packet */
552 if (!(ctdb->nodes[i]->flags & NODE_FLAGS_DISCONNECTED)) {
553 hdr->destnode = ctdb->nodes[i]->pnn;
554 ctdb_queue_packet(ctdb, hdr);
560 queue a packet or die
/*
 * Route one packet according to hdr->destnode.
 *
 * The three CTDB_BROADCAST_* destinations are fanned out by the
 * ctdb_broadcast_packet_* helpers, which call back into this function
 * once per target node. For any other destination:
 *  - the node_packets_sent statistic is incremented;
 *  - the pnn is validated and the target must not be a deleted node;
 *  - a packet addressed to the local pnn is handed to
 *    ctdb_defer_packet() instead of the transport;
 *  - with no transport methods installed the failure is logged at
 *    DEBUG_ALERT;
 *  - otherwise the transport queue_pkt method is called with hdr->length
 *    bytes, and a non-zero result ends in ctdb_fatal().
 */
562 void ctdb_queue_packet(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
564 struct ctdb_node *node;
566 switch (hdr->destnode) {
567 case CTDB_BROADCAST_ALL:
568 ctdb_broadcast_packet_all(ctdb, hdr);
570 case CTDB_BROADCAST_VNNMAP:
571 ctdb_broadcast_packet_vnnmap(ctdb, hdr);
573 case CTDB_BROADCAST_CONNECTED:
574 ctdb_broadcast_packet_connected(ctdb, hdr);
578 CTDB_INCREMENT_STAT(ctdb, node_packets_sent);
/* validate before hdr->destnode is used as an index into ctdb->nodes */
580 if (!ctdb_validate_pnn(ctdb, hdr->destnode)) {
581 DEBUG(DEBUG_CRIT,(__location__ " cant send to node %u that does not exist\n",
586 node = ctdb->nodes[hdr->destnode];
588 if (node->flags & NODE_FLAGS_DELETED) {
589 DEBUG(DEBUG_ERR, (__location__ " Can not queue packet to DELETED node %d\n", hdr->destnode));
/* packets to ourselves bypass the transport */
593 if (node->pnn == ctdb->pnn) {
594 ctdb_defer_packet(ctdb, hdr);
598 if (ctdb->methods == NULL) {
599 DEBUG(DEBUG_ALERT, (__location__ " Can not queue packet. "
600 "Transport is DOWN\n"));
/* a transport-level queueing failure is treated as fatal */
605 if (ctdb->methods->queue_pkt(node, (uint8_t *)hdr, hdr->length) != 0) {
606 ctdb_fatal(ctdb, "Unable to queue packet\n");
614 a valgrind hack to allow us to get opcode specific backtraces
615 very ugly, and relies on no compiler optimisation!
/*
 * Wrapper around ctdb_queue_packet() that takes an extra opcode argument.
 * DO_OP(x) expands to a case label for x whose body is the same
 * ctdb_queue_packet(ctdb, hdr) call, so every visible path ends up
 * queueing the packet unchanged.
 * NOTE(review): the switch and the list of DO_OP() uses are not visible
 * here; presumably one case exists per opcode - confirm.
 */
617 void ctdb_queue_packet_opcode(struct ctdb_context *ctdb, struct ctdb_req_header *hdr, unsigned opcode)
620 #define DO_OP(x) case x: ctdb_queue_packet(ctdb, hdr); break
722 ctdb_queue_packet(ctdb, hdr);