2 ctdb main protocol code
4 Copyright (C) Andrew Tridgell 2006
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "lib/tdb/include/tdb.h"
22 #include "lib/events/events.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "../include/ctdb_private.h"
29 choose the transport we will use
/*
  Record the transport name on the context: ctdb->transport is set to a
  talloc_strdup() copy of transport, allocated with ctdb as its talloc
  parent.
  NOTE(review): no check of the talloc_strdup() result is visible in this
  view - confirm whether an allocation failure is reported to the caller.
*/
31 int ctdb_set_transport(struct ctdb_context *ctdb, const char *transport)
33 ctdb->transport = talloc_strdup(ctdb, transport);
38 Check whether an ip is a valid node ip
39 Returns the node id for this ip address or -1
/*
  Walk ctdb->nodes[0 .. num_nodes-1] and compare the address.address
  string of each node with nodeip using strcmp(), so only an exact
  textual match counts as the same ip.
  NOTE(review): the return statements are not visible in this view; the
  description above the function says the matching node id or -1 is
  returned - confirm.
*/
41 int ctdb_ip_to_nodeid(struct ctdb_context *ctdb, const char *nodeip)
45 for (nodeid=0;nodeid<ctdb->num_nodes;nodeid++) {
46 if (!strcmp(ctdb->nodes[nodeid]->address.address, nodeip)) {
55 choose the recovery lock file
/*
  Remember the path of the recovery lock file: ctdb->recovery_lock_file is
  set to a talloc_strdup() copy of file, allocated with ctdb as its talloc
  parent.
  NOTE(review): no check of the talloc_strdup() result is visible in this
  view - confirm whether an allocation failure is reported to the caller.
*/
57 int ctdb_set_recovery_lock_file(struct ctdb_context *ctdb, const char *file)
59 ctdb->recovery_lock_file = talloc_strdup(ctdb, file);
64 set the directory for the local databases
/*
  Remember the directory used for the local databases:
  ctdb->db_directory is set to a talloc_strdup() copy of dir, allocated
  with ctdb as its talloc parent. A NULL result from the copy is detected.
  NOTE(review): the body of the NULL branch and the return values are not
  visible in this view - confirm what the caller receives on failure.
*/
66 int ctdb_set_tdb_dir(struct ctdb_context *ctdb, const char *dir)
68 ctdb->db_directory = talloc_strdup(ctdb, dir);
69 if (ctdb->db_directory == NULL) {
76 set the directory for the persistent databases
/*
  Remember the directory used for the persistent databases:
  ctdb->db_directory_persistent is set to a talloc_strdup() copy of dir,
  allocated with ctdb as its talloc parent. A NULL result from the copy
  is detected.
  NOTE(review): the body of the NULL branch and the return values are not
  visible in this view - confirm what the caller receives on failure.
*/
78 int ctdb_set_tdb_dir_persistent(struct ctdb_context *ctdb, const char *dir)
80 ctdb->db_directory_persistent = talloc_strdup(ctdb, dir);
81 if (ctdb->db_directory_persistent == NULL) {
88 add a node to the list of active nodes
/*
  Append one node, described by the address string nstr, to ctdb->nodes.

  Steps visible here:
  - the ctdb->nodes pointer array is grown to num_nodes+1 entries with
    talloc_realloc() and a zeroed struct ctdb_node is allocated as a
    talloc child of ctdb->nodes; both results go through CTDB_NO_MEMORY
  - nstr is parsed into node->address with ctdb_parse_address()
  - node->name is built with the format %s:%u from the parsed address
  - node->pnn is the index the node receives in the array
  - flags start as DISCONNECTED and UNHEALTHY, dead_count starts at 0
  - when the context already has an address and it is the same as the
    node address, the node is the local one: ctdb->pnn is set and the
    DISCONNECTED flag is cleared

  NOTE(review): the assignments of ctdb->nodes and of the local variable
  node, the increment of ctdb->num_nodes and the return statements are
  not visible in this view - confirm against the full file.
*/
90 static int ctdb_add_node(struct ctdb_context *ctdb, char *nstr)
92 struct ctdb_node *node, **nodep;
94 nodep = talloc_realloc(ctdb, ctdb->nodes, struct ctdb_node *, ctdb->num_nodes+1);
95 CTDB_NO_MEMORY(ctdb, nodep);
/* nodep is reused from here on: it now points at the new last slot */
98 nodep = &ctdb->nodes[ctdb->num_nodes];
99 (*nodep) = talloc_zero(ctdb->nodes, struct ctdb_node);
100 CTDB_NO_MEMORY(ctdb, *nodep);
103 if (ctdb_parse_address(ctdb, node, nstr, &node->address) != 0) {
107 node->name = talloc_asprintf(node, "%s:%u",
108 node->address.address,
110 /* this assumes that the nodes are kept in sorted order, and no gaps */
111 node->pnn = ctdb->num_nodes;
113 /* nodes start out disconnected and unhealthy */
114 node->flags = (NODE_FLAGS_DISCONNECTED | NODE_FLAGS_UNHEALTHY);
/* an address match means this entry describes the local node */
116 if (ctdb->address.address &&
117 ctdb_same_address(&ctdb->address, &node->address)) {
118 /* for automatic binding to interfaces, see tcp_connect.c */
119 ctdb->pnn = node->pnn;
120 node->flags &= ~NODE_FLAGS_DISCONNECTED;
/* NOTE(review): brace structure is not visible here; this check appears
   to apply to the local node only - confirm */
122 /* do we start out in DISABLED mode? */
123 if (ctdb->start_as_disabled != 0) {
124 DEBUG(DEBUG_INFO, ("This node is configured to start in DISABLED state\n"));
125 node->flags |= NODE_FLAGS_DISABLED;
130 node->dead_count = 0;
136 setup the node list from a file
/*
  (Re)build the node list of the context from the text file nlist.

  Steps visible here:
  - any previous ctdb->nodes array and ctdb->node_list_file string are
    released with talloc_free(), and a copy of the nlist path is stored
    in ctdb->node_list_file
  - the file is read with file_lines_load(); the error text
    Failed to load nlist is recorded with ctdb_set_error()
  - trailing empty lines are trimmed from the line count
  - for each remaining line, leading spaces and tabs are skipped, the
    line is compared with the empty string, and ctdb_add_node() is
    called with it
  - finally ctdb->vnn_map is allocated with generation
    INVALID_GENERATION, size num_nodes and the identity mapping
    map[i] = i

  NOTE(review): what happens to an empty line, to a failing
  ctdb_add_node() call and the return values is not visible in this
  view - confirm against the full file.
*/
138 int ctdb_set_nlist(struct ctdb_context *ctdb, const char *nlist)
144 talloc_free(ctdb->nodes);
148 talloc_free(ctdb->node_list_file);
149 ctdb->node_list_file = talloc_strdup(ctdb, nlist);
151 lines = file_lines_load(nlist, &nlines, ctdb);
153 ctdb_set_error(ctdb, "Failed to load nlist '%s'\n", nlist);
/* ignore empty lines at the end of the file */
156 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
160 for (i=0;i<nlines;i++) {
164 /* strip leading spaces */
165 while((*node == ' ') || (*node == '\t')) {
171 if (strcmp(node, "") == 0) {
174 if (ctdb_add_node(ctdb, node) != 0) {
180 /* initialize the vnn mapping table now that we have num_nodes setup */
181 ctdb->vnn_map = talloc(ctdb, struct ctdb_vnn_map);
182 CTDB_NO_MEMORY(ctdb, ctdb->vnn_map);
184 ctdb->vnn_map->generation = INVALID_GENERATION;
185 ctdb->vnn_map->size = ctdb->num_nodes;
186 ctdb->vnn_map->map = talloc_array(ctdb->vnn_map, uint32_t, ctdb->vnn_map->size);
187 CTDB_NO_MEMORY(ctdb, ctdb->vnn_map->map);
/* start with the identity mapping: slot i refers to node i */
189 for(i=0;i<ctdb->vnn_map->size;i++) {
190 ctdb->vnn_map->map[i] = i;
199 setup the local node address
/*
  Parse the address string into ctdb->address with ctdb_parse_address()
  and build ctdb->name from it with the format %s:%u, allocated as a
  talloc child of ctdb.
  NOTE(review): the failure branch of the parse, the remaining
  talloc_asprintf() arguments and the return values are not visible in
  this view - confirm against the full file.
*/
201 int ctdb_set_address(struct ctdb_context *ctdb, const char *address)
203 if (ctdb_parse_address(ctdb, ctdb, address, &ctdb->address) != 0) {
207 ctdb->name = talloc_asprintf(ctdb, "%s:%u",
208 ctdb->address.address,
215 return the number of active nodes
/*
  Examine every entry of ctdb->vnn_map: each map[i] is used as an index
  into ctdb->nodes and the node is tested for NODE_FLAGS_INACTIVE being
  clear. Only nodes present in the vnn map are considered, not the whole
  ctdb->nodes array.
  NOTE(review): the counter and the return statement are not visible in
  this view; the description above the function says the number of
  active nodes is returned - confirm.
*/
217 uint32_t ctdb_get_num_active_nodes(struct ctdb_context *ctdb)
221 for (i=0;i<ctdb->vnn_map->size;i++) {
222 struct ctdb_node *node = ctdb->nodes[ctdb->vnn_map->map[i]];
223 if (!(node->flags & NODE_FLAGS_INACTIVE)) {
232 called when we need to process a packet. This can be a requeued packet
233 after a lockwait, or a real packet from another node
/*
  Entry point for one received (or requeued) packet.

  - hdr is reparented with talloc_steal() onto a temporary context
    created under ctdb, and that temporary context is released with
    talloc_free() at the bottom of the function; a handler that wants to
    keep the packet must steal it elsewhere first.
  - The first switch on hdr->operation applies two checks to the call
    and dmaster operations listed in its case labels: the local node
    (ctdb->nodes[ctdb->pnn]) must not have NODE_FLAGS_BANNED set, and
    hdr->generation must equal ctdb->vnn_map->generation. Each failed
    check is logged at DEBUG_DEBUG.
    NOTE(review): the statements following those log calls are not
    visible in this view - confirm that such packets are dropped.
  - The second switch bumps the matching ctdb->statistics counter and
    hands the packet to the per-operation handler (ctdb_request_call,
    ctdb_reply_call, ctdb_reply_error, ctdb_request_dmaster,
    ctdb_reply_dmaster, ctdb_request_message, ctdb_request_control,
    ctdb_reply_control). CTDB_REQ_KEEPALIVE only bumps
    keepalive_packets_recv in the visible code, and a packet with an
    unknown operation is logged at DEBUG_CRIT.
*/
235 void ctdb_input_pkt(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
239 /* place the packet as a child of the tmp_ctx. We then use
240 talloc_free() below to free it. If any of the calls want
241 to keep it, then they will steal it somewhere else, and the
242 talloc_free() will only free the tmp_ctx */
243 tmp_ctx = talloc_new(ctdb);
244 talloc_steal(tmp_ctx, hdr);
246 DEBUG(DEBUG_DEBUG,(__location__ " ctdb request %u of type %u length %u from "
247 "node %u to %u\n", hdr->reqid, hdr->operation, hdr->length,
248 hdr->srcnode, hdr->destnode));
250 switch (hdr->operation) {
252 case CTDB_REPLY_CALL:
253 case CTDB_REQ_DMASTER:
254 case CTDB_REPLY_DMASTER:
255 /* we dont allow these calls when banned */
256 if (ctdb->nodes[ctdb->pnn]->flags & NODE_FLAGS_BANNED) {
257 DEBUG(DEBUG_DEBUG,(__location__ " ctdb operation %u"
259 " length %u from node %u to %u while node"
261 hdr->operation, hdr->reqid,
263 hdr->srcnode, hdr->destnode));
267 /* for ctdb_call inter-node operations verify that the
268 remote node that sent us the call is running in the
269 same generation instance as this node
271 if (ctdb->vnn_map->generation != hdr->generation) {
272 DEBUG(DEBUG_DEBUG,(__location__ " ctdb operation %u"
274 " length %u from node %u to %u had an"
275 " invalid generation id:%u while our"
276 " generation id is:%u\n",
277 hdr->operation, hdr->reqid,
279 hdr->srcnode, hdr->destnode,
280 hdr->generation, ctdb->vnn_map->generation));
285 switch (hdr->operation) {
287 ctdb->statistics.node.req_call++;
288 ctdb_request_call(ctdb, hdr);
291 case CTDB_REPLY_CALL:
292 ctdb->statistics.node.reply_call++;
293 ctdb_reply_call(ctdb, hdr);
296 case CTDB_REPLY_ERROR:
297 ctdb->statistics.node.reply_error++;
298 ctdb_reply_error(ctdb, hdr);
301 case CTDB_REQ_DMASTER:
302 ctdb->statistics.node.req_dmaster++;
303 ctdb_request_dmaster(ctdb, hdr);
306 case CTDB_REPLY_DMASTER:
307 ctdb->statistics.node.reply_dmaster++;
308 ctdb_reply_dmaster(ctdb, hdr);
311 case CTDB_REQ_MESSAGE:
312 ctdb->statistics.node.req_message++;
313 ctdb_request_message(ctdb, hdr);
316 case CTDB_REQ_CONTROL:
317 ctdb->statistics.node.req_control++;
318 ctdb_request_control(ctdb, hdr);
321 case CTDB_REPLY_CONTROL:
322 ctdb->statistics.node.reply_control++;
323 ctdb_reply_control(ctdb, hdr);
326 case CTDB_REQ_KEEPALIVE:
327 ctdb->statistics.keepalive_packets_recv++;
331 DEBUG(DEBUG_CRIT,("%s: Packet with unknown operation %u\n",
332 __location__, hdr->operation));
337 talloc_free(tmp_ctx);
342 called by the transport layer when a node is dead
/*
  Mark a node as dead.

  A node that already carries NODE_FLAGS_DISCONNECTED only produces a
  DEBUG_INFO message. Otherwise the connected counter of the owning
  context is decremented, the node is flagged DISCONNECTED and UNHEALTHY,
  its dead_count is reset to 0, outstanding controls for the node are
  cancelled with ctdb_daemon_cancel_controls() and the transport is asked
  to restart the node through ctdb->methods->restart(). A NULL
  ctdb->methods is reported through ctdb_fatal().

  NOTE(review): the statement after the already-disconnected message is
  not visible in this view - confirm that the function returns there.
*/
344 void ctdb_node_dead(struct ctdb_node *node)
346 if (node->flags & NODE_FLAGS_DISCONNECTED) {
347 DEBUG(DEBUG_INFO,("%s: node %s is already marked disconnected: %u connected\n",
348 node->ctdb->name, node->name,
349 node->ctdb->num_connected));
352 node->ctdb->num_connected--;
353 node->flags |= NODE_FLAGS_DISCONNECTED | NODE_FLAGS_UNHEALTHY;
355 node->dead_count = 0;
357 DEBUG(DEBUG_NOTICE,("%s: node %s is dead: %u connected\n",
358 node->ctdb->name, node->name, node->ctdb->num_connected));
359 ctdb_daemon_cancel_controls(node->ctdb, node);
/* without transport methods there is nothing that could reconnect */
361 if (node->ctdb->methods == NULL) {
362 DEBUG(DEBUG_ALERT,(__location__ " Can not restart transport. ctdb->methods==NULL\n"));
363 ctdb_fatal(node->ctdb, "can not restart transport.");
366 node->ctdb->methods->restart(node);
370 called by the transport layer when a node is connected
/*
  Mark a node as connected.

  A node without NODE_FLAGS_DISCONNECTED only produces a DEBUG_INFO
  message. Otherwise the connected counter of the owning context is
  incremented, dead_count is reset to 0, the DISCONNECTED flag is cleared
  and the UNHEALTHY flag is set, so a freshly connected node is treated
  as unhealthy until that flag is cleared elsewhere.

  NOTE(review): the statement after the already-connected message is not
  visible in this view - confirm that the function returns there.
*/
372 void ctdb_node_connected(struct ctdb_node *node)
374 if (!(node->flags & NODE_FLAGS_DISCONNECTED)) {
375 DEBUG(DEBUG_INFO,("%s: node %s is already marked connected: %u connected\n",
376 node->ctdb->name, node->name,
377 node->ctdb->num_connected));
380 node->ctdb->num_connected++;
381 node->dead_count = 0;
382 node->flags &= ~NODE_FLAGS_DISCONNECTED;
383 node->flags |= NODE_FLAGS_UNHEALTHY;
384 DEBUG(DEBUG_INFO,("%s: connected to %s - %u connected\n",
385 node->ctdb->name, node->name, node->ctdb->num_connected));
/* state carried by a deferred packet: the context that will process it */
389 struct ctdb_context *ctdb;
/* the packet handed to ctdb_input_pkt() when the deferred event fires */
390 struct ctdb_req_header *hdr;
395 triggered when a deferred packet is due
/*
  Timed event callback for a deferred packet. private_data is converted
  to a struct queue_next with talloc_get_type() and the stored packet is
  handed to ctdb_input_pkt() together with the stored context. The ev,
  te and t arguments are not used by the visible code.
  NOTE(review): whether the queue_next state is released afterwards is
  not visible in this view - confirm.
*/
397 static void queue_next_trigger(struct event_context *ev, struct timed_event *te,
398 struct timeval t, void *private_data)
400 struct queue_next *q = talloc_get_type(private_data, struct queue_next);
401 ctdb_input_pkt(q->ctdb, q->hdr);
406 defer a packet, so it is processed on the next event loop
407 this is used for sending packets to ourselves
/*
  Queue a packet addressed to this node for local processing.

  A struct queue_next is allocated under ctdb and the packet is copied
  (hdr->length bytes) with talloc_memdup(), so the buffer of the caller
  is not retained. A zero-timeout timed event is added with
  queue_next_trigger as callback and the queue_next as both talloc parent
  of the event and private data.

  NOTE(review): both a direct ctdb_input_pkt() call and the
  event_add_timed() scheduling are visible below; preprocessor lines that
  select between them may be missing from this view - confirm which path
  is compiled.
  NOTE(review): confirm that q->ctdb is assigned before use and that q is
  released when the packet copy fails; neither is visible in this view.
*/
409 static void ctdb_defer_packet(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
411 struct queue_next *q;
412 q = talloc(ctdb, struct queue_next);
414 DEBUG(DEBUG_ERR,(__location__ " Failed to allocate deferred packet\n"));
418 q->hdr = talloc_memdup(ctdb, hdr, hdr->length);
419 if (q->hdr == NULL) {
420 DEBUG(DEBUG_ERR,("Error copying deferred packet to self\n"));
424 /* use this to put packets directly into our recv function */
425 ctdb_input_pkt(q->ctdb, q->hdr);
427 event_add_timed(ctdb->ev, q, timeval_zero(), queue_next_trigger, q);
433 broadcast a packet to all nodes
/*
  Send hdr once to every entry of ctdb->nodes, connected or not. The
  destnode field of the caller-supplied header is overwritten with the
  pnn of each node in turn before ctdb_queue_packet() is called, so on
  return hdr->destnode no longer holds the broadcast address.
*/
435 static void ctdb_broadcast_packet_all(struct ctdb_context *ctdb,
436 struct ctdb_req_header *hdr)
439 for (i=0;i<ctdb->num_nodes;i++) {
440 hdr->destnode = ctdb->nodes[i]->pnn;
441 ctdb_queue_packet(ctdb, hdr);
446 broadcast a packet to all nodes in the current vnnmap
/*
  Send hdr once to every node number listed in ctdb->vnn_map->map. The
  destnode field of the caller-supplied header is overwritten with each
  map entry in turn before ctdb_queue_packet() is called, so on return
  hdr->destnode no longer holds the broadcast address.
*/
448 static void ctdb_broadcast_packet_vnnmap(struct ctdb_context *ctdb,
449 struct ctdb_req_header *hdr)
452 for (i=0;i<ctdb->vnn_map->size;i++) {
453 hdr->destnode = ctdb->vnn_map->map[i];
454 ctdb_queue_packet(ctdb, hdr);
459 broadcast a packet to all connected nodes
/*
  Send hdr once to every entry of ctdb->nodes that does not carry
  NODE_FLAGS_DISCONNECTED. The destnode field of the caller-supplied
  header is overwritten with the pnn of each such node before
  ctdb_queue_packet() is called, so on return hdr->destnode no longer
  holds the broadcast address.
*/
461 static void ctdb_broadcast_packet_connected(struct ctdb_context *ctdb,
462 struct ctdb_req_header *hdr)
465 for (i=0;i<ctdb->num_nodes;i++) {
466 if (!(ctdb->nodes[i]->flags & NODE_FLAGS_DISCONNECTED)) {
467 hdr->destnode = ctdb->nodes[i]->pnn;
468 ctdb_queue_packet(ctdb, hdr);
474 queue a packet or die
/*
  Route one outgoing packet according to hdr->destnode.

  - The three broadcast addresses (CTDB_BROADCAST_ALL,
    CTDB_BROADCAST_VNNMAP, CTDB_BROADCAST_CONNECTED) are expanded by the
    matching ctdb_broadcast_packet_* helper, which calls back into this
    function once per destination.
  - For a single destination the node_packets_sent statistic is bumped
    and the pnn is validated with ctdb_validate_pnn(); an invalid pnn is
    logged at DEBUG_CRIT.
  - A packet addressed to the local pnn is handed to ctdb_defer_packet().
  - Any other packet goes to the transport through
    ctdb->methods->queue_pkt(); a missing transport is logged at
    DEBUG_ALERT and a queueing failure ends in ctdb_fatal().

  NOTE(review): the return statements after the broadcast calls and
  after the error messages, and the else branch structure, are not
  visible in this view - confirm against the full file.
*/
476 void ctdb_queue_packet(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
478 struct ctdb_node *node;
480 switch (hdr->destnode) {
481 case CTDB_BROADCAST_ALL:
482 ctdb_broadcast_packet_all(ctdb, hdr);
484 case CTDB_BROADCAST_VNNMAP:
485 ctdb_broadcast_packet_vnnmap(ctdb, hdr);
487 case CTDB_BROADCAST_CONNECTED:
488 ctdb_broadcast_packet_connected(ctdb, hdr);
492 ctdb->statistics.node_packets_sent++;
494 if (!ctdb_validate_pnn(ctdb, hdr->destnode)) {
495 DEBUG(DEBUG_CRIT,(__location__ " cant send to node %u that does not exist\n",
500 node = ctdb->nodes[hdr->destnode];
/* packets to ourselves never touch the transport */
502 if (hdr->destnode == ctdb->pnn) {
503 ctdb_defer_packet(ctdb, hdr);
505 if (ctdb->methods == NULL) {
506 DEBUG(DEBUG_ALERT, (__location__ " Can not queue packet. Transport is DOWN\n"));
511 if (ctdb->methods->queue_pkt(node, (uint8_t *)hdr, hdr->length) != 0) {
512 ctdb_fatal(ctdb, "Unable to queue packet\n");
521 a valgrind hack to allow us to get opcode specific backtraces
522 very ugly, and relies on no compiler optimisation!
/*
  Wrapper around ctdb_queue_packet() that takes an extra opcode argument.
  The DO_OP(x) helper macro expands to a case label for x that calls
  ctdb_queue_packet(ctdb, hdr) and breaks, so each opcode gets its own
  call site; the final visible statement is a plain ctdb_queue_packet()
  call.
  NOTE(review): the switch statement and the list of DO_OP() uses are not
  visible in this view - confirm against the full file.
*/
524 void ctdb_queue_packet_opcode(struct ctdb_context *ctdb, struct ctdb_req_header *hdr, unsigned opcode)
527 #define DO_OP(x) case x: ctdb_queue_packet(ctdb, hdr); break
629 ctdb_queue_packet(ctdb, hdr);