2 ctdb main protocol code
4 Copyright (C) Andrew Tridgell 2006
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2 of the License, or (at your option) any later version.
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with this library; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/events/events.h"
24 #include "lib/util/dlinklist.h"
25 #include "system/network.h"
26 #include "system/filesys.h"
27 #include "../include/ctdb_private.h"
30 choose the transport we will use
/*
  Record the transport name on the context: a talloc_strdup() copy of
  `transport`, allocated with ctdb as its talloc parent, is stored in
  ctdb->transport.
  NOTE(review): the return statement is not visible in this excerpt, so the
  int result and any handling of a failed allocation cannot be confirmed.
*/
32 int ctdb_set_transport(struct ctdb_context *ctdb, const char *transport)
34 ctdb->transport = talloc_strdup(ctdb, transport);
/*
  Flag setter for the ctdb context.
  NOTE(review): the body is not visible in this excerpt; presumably it ORs
  `flags` into ctdb->flags, mirroring ctdb_clear_flags() - confirm.
*/
42 void ctdb_set_flags(struct ctdb_context *ctdb, unsigned flags)
/*
  Clear every bit given in `flags` from ctdb->flags; other bits are left
  untouched.
*/
50 void ctdb_clear_flags(struct ctdb_context *ctdb, unsigned flags)
52 ctdb->flags &= ~flags;
56 set max access count before a dmaster migration
/*
  Store `count` in ctdb->max_lacount, overriding whatever value was there
  before (ctdb_init() seeds it with CTDB_DEFAULT_MAX_LACOUNT).
*/
58 void ctdb_set_max_lacount(struct ctdb_context *ctdb, unsigned count)
60 ctdb->max_lacount = count;
64 set the directory for the local databases
/*
  Set ctdb->db_directory. Two alternative assignments are visible: a
  generated name of the form "ctdb-<vnn>" built from ctdb_get_vnn(), and a
  talloc_strdup() copy of `dir`. Both are allocated with ctdb as parent.
  NOTE(review): the condition that selects between the two assignments is
  not visible in this excerpt; presumably the generated name is the
  fallback when `dir` is NULL - confirm. The handling inside the NULL check
  and the return values are also not visible.
*/
66 int ctdb_set_tdb_dir(struct ctdb_context *ctdb, const char *dir)
/* generated default name derived from this node's vnn */
69 ctdb->db_directory = talloc_asprintf(ctdb, "ctdb-%u", ctdb_get_vnn(ctdb));
/* caller-supplied directory, copied so the context owns it */
71 ctdb->db_directory = talloc_strdup(ctdb, dir);
/* either allocation above may have failed */
73 if (ctdb->db_directory == NULL) {
80 add a node to the list of active nodes
/*
  Append one node, described by the address string `nstr`, to the node
  array of the context.

  Visible steps: the array of node pointers is grown to num_nodes+1 entries
  with talloc_realloc(), a zeroed struct ctdb_node is allocated as a talloc
  child of ctdb->nodes for the new last slot, `nstr` is parsed into
  node->address, node->name is formatted with "%s:%u" starting from
  node->address.address, and node->vnn is set to the current value of
  ctdb->num_nodes. When the parsed address matches ctdb->address the
  context's own vnn is taken from this node and the node is flagged
  NODE_FLAGS_CONNECTED.

  NOTE(review): several statements are not visible in this excerpt - the
  write-back of the reallocated array to ctdb->nodes, the assignment of
  `node` (presumably from *nodep), the increment of ctdb->num_nodes, the
  failure path of ctdb_parse_address() and the return values. Confirm
  against the full source before relying on this description.
*/
82 static int ctdb_add_node(struct ctdb_context *ctdb, char *nstr)
84 struct ctdb_node *node, **nodep;
/* grow the pointer array by one slot; CTDB_NO_MEMORY handles a NULL result */
86 nodep = talloc_realloc(ctdb, ctdb->nodes, struct ctdb_node *, ctdb->num_nodes+1);
87 CTDB_NO_MEMORY(ctdb, nodep);
/* nodep is reused: it now addresses the new last slot of the array */
90 nodep = &ctdb->nodes[ctdb->num_nodes];
91 (*nodep) = talloc_zero(ctdb->nodes, struct ctdb_node);
92 CTDB_NO_MEMORY(ctdb, *nodep);
/* a non-zero result from ctdb_parse_address() is treated as failure */
95 if (ctdb_parse_address(ctdb, node, nstr, &node->address) != 0) {
99 node->name = talloc_asprintf(node, "%s:%u",
100 node->address.address,
102 /* for now we just set the vnn to the line in the file - this
104 node->vnn = ctdb->num_nodes;
106 if (ctdb_same_address(&ctdb->address, &node->address)) {
107 ctdb->vnn = node->vnn;
108 node->flags |= NODE_FLAGS_CONNECTED;
117 setup the node list from a file
/*
  Build the node list from the file named by `nlist`.

  The file is read into an array of lines with file_lines_load(), using
  ctdb as the talloc parent, and each line is passed to ctdb_add_node() in
  file order. A load failure is recorded with ctdb_set_error().
  NOTE(review): the failure condition for the load, the handling of a
  non-zero ctdb_add_node() result, the freeing of `lines` and the return
  values are not visible in this excerpt.
*/
119 int ctdb_set_nlist(struct ctdb_context *ctdb, const char *nlist)
125 lines = file_lines_load(nlist, &nlines, ctdb);
127 ctdb_set_error(ctdb, "Failed to load nlist '%s'\n", nlist);
/* one node per line */
131 for (i=0;i<nlines;i++) {
132 if (ctdb_add_node(ctdb, lines[i]) != 0) {
143 setup the local node address
/*
  Set the address of the local node.

  `address` is parsed into ctdb->address with ctdb_parse_address(); a
  non-zero result is treated as failure. ctdb->name is then formatted with
  "%s:%u", starting from ctdb->address.address, and allocated with ctdb as
  parent.
  NOTE(review): the failure path, the remaining format argument and the
  return values are not visible in this excerpt.
*/
145 int ctdb_set_address(struct ctdb_context *ctdb, const char *address)
147 if (ctdb_parse_address(ctdb, ctdb, address, &ctdb->address) != 0) {
151 ctdb->name = talloc_asprintf(ctdb, "%s:%u",
152 ctdb->address.address,
159 setup the local socket name
/*
  Store a talloc_strdup() copy of `socketname`, owned by ctdb, in
  ctdb->daemon.name.
  NOTE(review): the return statement is not visible in this excerpt, so
  handling of a failed allocation cannot be confirmed.
*/
161 int ctdb_set_socketname(struct ctdb_context *ctdb, const char *socketname)
163 ctdb->daemon.name = talloc_strdup(ctdb, socketname);
168 register a call for a database by adding it to the database's list of registered calls
/*
  Register a call on a database: a struct ctdb_registered_call is allocated
  as a talloc child of ctdb_db and linked into the ctdb_db->calls list with
  DLIST_ADD().
  NOTE(review): talloc() does not zero the allocation; the statements that
  fill in the new entry from `fn` and `id`, any NULL check on the
  allocation, and the return value are not visible in this excerpt.
*/
170 int ctdb_set_call(struct ctdb_db_context *ctdb_db, ctdb_fn_t fn, int id)
172 struct ctdb_registered_call *call;
174 call = talloc(ctdb_db, struct ctdb_registered_call);
178 DLIST_ADD(ctdb_db->calls, call);
183 return the vnn of this node
/*
  Accessor for the vnn of the local node.
  NOTE(review): the body is not visible in this excerpt; presumably it
  returns ctdb->vnn - confirm.
*/
185 uint32_t ctdb_get_vnn(struct ctdb_context *ctdb)
191 return the number of nodes
/*
  Return ctdb->num_nodes, the node count held on the context.
*/
193 uint32_t ctdb_get_num_nodes(struct ctdb_context *ctdb)
195 return ctdb->num_nodes;
200 called by the transport layer when a packet comes in
/*
  Validate and dispatch one incoming packet.

  data/length: the raw packet bytes and their length. The buffer is
  interpreted in place as a struct ctdb_req_header.

  The packet is re-parented onto a temporary talloc context that is freed
  at the end of the function, so a handler that wants to keep the packet
  has to steal it elsewhere.

  Checks visible here, in order: length is at least sizeof(header), length
  equals hdr->length, hdr->ctdb_magic is CTDB_MAGIC, hdr->ctdb_version is
  CTDB_VERSION. Each failure records a message with ctdb_set_error().

  Dispatch is a switch on hdr->operation. Every visible case increments its
  counter under ctdb->status.count and calls the matching handler. The
  request-call path is additionally guarded by a generation comparison
  against ctdb->vnn_map->generation and by a recovery-mode check.

  NOTE(review): many statements are not visible in this excerpt - the
  declaration of tmp_ctx, what follows each failed check (presumably a jump
  to the cleanup at the end), the first case label, the break statements
  and the default label. Confirm against the full source.
*/
202 void ctdb_recv_pkt(struct ctdb_context *ctdb, uint8_t *data, uint32_t length)
204 struct ctdb_req_header *hdr = (struct ctdb_req_header *)data;
/* counted before any validation, so rejected packets are included */
207 ctdb->status.node_packets_recv++;
209 /* place the packet as a child of the tmp_ctx. We then use
210 talloc_free() below to free it. If any of the calls want
211 to keep it, then they will steal it somewhere else, and the
212 talloc_free() will only free the tmp_ctx */
213 tmp_ctx = talloc_new(ctdb);
214 talloc_steal(tmp_ctx, hdr);
/* too short to contain a header: no header field may be read yet */
216 if (length < sizeof(*hdr)) {
217 ctdb_set_error(ctdb, "Bad packet length %d\n", length);
/* the length given by the caller must agree with the header's own field */
220 if (length != hdr->length) {
221 ctdb_set_error(ctdb, "Bad header length %d expected %d\n",
222 hdr->length, length);
226 if (hdr->ctdb_magic != CTDB_MAGIC) {
227 ctdb_set_error(ctdb, "Non CTDB packet rejected\n");
231 if (hdr->ctdb_version != CTDB_VERSION) {
232 ctdb_set_error(ctdb, "Bad CTDB version 0x%x rejected\n", hdr->ctdb_version);
236 DEBUG(3,(__location__ " ctdb request %d of type %d length %d from "
237 "node %d to %d\n", hdr->reqid, hdr->operation, hdr->length,
238 hdr->srcnode, hdr->destnode));
240 switch (hdr->operation) {
/* NOTE(review): in the generation-mismatch DEBUG below, the sixth argument
   is ctdb->vnn_map->generation (our own value), yet it lines up with the
   "invalid generation id:%d" placeholder. The two generation arguments
   look swapped relative to the message text - confirm and fix. */
242 /* verify that the remote node that sent us the call
243 is running in the same generation instance as this node
245 if (ctdb->vnn_map->generation != hdr->generation) {
246 DEBUG(0,(__location__ " ctdb request %d of type"
247 " %d length %d from node %d to %d had an"
248 " invalid generation id:%d while our"
249 " generation id is:%d\n",
250 hdr->reqid, hdr->operation, hdr->length,
251 hdr->srcnode, hdr->destnode,
252 ctdb->vnn_map->generation,
256 /* if we are in recovery mode we discard all traffic
257 until the cluster has recovered.
259 if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
260 DEBUG(0,(__location__ " ctdb request %d of type"
261 " %d length %d from node %d to %d"
262 " while we are in recovery mode\n",
263 hdr->reqid, hdr->operation, hdr->length,
264 hdr->srcnode, hdr->destnode));
268 ctdb->status.count.req_call++;
269 ctdb_request_call(ctdb, hdr);
272 case CTDB_REPLY_CALL:
273 ctdb->status.count.reply_call++;
274 ctdb_reply_call(ctdb, hdr);
277 case CTDB_REPLY_ERROR:
278 ctdb->status.count.reply_error++;
279 ctdb_reply_error(ctdb, hdr);
282 case CTDB_REQ_DMASTER:
283 ctdb->status.count.req_dmaster++;
284 ctdb_request_dmaster(ctdb, hdr);
287 case CTDB_REPLY_DMASTER:
288 ctdb->status.count.reply_dmaster++;
289 ctdb_reply_dmaster(ctdb, hdr);
292 case CTDB_REQ_MESSAGE:
293 ctdb->status.count.req_message++;
294 ctdb_request_message(ctdb, hdr);
297 case CTDB_REQ_FINISHED:
298 ctdb->status.count.req_finished++;
299 ctdb_request_finished(ctdb, hdr);
302 case CTDB_REQ_CONTROL:
303 ctdb->status.count.req_control++;
304 ctdb_request_control(ctdb, hdr);
307 case CTDB_REPLY_CONTROL:
308 ctdb->status.count.reply_control++;
309 ctdb_reply_control(ctdb, hdr);
313 DEBUG(0,("%s: Packet with unknown operation %d\n",
314 __location__, hdr->operation));
319 talloc_free(tmp_ctx);
323 called by the transport layer when a packet comes in
/*
  Variant of ctdb_recv_pkt() that takes the context as an untyped pointer:
  `p` is converted to a struct ctdb_context with talloc_get_type() and the
  packet is forwarded unchanged.
  NOTE(review): the result of talloc_get_type() is passed on without a
  visible check.
*/
325 void ctdb_recv_raw_pkt(void *p, uint8_t *data, uint32_t length)
327 struct ctdb_context *ctdb = talloc_get_type(p, struct ctdb_context);
328 ctdb_recv_pkt(ctdb, data, length);
332 called by the transport layer when a node is dead
/*
  Mark `node` as no longer connected: the context's num_connected counter
  is decremented, NODE_FLAGS_CONNECTED is cleared on the node, and the new
  count is logged at debug level 1.
  NOTE(review): the counter is decremented without checking whether the
  node was flagged connected beforehand.
*/
334 static void ctdb_node_dead(struct ctdb_node *node)
336 node->ctdb->num_connected--;
337 node->flags &= ~NODE_FLAGS_CONNECTED;
338 DEBUG(1,("%s: node %s is dead: %d connected\n",
339 node->ctdb->name, node->name, node->ctdb->num_connected));
343 called by the transport layer when a node is connected
/*
  Mark `node` as connected: the context's num_connected counter is
  incremented, NODE_FLAGS_CONNECTED is set on the node, and the new count
  is logged at debug level 1.
  NOTE(review): the counter is incremented without checking whether the
  node was already flagged connected.
*/
345 static void ctdb_node_connected(struct ctdb_node *node)
347 node->ctdb->num_connected++;
348 node->flags |= NODE_FLAGS_CONNECTED;
349 DEBUG(1,("%s: connected to %s - %d connected\n",
350 node->ctdb->name, node->name, node->ctdb->num_connected));
354 wait for all nodes to be connected
/*
  Run the event loop until the expected number of nodes is connected.

  The expected count starts at num_nodes - 1 (every node except this one).
  The loop calls event_loop_once() until ctdb->num_connected is exactly
  equal to the expected count; there is no timeout.
  NOTE(review): the statement inside the CTDB_FLAG_SELF_CONNECT branch is
  not visible in this excerpt; presumably it raises `expected` by one so
  that the local node is counted too - confirm.
*/
356 void ctdb_daemon_connect_wait(struct ctdb_context *ctdb)
358 int expected = ctdb->num_nodes - 1;
359 if (ctdb->flags & CTDB_FLAG_SELF_CONNECT) {
/* the comparison is != rather than <, so overshooting never ends the wait */
362 while (ctdb->num_connected != expected) {
363 DEBUG(3,("ctdb_connect_wait: waiting for %d nodes (have %d)\n",
364 expected, ctdb->num_connected));
365 event_loop_once(ctdb->ev);
367 DEBUG(3,("ctdb_connect_wait: got all %d nodes\n", expected));
/*
  Members of the record describing one deferred packet.
  NOTE(review): the enclosing declaration line is not visible in this
  excerpt; queue_next_trigger() and ctdb_defer_packet() access these
  members as q->ctdb and q->hdr on a struct queue_next.
*/
/* context the packet will be delivered to */
371 struct ctdb_context *ctdb;
/* the packet itself, starting with its header */
372 struct ctdb_req_header *hdr;
377 triggered when a deferred packet is due
/*
  Timed-event callback for a deferred packet.

  private_data is converted to a struct queue_next with talloc_get_type()
  and the stored packet is handed to ctdb_recv_pkt(), using the length
  recorded in the packet header. The ev, te and t arguments are not used in
  the visible lines.
  NOTE(review): whether the queue_next record is freed afterwards is not
  visible in this excerpt.
*/
379 static void queue_next_trigger(struct event_context *ev, struct timed_event *te,
380 struct timeval t, void *private_data)
382 struct queue_next *q = talloc_get_type(private_data, struct queue_next);
383 ctdb_recv_pkt(q->ctdb, (uint8_t *)q->hdr, q->hdr->length);
388 defer a packet, so it is processed on the next event loop
389 this is used for sending packets to ourselves
/*
  Queue a packet addressed to this node for later delivery.

  A struct queue_next is allocated with ctdb as parent and the packet is
  duplicated with talloc_memdup() (hdr->length bytes, also parented to
  ctdb), so the caller's buffer is not referenced afterwards. Allocation
  failures are logged at debug level 0.

  NOTE(review): two delivery paths are visible below - a direct call to
  ctdb_recv_pkt() and a zero-delay timed event that fires
  queue_next_trigger(). As shown, both would run; the gaps in the embedded
  line numbering suggest that lines selecting one of them (possibly
  preprocessor conditionals) were dropped from this excerpt - confirm.
  The assignment of q->ctdb, the NULL test on q and the early returns are
  likewise not visible.
*/
391 static void ctdb_defer_packet(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
393 struct queue_next *q;
394 q = talloc(ctdb, struct queue_next);
396 DEBUG(0,(__location__ " Failed to allocate deferred packet\n"));
/* private copy of the whole packet, sized by its own header field */
400 q->hdr = talloc_memdup(ctdb, hdr, hdr->length);
401 if (q->hdr == NULL) {
402 DEBUG(0,("Error copying deferred packet to self\n"));
406 /* use this to put packets directly into our recv function */
407 ctdb_recv_pkt(q->ctdb, (uint8_t *)q->hdr, q->hdr->length);
/* the event is parented to q and also receives q as its private data */
410 event_add_timed(ctdb->ev, q, timeval_zero(), queue_next_trigger, q);
415 queue a packet or die
/*
  Send a packet to the node named in hdr->destnode.

  The sent-packet counter is incremented first. The destination is checked
  with ctdb_validate_vnn(); an invalid destination is logged at debug
  level 0. A packet addressed to the local vnn is deferred through
  ctdb_defer_packet() unless CTDB_FLAG_SELF_CONNECT is set; every other
  packet goes to the transport's queue_pkt method, and a non-zero result
  there ends in ctdb_fatal().
  NOTE(review): what follows the failed validation (presumably a return
  before ctdb->nodes is indexed) is not visible in this excerpt.
*/
417 void ctdb_queue_packet(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
419 struct ctdb_node *node;
420 ctdb->status.node_packets_sent++;
422 if (!ctdb_validate_vnn(ctdb, hdr->destnode)) {
423 DEBUG(0,(__location__ " cant send to node %u that does not exist\n",
/* destnode is used directly as the index into the node array */
428 node = ctdb->nodes[hdr->destnode];
430 if (hdr->destnode == ctdb->vnn && !(ctdb->flags & CTDB_FLAG_SELF_CONNECT)) {
431 ctdb_defer_packet(ctdb, hdr);
432 } else if (ctdb->methods->queue_pkt(node, (uint8_t *)hdr, hdr->length) != 0) {
433 ctdb_fatal(ctdb, "Unable to queue packet\n");
/*
  Table of callbacks into this file: packet received, node dead and node
  connected. ctdb_init() stores its address in ctdb->upcalls.
*/
438 static const struct ctdb_upcalls ctdb_upcalls = {
439 .recv_pkt = ctdb_recv_pkt,
440 .node_dead = ctdb_node_dead,
441 .node_connected = ctdb_node_connected
445 initialise the ctdb daemon.
447 NOTE: In current code the daemon does not fork. This is for testing purposes only
448 and to simplify the code.
/*
  Allocate and pre-fill a ctdb context.

  The context is a zeroed talloc child of `ev`. The visible
  initialisation sets recovery_mode to CTDB_RECOVERY_NORMAL, points
  upcalls at the file-local callback table, creates the idr with
  idr_init() and sets max_lacount to CTDB_DEFAULT_MAX_LACOUNT.
  NOTE(review): the NULL check on the allocation, the storing of `ev` on
  the context and the return statement are not visible in this excerpt;
  the function may also continue past the last line shown.
*/
450 struct ctdb_context *ctdb_init(struct event_context *ev)
452 struct ctdb_context *ctdb;
454 ctdb = talloc_zero(ev, struct ctdb_context);
456 ctdb->recovery_mode = CTDB_RECOVERY_NORMAL;
457 ctdb->upcalls = &ctdb_upcalls;
458 ctdb->idr = idr_init(ctdb);
459 ctdb->max_lacount = CTDB_DEFAULT_MAX_LACOUNT;