4 Copyright (C) Andrew Tridgell 2006
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/tevent/tevent.h"
24 #include "lib/util/dlinklist.h"
25 #include "system/network.h"
26 #include "system/filesys.h"
27 #include "system/wait.h"
28 #include "../include/ctdb_client.h"
29 #include "../include/ctdb_private.h"
30 #include <sys/socket.h>
/*
 * List node describing one connected client. ctdb_accept_client() links
 * these into ctdb->client_pids and ctdb_find_client_by_pid() walks them.
 * NOTE(review): both of those functions also use a 'pid' member that is
 * not visible in this listing - confirm against the full source.
 */
32 struct ctdb_client_pid_list {
33 struct ctdb_client_pid_list *next, *prev;
34 struct ctdb_context *ctdb;
36 struct ctdb_client *client;
/* Forward declaration; the definition appears further down in this file. */
39 static void daemon_incoming_packet(void *, struct ctdb_req_header *);
/*
 * Log a notice that the daemon is shutting down. Registered with atexit()
 * by ctdb_start_transport().
 */
41 static void print_exit_message(void)
43 DEBUG(DEBUG_NOTICE,("CTDB daemon shutting down\n"));
/*
 * Timed-event handler that re-arms itself to fire again one second later.
 * private_data is the ctdb context.
 * NOTE(review): the body of the getpid() != ctdbd_pid branch is not visible
 * in this listing; presumably it returns without re-arming - confirm.
 */
48 static void ctdb_time_tick(struct event_context *ev, struct timed_event *te,
49 struct timeval t, void *private_data)
51 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
53 if (getpid() != ctdbd_pid) {
57 event_add_timed(ctdb->ev, ctdb,
58 timeval_current_ofs(1, 0),
59 ctdb_time_tick, ctdb);
/* Arm the first one-second ctdb_time_tick timer on ctdb->ev. */
62 /* Used to trigger a dummy event once per second, to make
63 * detection of hangs more reliable.
65 static void ctdb_start_time_tickd(struct ctdb_context *ctdb)
67 event_add_timed(ctdb->ev, ctdb,
68 timeval_current_ofs(1, 0),
69 ctdb_time_tick, ctdb);
/*
 * Bring the node's runtime services up, in this order: verify that a
 * transport is loaded (fatal if not), start it (fatal on failure), start
 * the recovery daemon, register the exit message with atexit(), then start
 * keepalives, health monitoring, tcp tickle updates, recovery daemon ping
 * handling and the once-per-second time tick.
 * NOTE(review): the statement that follows the "Failed to start recovery
 * daemon" log line is not visible in this listing.
 */
73 /* called when the "startup" event script has finished */
74 static void ctdb_start_transport(struct ctdb_context *ctdb)
76 if (ctdb->methods == NULL) {
77 DEBUG(DEBUG_ALERT,(__location__ " startup event finished but transport is DOWN.\n"));
78 ctdb_fatal(ctdb, "transport is not initialized but startup completed");
81 /* start the transport running */
82 if (ctdb->methods->start(ctdb) != 0) {
83 DEBUG(DEBUG_ALERT,("transport failed to start!\n"));
84 ctdb_fatal(ctdb, "transport failed to start");
87 /* start the recovery daemon process */
88 if (ctdb_start_recoverd(ctdb) != 0) {
89 DEBUG(DEBUG_ALERT,("Failed to start recovery daemon\n"));
93 /* Make sure we log something when the daemon terminates */
94 atexit(print_exit_message);
96 /* start monitoring for connected/disconnected nodes */
97 ctdb_start_keepalive(ctdb);
99 /* start monitoring for node health */
100 ctdb_start_monitoring(ctdb);
102 /* start periodic update of tcp tickle lists */
103 ctdb_start_tcp_tickle_update(ctdb);
105 /* start listening for recovery daemon pings */
106 ctdb_control_recd_ping(ctdb);
108 /* start listening to timer ticks */
109 ctdb_start_time_tickd(ctdb);
/*
 * Install SIG_IGN as the disposition for 'signum' using sigaction(), so the
 * signal is ignored. The return value of sigaction() is not checked.
 */
112 static void block_signal(int signum)
114 struct sigaction act;
116 memset(&act, 0, sizeof(act));
118 act.sa_handler = SIG_IGN;
119 sigemptyset(&act.sa_mask);
120 sigaddset(&act.sa_mask, signum);
121 sigaction(signum, &act, NULL);
/*
 * Queue 'hdr' (hdr->length bytes) on the client's queue and bump the
 * client_packets_sent statistic. For CTDB_REQ_MESSAGE packets the current
 * queue length is first compared against the max_queue_depth_drop_msg
 * tunable.
 * NOTE(review): the statements after the "queue full" log line are not
 * visible in this listing; the message says the client connection is
 * killed - confirm what is returned in that case.
 * On the normal path the result of ctdb_queue_send() is returned.
 */
126 send a packet to a client
128 static int daemon_queue_send(struct ctdb_client *client, struct ctdb_req_header *hdr)
130 CTDB_INCREMENT_STAT(client->ctdb, client_packets_sent);
131 if (hdr->operation == CTDB_REQ_MESSAGE) {
132 if (ctdb_queue_length(client->queue) > client->ctdb->tunable.max_queue_depth_drop_msg) {
133 DEBUG(DEBUG_ERR,("CTDB_REQ_MESSAGE queue full - killing client connection.\n"));
138 return ctdb_queue_send(client->queue, (uint8_t *)hdr, hdr->length);
/*
 * Per-client message handler callback: copies 'data' into a freshly
 * allocated CTDB_REQ_MESSAGE packet and queues it to the client passed as
 * private_data. Allocation failure is handled by CTDB_NO_MEMORY_VOID.
 * The return value of daemon_queue_send() is not inspected.
 */
142 message handler for when we are in daemon mode. This redirects the message
145 static void daemon_message_handler(struct ctdb_context *ctdb, uint64_t srvid,
146 TDB_DATA data, void *private_data)
148 struct ctdb_client *client = talloc_get_type(private_data, struct ctdb_client);
149 struct ctdb_req_message *r;
152 /* construct a message to send to the client containing the data */
153 len = offsetof(struct ctdb_req_message, data) + data.dsize;
154 r = ctdbd_allocate_pkt(ctdb, ctdb, CTDB_REQ_MESSAGE,
155 len, struct ctdb_req_message);
156 CTDB_NO_MEMORY_VOID(ctdb, r);
158 talloc_set_name_const(r, "req_message packet");
161 r->datalen = data.dsize;
162 memcpy(&r->data[0], data.dptr, data.dsize);
164 daemon_queue_send(client, &r->hdr);
/*
 * Look up the client for 'client_id' and register daemon_message_handler
 * for 'srvid', with the client as both talloc parent and private data.
 * An unknown client_id is logged as an error; the registration result is
 * logged either as a failure or at DEBUG_INFO level.
 * NOTE(review): the conditions and return statements are not visible in
 * this listing.
 */
170 this is called when the ctdb daemon received a ctdb request to
171 set the srvid from the client
173 int daemon_register_message_handler(struct ctdb_context *ctdb, uint32_t client_id, uint64_t srvid)
175 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
177 if (client == NULL) {
178 DEBUG(DEBUG_ERR,("Bad client_id in daemon_request_register_message_handler\n"));
181 res = ctdb_register_message_handler(ctdb, client, srvid, daemon_message_handler, client);
183 DEBUG(DEBUG_ERR,(__location__ " Failed to register handler %llu in daemon\n",
184 (unsigned long long)srvid));
186 DEBUG(DEBUG_INFO,(__location__ " Registered message handler for srvid=%llu\n",
187 (unsigned long long)srvid));
/*
 * Look up the client for 'client_id' and remove its handler for 'srvid'.
 * An unknown client_id is logged as an error; otherwise the result of
 * ctdb_deregister_message_handler() is returned.
 * NOTE(review): the return in the client == NULL branch is not visible in
 * this listing.
 */
194 this is called when the ctdb daemon received a ctdb request to
195 remove a srvid from the client
197 int daemon_deregister_message_handler(struct ctdb_context *ctdb, uint32_t client_id, uint64_t srvid)
199 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
200 if (client == NULL) {
201 DEBUG(DEBUG_ERR,("Bad client_id in daemon_request_deregister_message_handler\n"));
204 return ctdb_deregister_message_handler(ctdb, srvid, client);
/*
 * talloc destructor for a client. Runs the takeover hook, releases the
 * client's reqid and decrements num_clients. If persistent updates were
 * still in flight the node is put into CTDB_RECOVERY_ACTIVE. The database
 * named by client->db_id is then looked up; the visible statements force
 * recovery, clear its transaction_active flag and free its
 * persistent_state.
 * NOTE(review): the condition guarding the database-related statements is
 * not visible in this listing.
 */
209 destroy a ctdb_client
211 static int ctdb_client_destructor(struct ctdb_client *client)
213 struct ctdb_db_context *ctdb_db;
215 ctdb_takeover_client_destructor_hook(client);
216 ctdb_reqid_remove(client->ctdb, client->client_id);
217 CTDB_DECREMENT_STAT(client->ctdb, num_clients);
219 if (client->num_persistent_updates != 0) {
220 DEBUG(DEBUG_ERR,(__location__ " Client disconnecting with %u persistent updates in flight. Starting recovery\n", client->num_persistent_updates));
221 client->ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
223 ctdb_db = find_ctdb_db(client->ctdb, client->db_id);
225 DEBUG(DEBUG_ERR, (__location__ " client exit while transaction "
226 "commit active. Forcing recovery.\n"));
227 client->ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
229 /* legacy trans2 transaction state: */
230 ctdb_db->transaction_active = false;
233 * trans3 transaction state:
235 * The destructor sets the pointer to NULL.
237 talloc_free(ctdb_db->persistent_state);
/*
 * Route a CTDB_REQ_MESSAGE received from a local client. When the
 * destination is this node the packet is handed to ctdb_request_message();
 * for a remote node the payload is forwarded with
 * ctdb_daemon_send_message() and a failure is logged.
 * NOTE(review): the statement ending the local-delivery branch is not
 * visible in this listing.
 */
245 this is called when the ctdb daemon received a ctdb request message
246 from a local client over the unix domain socket
248 static void daemon_request_message_from_client(struct ctdb_client *client,
249 struct ctdb_req_message *c)
254 /* maybe the message is for another client on this node */
255 if (ctdb_get_pnn(client->ctdb)==c->hdr.destnode) {
256 ctdb_request_message(client->ctdb, (struct ctdb_req_header *)c);
260 /* its for a remote node */
261 data.dptr = &c->data[0];
262 data.dsize = c->datalen;
263 res = ctdb_daemon_send_message(client->ctdb, c->hdr.destnode,
266 DEBUG(DEBUG_ERR,(__location__ " Failed to send message to remote node %u\n",
/*
 * Per-call bookkeeping tying a client request to the ctdb_call being run
 * and the time it started (start_time feeds CTDB_UPDATE_LATENCY).
 * NOTE(review): the call code also reads and writes a 'reqid' member that
 * is not visible in this listing.
 */
272 struct daemon_call_state {
273 struct ctdb_client *client;
275 struct ctdb_call *call;
276 struct timeval start_time;
/*
 * Completion callback for a call issued on behalf of a local client.
 * Re-parents the daemon_call_state under the client, collects the result
 * with ctdb_daemon_call_recv(), builds a CTDB_REPLY_CALL packet carrying
 * the reply data and status and queues it to the client. The visible exit
 * paths update the call latency and decrement pending_calls.
 * NOTE(review): the conditions around the error branches and the final
 * cleanup are not visible in this listing.
 */
280 complete a call from a client
282 static void daemon_call_from_client_callback(struct ctdb_call_state *state)
284 struct daemon_call_state *dstate = talloc_get_type(state->async.private_data,
285 struct daemon_call_state);
286 struct ctdb_reply_call *r;
289 struct ctdb_client *client = dstate->client;
290 struct ctdb_db_context *ctdb_db = state->ctdb_db;
292 talloc_steal(client, dstate);
293 talloc_steal(dstate, dstate->call);
295 res = ctdb_daemon_call_recv(state, dstate->call);
297 DEBUG(DEBUG_ERR, (__location__ " ctdbd_call_recv() returned error\n"));
298 CTDB_DECREMENT_STAT(client->ctdb, pending_calls);
300 CTDB_UPDATE_LATENCY(client->ctdb, ctdb_db, "call_from_client_cb 1", call_latency, dstate->start_time);
304 length = offsetof(struct ctdb_reply_call, data) + dstate->call->reply_data.dsize;
305 r = ctdbd_allocate_pkt(client->ctdb, dstate, CTDB_REPLY_CALL,
306 length, struct ctdb_reply_call);
308 DEBUG(DEBUG_ERR, (__location__ " Failed to allocate reply_call in ctdb daemon\n"));
309 CTDB_DECREMENT_STAT(client->ctdb, pending_calls);
310 CTDB_UPDATE_LATENCY(client->ctdb, ctdb_db, "call_from_client_cb 2", call_latency, dstate->start_time);
313 r->hdr.reqid = dstate->reqid;
314 r->datalen = dstate->call->reply_data.dsize;
315 r->status = dstate->call->status;
316 memcpy(&r->data[0], dstate->call->reply_data.dptr, r->datalen);
318 res = daemon_queue_send(client, &r->hdr);
320 /* client is dead - return immediately */
324 DEBUG(DEBUG_ERR, (__location__ " Failed to queue packet from daemon to client\n"));
326 CTDB_UPDATE_LATENCY(client->ctdb, ctdb_db, "call_from_client_cb 3", call_latency, dstate->start_time);
327 CTDB_DECREMENT_STAT(client->ctdb, pending_calls);
/*
 * Context passed to daemon_incoming_packet_wrap() when a request has to be
 * requeued by ctdb_ltdb_lock_fetch_requeue().
 * NOTE(review): a 'client_id' member is used by the code but is not
 * visible in this listing.
 */
331 struct ctdb_daemon_packet_wrap {
332 struct ctdb_context *ctdb;
/*
 * Requeue entry point. 'p' is expected to be a ctdb_daemon_packet_wrap;
 * the client is resolved again from the stored client_id and, if it still
 * exists, the packet is passed on to daemon_incoming_packet(). A wrong
 * talloc type or a client that has gone away is logged.
 * NOTE(review): the conditions and returns in the error branches are not
 * visible in this listing.
 */
337 a wrapper to catch disconnected clients
339 static void daemon_incoming_packet_wrap(void *p, struct ctdb_req_header *hdr)
341 struct ctdb_client *client;
342 struct ctdb_daemon_packet_wrap *w = talloc_get_type(p,
343 struct ctdb_daemon_packet_wrap);
345 DEBUG(DEBUG_CRIT,(__location__ " Bad packet type '%s'\n", talloc_get_name(p)));
349 client = ctdb_reqid_find(w->ctdb, w->client_id, struct ctdb_client);
350 if (client == NULL) {
351 DEBUG(DEBUG_ERR,(__location__ " Packet for disconnected client %u\n",
359 daemon_incoming_packet(client, hdr);
364 this is called when the ctdb daemon received a ctdb request call
365 from a local client over the unix domain socket
367 static void daemon_request_call_from_client(struct ctdb_client *client,
368 struct ctdb_req_call *c)
370 struct ctdb_call_state *state;
371 struct ctdb_db_context *ctdb_db;
372 struct daemon_call_state *dstate;
373 struct ctdb_call *call;
374 struct ctdb_ltdb_header header;
377 struct ctdb_context *ctdb = client->ctdb;
378 struct ctdb_daemon_packet_wrap *w;
380 CTDB_INCREMENT_STAT(ctdb, total_calls);
381 CTDB_DECREMENT_STAT(ctdb, pending_calls);
383 ctdb_db = find_ctdb_db(client->ctdb, c->db_id);
385 DEBUG(DEBUG_ERR, (__location__ " Unknown database in request. db_id==0x%08x",
387 CTDB_DECREMENT_STAT(ctdb, pending_calls);
391 if (ctdb_db->unhealthy_reason) {
393 * this is just a warning, as the tdb should be empty anyway,
394 * and only persistent databases can be unhealthy, which doesn't
395 * use this code patch
397 DEBUG(DEBUG_WARNING,("warn: db(%s) unhealty in daemon_request_call_from_client(): %s\n",
398 ctdb_db->db_name, ctdb_db->unhealthy_reason));
402 key.dsize = c->keylen;
404 w = talloc(ctdb, struct ctdb_daemon_packet_wrap);
405 CTDB_NO_MEMORY_VOID(ctdb, w);
408 w->client_id = client->client_id;
410 ret = ctdb_ltdb_lock_fetch_requeue(ctdb_db, key, &header,
411 (struct ctdb_req_header *)c, &data,
412 daemon_incoming_packet_wrap, w, True);
414 /* will retry later */
415 CTDB_DECREMENT_STAT(ctdb, pending_calls);
422 DEBUG(DEBUG_ERR,(__location__ " Unable to fetch record\n"));
423 CTDB_DECREMENT_STAT(ctdb, pending_calls);
427 /* Dont do READONLY if we dont have a tracking database */
428 if ((c->flags & CTDB_WANT_READONLY) && ctdb_db->rottdb == NULL) {
429 c->flags &= ~CTDB_WANT_READONLY;
432 if (header.flags & CTDB_REC_RO_REVOKE_COMPLETE) {
433 header.flags &= ~(CTDB_REC_RO_HAVE_DELEGATIONS|CTDB_REC_RO_HAVE_READONLY|CTDB_REC_RO_REVOKING_READONLY|CTDB_REC_RO_REVOKE_COMPLETE);
434 if (ctdb_ltdb_store(ctdb_db, key, &header, data) != 0) {
435 ctdb_fatal(ctdb, "Failed to write header with cleared REVOKE flag");
439 /* if we are revoking, we must defer all other calls until the revoke
442 if (header.flags & CTDB_REC_RO_REVOKING_READONLY) {
443 talloc_free(data.dptr);
444 ret = ctdb_ltdb_unlock(ctdb_db, key);
446 if (ctdb_add_revoke_deferred_call(ctdb, ctdb_db, key, (struct ctdb_req_header *)c, daemon_incoming_packet, client) != 0) {
447 ctdb_fatal(ctdb, "Failed to add deferred call for revoke child");
452 if ((header.dmaster == ctdb->pnn)
453 && (!(c->flags & CTDB_WANT_READONLY))
454 && (header.flags & (CTDB_REC_RO_HAVE_DELEGATIONS|CTDB_REC_RO_HAVE_READONLY)) ) {
455 header.flags |= CTDB_REC_RO_REVOKING_READONLY;
456 if (ctdb_ltdb_store(ctdb_db, key, &header, data) != 0) {
457 ctdb_fatal(ctdb, "Failed to store record with HAVE_DELEGATIONS set");
459 ret = ctdb_ltdb_unlock(ctdb_db, key);
461 if (ctdb_start_revoke_ro_record(ctdb, ctdb_db, key, &header, data) != 0) {
462 ctdb_fatal(ctdb, "Failed to start record revoke");
464 talloc_free(data.dptr);
466 if (ctdb_add_revoke_deferred_call(ctdb, ctdb_db, key, (struct ctdb_req_header *)c, daemon_incoming_packet, client) != 0) {
467 ctdb_fatal(ctdb, "Failed to add deferred call for revoke child");
473 dstate = talloc(client, struct daemon_call_state);
474 if (dstate == NULL) {
475 ret = ctdb_ltdb_unlock(ctdb_db, key);
477 DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
480 DEBUG(DEBUG_ERR,(__location__ " Unable to allocate dstate\n"));
481 CTDB_DECREMENT_STAT(ctdb, pending_calls);
484 dstate->start_time = timeval_current();
485 dstate->client = client;
486 dstate->reqid = c->hdr.reqid;
487 talloc_steal(dstate, data.dptr);
489 call = dstate->call = talloc_zero(dstate, struct ctdb_call);
491 ret = ctdb_ltdb_unlock(ctdb_db, key);
493 DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
496 DEBUG(DEBUG_ERR,(__location__ " Unable to allocate call\n"));
497 CTDB_DECREMENT_STAT(ctdb, pending_calls);
498 CTDB_UPDATE_LATENCY(ctdb, ctdb_db, "call_from_client 1", call_latency, dstate->start_time);
502 call->call_id = c->callid;
504 call->call_data.dptr = c->data + c->keylen;
505 call->call_data.dsize = c->calldatalen;
506 call->flags = c->flags;
508 if (header.dmaster == ctdb->pnn) {
509 state = ctdb_call_local_send(ctdb_db, call, &header, &data);
511 state = ctdb_daemon_call_send_remote(ctdb_db, call, &header);
514 ret = ctdb_ltdb_unlock(ctdb_db, key);
516 DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
520 DEBUG(DEBUG_ERR,(__location__ " Unable to setup call send\n"));
521 CTDB_DECREMENT_STAT(ctdb, pending_calls);
522 CTDB_UPDATE_LATENCY(ctdb, ctdb_db, "call_from_client 2", call_latency, dstate->start_time);
525 talloc_steal(state, dstate);
526 talloc_steal(client, state);
528 state->async.fn = daemon_call_from_client_callback;
529 state->async.private_data = dstate;
/* Forward declaration; the definition appears further down in this file. */
533 static void daemon_request_control_from_client(struct ctdb_client *client,
534 struct ctdb_req_control *c);
/*
 * Dispatch one packet received from a local client. 'p' is the
 * ctdb_client. The packet is parented to a temporary talloc context, its
 * magic and version are checked, and it is then routed on hdr->operation
 * to the call, message or control handler, each of which bumps its own
 * client.req_* statistic. Unknown operations are logged at DEBUG_CRIT.
 * NOTE(review): the statements that end the validation-failure branches
 * are not visible in this listing.
 */
536 /* data contains a packet from the client */
537 static void daemon_incoming_packet(void *p, struct ctdb_req_header *hdr)
539 struct ctdb_client *client = talloc_get_type(p, struct ctdb_client);
541 struct ctdb_context *ctdb = client->ctdb;
543 /* place the packet as a child of a tmp_ctx. We then use
544 talloc_free() below to free it. If any of the calls want
545 to keep it, then they will steal it somewhere else, and the
546 talloc_free() will be a no-op */
547 tmp_ctx = talloc_new(client);
548 talloc_steal(tmp_ctx, hdr);
550 if (hdr->ctdb_magic != CTDB_MAGIC) {
551 ctdb_set_error(client->ctdb, "Non CTDB packet rejected in daemon\n");
555 if (hdr->ctdb_version != CTDB_VERSION) {
556 ctdb_set_error(client->ctdb, "Bad CTDB version 0x%x rejected in daemon\n", hdr->ctdb_version);
560 switch (hdr->operation) {
562 CTDB_INCREMENT_STAT(ctdb, client.req_call);
563 daemon_request_call_from_client(client, (struct ctdb_req_call *)hdr);
566 case CTDB_REQ_MESSAGE:
567 CTDB_INCREMENT_STAT(ctdb, client.req_message);
568 daemon_request_message_from_client(client, (struct ctdb_req_message *)hdr);
571 case CTDB_REQ_CONTROL:
572 CTDB_INCREMENT_STAT(ctdb, client.req_control);
573 daemon_request_control_from_client(client, (struct ctdb_req_control *)hdr);
577 DEBUG(DEBUG_CRIT,(__location__ " daemon: unrecognized operation %u\n",
582 talloc_free(tmp_ctx);
/*
 * Queue read callback for a client connection ('args' is the ctdb_client).
 * Counts the packet, then rejects it if it is shorter than a request
 * header, if hdr->length differs from the number of bytes read, or if the
 * magic or version is wrong. A valid packet is logged at DEBUG_DEBUG level
 * and handed to daemon_incoming_packet(), which takes over responsibility
 * for freeing 'data'.
 * NOTE(review): the statements that end each rejection branch are not
 * visible in this listing.
 */
586 called when the daemon gets a incoming packet
588 static void ctdb_daemon_read_cb(uint8_t *data, size_t cnt, void *args)
590 struct ctdb_client *client = talloc_get_type(args, struct ctdb_client);
591 struct ctdb_req_header *hdr;
598 CTDB_INCREMENT_STAT(client->ctdb, client_packets_recv);
600 if (cnt < sizeof(*hdr)) {
601 ctdb_set_error(client->ctdb, "Bad packet length %u in daemon\n",
605 hdr = (struct ctdb_req_header *)data;
606 if (cnt != hdr->length) {
607 ctdb_set_error(client->ctdb, "Bad header length %u expected %u\n in daemon",
608 (unsigned)hdr->length, (unsigned)cnt);
612 if (hdr->ctdb_magic != CTDB_MAGIC) {
613 ctdb_set_error(client->ctdb, "Non CTDB packet rejected\n");
617 if (hdr->ctdb_version != CTDB_VERSION) {
618 ctdb_set_error(client->ctdb, "Bad CTDB version 0x%x rejected in daemon\n", hdr->ctdb_version);
622 DEBUG(DEBUG_DEBUG,(__location__ " client request %u of type %u length %u from "
623 "node %u to %u\n", hdr->reqid, hdr->operation, hdr->length,
624 hdr->srcnode, hdr->destnode));
626 /* it is the responsibility of the incoming packet function to free 'data' */
627 daemon_incoming_packet(client, hdr);
/*
 * talloc destructor for a client pid list entry: unlinks the entry from
 * ctdb->client_pids, provided that list head is not NULL.
 */
631 static int ctdb_clientpid_destructor(struct ctdb_client_pid_list *client_pid)
633 if (client_pid->ctdb->client_pids != NULL) {
634 DLIST_REMOVE(client_pid->ctdb->client_pids, client_pid);
/*
 * fd-event handler for the listening unix domain socket. Accepts one
 * connection, marks it close-on-exec, allocates a ctdb_client, assigns it
 * a reqid-based client_id, records the peer pid, adds a
 * ctdb_client_pid_list entry to ctdb->client_pids, sets up the client's
 * packet queue with ctdb_daemon_read_cb as reader and installs the two
 * destructors before bumping num_clients.
 * NOTE(review): two alternative peer-credential lookups (SO_PEERID with
 * struct peercred_struct, SO_PEERCRED with struct ucred) appear below; the
 * preprocessor conditionals selecting between them are not visible in this
 * listing.
 * NOTE(review): cr.pid appears to be read after the getsockopt() call
 * regardless of whether it succeeded, and the talloc_zero() result does
 * not appear to be checked - confirm against the full source.
 */
641 static void ctdb_accept_client(struct event_context *ev, struct fd_event *fde,
642 uint16_t flags, void *private_data)
644 struct sockaddr_un addr;
647 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
648 struct ctdb_client *client;
649 struct ctdb_client_pid_list *client_pid;
651 struct peercred_struct cr;
652 socklen_t crl = sizeof(struct peercred_struct);
655 socklen_t crl = sizeof(struct ucred);
658 memset(&addr, 0, sizeof(addr));
660 fd = accept(ctdb->daemon.sd, (struct sockaddr *)&addr, &len);
666 set_close_on_exec(fd);
668 DEBUG(DEBUG_DEBUG,(__location__ " Created SOCKET FD:%d to connected child\n", fd));
670 client = talloc_zero(ctdb, struct ctdb_client);
672 if (getsockopt(fd, SOL_SOCKET, SO_PEERID, &cr, &crl) == 0) {
674 if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &cr, &crl) == 0) {
676 DEBUG(DEBUG_INFO,("Connected client with pid:%u\n", (unsigned)cr.pid));
681 client->client_id = ctdb_reqid_new(ctdb, client);
682 client->pid = cr.pid;
684 client_pid = talloc(client, struct ctdb_client_pid_list);
685 if (client_pid == NULL) {
686 DEBUG(DEBUG_ERR,("Failed to allocate client pid structure\n"));
691 client_pid->ctdb = ctdb;
692 client_pid->pid = cr.pid;
693 client_pid->client = client;
695 DLIST_ADD(ctdb->client_pids, client_pid);
697 client->queue = ctdb_queue_setup(ctdb, client, fd, CTDB_DS_ALIGNMENT,
698 ctdb_daemon_read_cb, client,
699 "client-%u", client->pid);
701 talloc_set_destructor(client, ctdb_client_destructor);
702 talloc_set_destructor(client_pid, ctdb_clientpid_destructor);
703 CTDB_INCREMENT_STAT(ctdb, num_clients);
709 create a unix domain socket and bind it
710 return a file descriptor open on the socket
712 static int ux_socket_bind(struct ctdb_context *ctdb)
714 struct sockaddr_un addr;
716 ctdb->daemon.sd = socket(AF_UNIX, SOCK_STREAM, 0);
717 if (ctdb->daemon.sd == -1) {
721 set_close_on_exec(ctdb->daemon.sd);
722 set_nonblocking(ctdb->daemon.sd);
724 memset(&addr, 0, sizeof(addr));
725 addr.sun_family = AF_UNIX;
726 strncpy(addr.sun_path, ctdb->daemon.name, sizeof(addr.sun_path));
728 if (bind(ctdb->daemon.sd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
729 DEBUG(DEBUG_CRIT,("Unable to bind on ctdb socket '%s'\n", ctdb->daemon.name));
733 if (chown(ctdb->daemon.name, geteuid(), getegid()) != 0 ||
734 chmod(ctdb->daemon.name, 0700) != 0) {
735 DEBUG(DEBUG_CRIT,("Unable to secure ctdb socket '%s', ctdb->daemon.name\n", ctdb->daemon.name));
740 if (listen(ctdb->daemon.sd, 100) != 0) {
741 DEBUG(DEBUG_CRIT,("Unable to listen on ctdb socket '%s'\n", ctdb->daemon.name));
748 close(ctdb->daemon.sd);
749 ctdb->daemon.sd = -1;
/*
 * SIGCHLD handler: reaps terminated children with
 * waitpid(-1, &status, WNOHANG), logging waitpid() errors with errno and
 * the pid of each reaped child at DEBUG_DEBUG level.
 * NOTE(review): the loop and conditions around the waitpid() call are not
 * visible in this listing.
 */
753 static void sig_child_handler(struct event_context *ev,
754 struct signal_event *se, int signum, int count,
758 // struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
763 pid = waitpid(-1, &status, WNOHANG);
765 DEBUG(DEBUG_ERR, (__location__ " waitpid() returned error. errno:%d\n", errno));
769 DEBUG(DEBUG_DEBUG, ("SIGCHLD from %d\n", (int)pid));
/*
 * Completion callback for the "setup" event script. A failure is fatal
 * (the status test itself is not visible in this listing); otherwise the
 * "setup" notification script is run and CTDB_CONTROL_STARTUP is broadcast
 * to all nodes with CTDB_CTRL_FLAG_NOREPLY, so no reply is awaited.
 */
774 static void ctdb_setup_event_callback(struct ctdb_context *ctdb, int status,
778 ctdb_fatal(ctdb, "Failed to run setup event\n");
781 ctdb_run_notification_script(ctdb, "setup");
783 /* tell all other nodes we've just started up */
784 ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL,
785 0, CTDB_CONTROL_STARTUP, 0,
786 CTDB_CTRL_FLAG_NOREPLY,
787 tdb_null, NULL, NULL);
/*
 * Daemon entry point. In order, the visible code: removes any stale socket
 * file and binds the unix domain socket; forks when do_fork is set;
 * reopens tdbs; points stdin at /dev/null; ignores SIGPIPE; records the
 * daemon pid; optionally requests realtime scheduling; creates the event
 * context and tevent logging; initialises statistics; forces
 * CTDB_RECOVERY_ACTIVE; initialises the configured transport ("tcp", or
 * "ib" when USE_INFINIBAND is defined); loads the public address list;
 * attaches databases; runs the "init" event; freezes; starts accepting
 * clients; releases previously held IPs unless IP failover is disabled;
 * starts the transport; installs the SIGCHLD handler; schedules the
 * "setup" event; starts the syslog daemon; locks down memory; and enters
 * the event loop, which is not expected to return.
 * NOTE(review): most failure branches end in statements that are not
 * visible in this listing, as are the conditions on some calls (for
 * example around start_syslog_daemon()).
 */
791 start the protocol going as a daemon
793 int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork, bool use_syslog, const char *public_address_list)
796 struct fd_event *fde;
797 const char *domain_socket_name;
798 struct signal_event *se;
800 /* get rid of any old sockets */
801 unlink(ctdb->daemon.name);
803 /* create a unix domain stream socket to listen to */
804 res = ux_socket_bind(ctdb);
806 DEBUG(DEBUG_ALERT,(__location__ " Failed to open CTDB unix domain socket\n"));
810 if (do_fork && fork()) {
814 tdb_reopen_all(False);
819 if (open("/dev/null", O_RDONLY) != 0) {
820 DEBUG(DEBUG_ALERT,(__location__ " Failed to setup stdin on /dev/null\n"));
824 block_signal(SIGPIPE);
826 ctdbd_pid = getpid();
827 ctdb->ctdbd_pid = ctdbd_pid;
830 DEBUG(DEBUG_ERR, ("Starting CTDBD as pid : %u\n", ctdbd_pid));
832 if (ctdb->do_setsched) {
833 /* try to set us up as realtime */
834 ctdb_set_scheduler(ctdb);
837 /* ensure the socket is deleted on exit of the daemon */
838 domain_socket_name = talloc_strdup(talloc_autofree_context(), ctdb->daemon.name);
839 if (domain_socket_name == NULL) {
840 DEBUG(DEBUG_ALERT,(__location__ " talloc_strdup failed.\n"));
844 ctdb->ev = event_context_init(NULL);
845 tevent_loop_allow_nesting(ctdb->ev);
846 ret = ctdb_init_tevent_logging(ctdb);
848 DEBUG(DEBUG_ALERT,("Failed to initialize TEVENT logging\n"));
852 ctdb_set_child_logging(ctdb);
854 /* initialize statistics collection */
855 ctdb_statistics_init(ctdb);
857 /* force initial recovery for election */
858 ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
860 if (strcmp(ctdb->transport, "tcp") == 0) {
861 int ctdb_tcp_init(struct ctdb_context *);
862 ret = ctdb_tcp_init(ctdb);
864 #ifdef USE_INFINIBAND
865 if (strcmp(ctdb->transport, "ib") == 0) {
866 int ctdb_ibw_init(struct ctdb_context *);
867 ret = ctdb_ibw_init(ctdb);
871 DEBUG(DEBUG_ERR,("Failed to initialise transport '%s'\n", ctdb->transport));
875 if (ctdb->methods == NULL) {
876 DEBUG(DEBUG_ALERT,(__location__ " Can not initialize transport. ctdb->methods is NULL\n"));
877 ctdb_fatal(ctdb, "transport is unavailable. can not initialize.");
880 /* initialise the transport */
881 if (ctdb->methods->initialise(ctdb) != 0) {
882 ctdb_fatal(ctdb, "transport failed to initialise");
884 if (public_address_list) {
885 ret = ctdb_set_public_addresses(ctdb, public_address_list);
887 DEBUG(DEBUG_ALERT,("Unable to setup public address list\n"));
893 /* attach to existing databases */
894 if (ctdb_attach_databases(ctdb) != 0) {
895 ctdb_fatal(ctdb, "Failed to attach to databases\n");
898 ret = ctdb_event_script(ctdb, CTDB_EVENT_INIT);
900 ctdb_fatal(ctdb, "Failed to run init event\n");
902 ctdb_run_notification_script(ctdb, "init");
904 /* start frozen, then let the first election sort things out */
905 if (ctdb_blocking_freeze(ctdb)) {
906 ctdb_fatal(ctdb, "Failed to get initial freeze\n");
909 /* now start accepting clients, only can do this once frozen */
910 fde = event_add_fd(ctdb->ev, ctdb, ctdb->daemon.sd,
912 ctdb_accept_client, ctdb);
913 tevent_fd_set_auto_close(fde);
915 /* release any IPs we hold from previous runs of the daemon */
916 if (ctdb->tunable.disable_ip_failover == 0) {
917 ctdb_release_all_ips(ctdb);
920 /* start the transport going */
921 ctdb_start_transport(ctdb);
923 /* set up a handler to pick up sigchld */
924 se = event_add_signal(ctdb->ev, ctdb,
929 DEBUG(DEBUG_CRIT,("Failed to set up signal handler for SIGCHLD\n"));
933 ret = ctdb_event_script_callback(ctdb,
935 ctdb_setup_event_callback,
941 DEBUG(DEBUG_CRIT,("Failed to set up 'setup' event\n"));
946 if (start_syslog_daemon(ctdb)) {
947 DEBUG(DEBUG_CRIT, ("Failed to start syslog daemon\n"));
952 ctdb_lockdown_memory(ctdb);
954 /* go into a wait loop to allow other nodes to complete */
955 event_loop_wait(ctdb->ev);
957 DEBUG(DEBUG_CRIT,("event_loop_wait() returned. this should not happen\n"));
/*
 * Allocate a transport packet through ctdb->methods->allocate_pkt().
 * 'length' is raised to at least 'slength', and the allocation size is
 * that length rounded up to a multiple of CTDB_DS_ALIGNMENT. The first
 * 'slength' bytes are zeroed and the common header fields (length,
 * operation, magic, version, generation, srcnode) are filled in; the
 * talloc name of the packet is set to 'type'.
 * The transport must be up: ctdb->methods == NULL is logged as
 * "Transport is DOWN".
 * NOTE(review): the return statements are not visible in this listing.
 */
962 allocate a packet for use in daemon<->daemon communication
964 struct ctdb_req_header *_ctdb_transport_allocate(struct ctdb_context *ctdb,
966 enum ctdb_operation operation,
967 size_t length, size_t slength,
971 struct ctdb_req_header *hdr;
973 length = MAX(length, slength);
974 size = (length+(CTDB_DS_ALIGNMENT-1)) & ~(CTDB_DS_ALIGNMENT-1);
976 if (ctdb->methods == NULL) {
977 DEBUG(DEBUG_INFO,(__location__ " Unable to allocate transport packet for operation %u of length %u. Transport is DOWN.\n",
978 operation, (unsigned)length));
982 hdr = (struct ctdb_req_header *)ctdb->methods->allocate_pkt(mem_ctx, size);
984 DEBUG(DEBUG_ERR,("Unable to allocate transport packet for operation %u of length %u\n",
985 operation, (unsigned)length));
988 talloc_set_name_const(hdr, type);
989 memset(hdr, 0, slength);
990 hdr->length = length;
991 hdr->operation = operation;
992 hdr->ctdb_magic = CTDB_MAGIC;
993 hdr->ctdb_version = CTDB_VERSION;
994 hdr->generation = ctdb->vnn_map->generation;
995 hdr->srcnode = ctdb->pnn;
/*
 * State for one control request issued by a local client. Entries are
 * linked into the destination node's pending_controls list while the
 * control is outstanding.
 * NOTE(review): a 'reqid' member is used by the control code but is not
 * visible in this listing.
 */
1000 struct daemon_control_state {
1001 struct daemon_control_state *next, *prev;
1002 struct ctdb_client *client;
1003 struct ctdb_req_control *c;
1005 struct ctdb_node *node;
/*
 * Completion callback for a client-issued control. Builds a
 * CTDB_REPLY_CONTROL packet (allocated as a child of the control state)
 * holding the reply data followed by the error message text, echoes the
 * original reqid and queues the packet to the client.
 * NOTE(review): the conditions guarding the errormsg handling, the status
 * assignment and the handling of the daemon_queue_send() result are not
 * visible in this listing.
 */
1009 callback when a control reply comes in
1011 static void daemon_control_callback(struct ctdb_context *ctdb,
1012 int32_t status, TDB_DATA data,
1013 const char *errormsg,
1016 struct daemon_control_state *state = talloc_get_type(private_data,
1017 struct daemon_control_state);
1018 struct ctdb_client *client = state->client;
1019 struct ctdb_reply_control *r;
1023 /* construct a message to send to the client containing the data */
1024 len = offsetof(struct ctdb_reply_control, data) + data.dsize;
1026 len += strlen(errormsg);
1028 r = ctdbd_allocate_pkt(ctdb, state, CTDB_REPLY_CONTROL, len,
1029 struct ctdb_reply_control);
1030 CTDB_NO_MEMORY_VOID(ctdb, r);
1032 r->hdr.reqid = state->reqid;
1034 r->datalen = data.dsize;
1036 memcpy(&r->data[0], data.dptr, data.dsize);
1038 r->errorlen = strlen(errormsg);
1039 memcpy(&r->data[r->datalen], errormsg, r->errorlen);
1042 ret = daemon_queue_send(client, &r->hdr);
/*
 * Drain node->pending_controls: each entry is unlinked and completed
 * through daemon_control_callback() with status (uint32_t)-1, no data and
 * the error text "node is disconnected".
 */
1049 fail all pending controls to a disconnected node
1051 void ctdb_daemon_cancel_controls(struct ctdb_context *ctdb, struct ctdb_node *node)
1053 struct daemon_control_state *state;
1054 while ((state = node->pending_controls)) {
1055 DLIST_REMOVE(node->pending_controls, state);
1056 daemon_control_callback(ctdb, (uint32_t)-1, tdb_null,
1057 "node is disconnected", state);
/*
 * talloc destructor: unlinks the state from its node's pending_controls
 * list.
 * NOTE(review): state->node may be NULL (see
 * daemon_request_control_from_client); the guard for that is not visible
 * in this listing.
 */
1062 destroy a daemon_control_state
1064 static int daemon_control_destructor(struct daemon_control_state *state)
1067 DLIST_REMOVE(state->node->pending_controls, state);
/*
 * Handle a CTDB_REQ_CONTROL from a local client. CTDB_CURRENT_NODE is
 * resolved to this node's pnn. A daemon_control_state owned by the client
 * takes ownership of the request packet and, when the destination pnn is
 * valid, is linked into that node's pending_controls list. The control is
 * forwarded with ctdb_daemon_send_control(), with daemon_control_callback
 * as the completion handler.
 * For CTDB_CTRL_FLAG_NOREPLY requests the state is moved under tmp_ctx, so
 * it is released by the talloc_free(tmp_ctx) at the end of the function.
 * NOTE(review): the else branch for an invalid pnn and the condition on
 * the send result are not visible in this listing.
 */
1073 this is called when the ctdb daemon received a ctdb request control
1074 from a local client over the unix domain socket
1076 static void daemon_request_control_from_client(struct ctdb_client *client,
1077 struct ctdb_req_control *c)
1081 struct daemon_control_state *state;
1082 TALLOC_CTX *tmp_ctx = talloc_new(client);
1084 if (c->hdr.destnode == CTDB_CURRENT_NODE) {
1085 c->hdr.destnode = client->ctdb->pnn;
1088 state = talloc(client, struct daemon_control_state);
1089 CTDB_NO_MEMORY_VOID(client->ctdb, state);
1091 state->client = client;
1092 state->c = talloc_steal(state, c);
1093 state->reqid = c->hdr.reqid;
1094 if (ctdb_validate_pnn(client->ctdb, c->hdr.destnode)) {
1095 state->node = client->ctdb->nodes[c->hdr.destnode];
1096 DLIST_ADD(state->node->pending_controls, state);
1101 talloc_set_destructor(state, daemon_control_destructor);
1103 if (c->flags & CTDB_CTRL_FLAG_NOREPLY) {
1104 talloc_steal(tmp_ctx, state);
1107 data.dptr = &c->data[0];
1108 data.dsize = c->datalen;
1109 res = ctdb_daemon_send_control(client->ctdb, c->hdr.destnode,
1110 c->srvid, c->opcode, client->client_id,
1112 data, daemon_control_callback,
1115 DEBUG(DEBUG_ERR,(__location__ " Failed to send control to remote node %u\n",
1119 talloc_free(tmp_ctx);
/*
 * Register a call function for the database identified by db_id: a
 * ctdb_registered_call is allocated as a child of the database context and
 * added to ctdb_db->calls.
 * NOTE(review): the assignments of 'fn' and 'id' into the new entry and
 * the return statements are not visible in this listing.
 */
1123 register a call function
1125 int ctdb_daemon_set_call(struct ctdb_context *ctdb, uint32_t db_id,
1126 ctdb_fn_t fn, int id)
1128 struct ctdb_registered_call *call;
1129 struct ctdb_db_context *ctdb_db;
1131 ctdb_db = find_ctdb_db(ctdb, db_id);
1132 if (ctdb_db == NULL) {
1136 call = talloc(ctdb_db, struct ctdb_registered_call);
1140 DLIST_ADD(ctdb_db->calls, call);
/*
 * Deferred message addressed to the local node; see ctdb_local_message()
 * and ctdb_local_message_trigger().
 * NOTE(review): the 'srvid' and 'data' members used by those functions are
 * not visible in this listing.
 */
1147 this local messaging handler is ugly, but is needed to prevent
1148 recursion in ctdb_send_message() when the destination node is the
1149 same as the source node
1151 struct ctdb_local_message {
1152 struct ctdb_context *ctdb;
/*
 * Timed-event handler that delivers a deferred local message with
 * ctdb_dispatch_message() and logs a dispatch failure together with the
 * srvid.
 * NOTE(review): the release of 'm' is not visible in this listing.
 */
1157 static void ctdb_local_message_trigger(struct event_context *ev, struct timed_event *te,
1158 struct timeval t, void *private_data)
1160 struct ctdb_local_message *m = talloc_get_type(private_data,
1161 struct ctdb_local_message);
1164 res = ctdb_dispatch_message(m->ctdb, m->srvid, m->data);
1166 DEBUG(DEBUG_ERR, (__location__ " Failed to dispatch message for srvid=%llu\n",
1167 (unsigned long long)m->srvid));
/*
 * Queue a message for delivery on this node. A ctdb_local_message is
 * allocated, the payload is duplicated under it with talloc_memdup(), and
 * ctdb_local_message_trigger is scheduled as a timed event at
 * timeval_zero() so that delivery happens from the event loop rather than
 * recursively.
 * NOTE(review): the field assignments before the memdup and the return
 * statements are not visible in this listing.
 */
1172 static int ctdb_local_message(struct ctdb_context *ctdb, uint64_t srvid, TDB_DATA data)
1174 struct ctdb_local_message *m;
1175 m = talloc(ctdb, struct ctdb_local_message);
1176 CTDB_NO_MEMORY(ctdb, m);
1181 m->data.dptr = talloc_memdup(m, m->data.dptr, m->data.dsize);
1182 if (m->data.dptr == NULL) {
1187 /* this needs to be done as an event to prevent recursion */
1188 event_add_timed(ctdb->ev, m, timeval_zero(), ctdb_local_message_trigger, m);
/*
 * Send a message with the given srvid and payload to node 'pnn'. Requires
 * the transport to be up. A message addressed to this node is handed to
 * ctdb_local_message(); otherwise a CTDB_REQ_MESSAGE packet is allocated,
 * filled with the payload and queued with ctdb_queue_packet().
 * NOTE(review): the return statements and the release of the packet are
 * not visible in this listing.
 */
1195 int ctdb_daemon_send_message(struct ctdb_context *ctdb, uint32_t pnn,
1196 uint64_t srvid, TDB_DATA data)
1198 struct ctdb_req_message *r;
1201 if (ctdb->methods == NULL) {
1202 DEBUG(DEBUG_INFO,(__location__ " Failed to send message. Transport is DOWN\n"));
1206 /* see if this is a message to ourselves */
1207 if (pnn == ctdb->pnn) {
1208 return ctdb_local_message(ctdb, srvid, data);
1211 len = offsetof(struct ctdb_req_message, data) + data.dsize;
1212 r = ctdb_transport_allocate(ctdb, ctdb, CTDB_REQ_MESSAGE, len,
1213 struct ctdb_req_message);
1214 CTDB_NO_MEMORY(ctdb, r);
1216 r->hdr.destnode = pnn;
1218 r->datalen = data.dsize;
1219 memcpy(&r->data[0], data.dptr, data.dsize);
1221 ctdb_queue_packet(ctdb, &r->hdr);
/*
 * One registered client notification; entries are linked into
 * client->notify by ctdb_control_register_notify().
 * NOTE(review): the 'srvid' and 'data' members used by the notify code are
 * not visible in this listing.
 */
1229 struct ctdb_client_notify_list {
1230 struct ctdb_client_notify_list *next, *prev;
1231 struct ctdb_context *ctdb;
/*
 * talloc destructor for a notification entry: sends the stored notify data
 * to CTDB_BROADCAST_CONNECTED on the entry's srvid and logs a failure to
 * send.
 * NOTE(review): the condition on 'ret' and the return value are not
 * visible in this listing.
 */
1237 static int ctdb_client_notify_destructor(struct ctdb_client_notify_list *nl)
1241 DEBUG(DEBUG_ERR,("Sending client notify message for srvid:%llu\n", (unsigned long long)nl->srvid));
1243 ret = ctdb_daemon_send_message(nl->ctdb, CTDB_BROADCAST_CONNECTED, (unsigned long long)nl->srvid, nl->data);
1245 DEBUG(DEBUG_ERR,("Failed to send client notify message\n"));
1251 int32_t ctdb_control_register_notify(struct ctdb_context *ctdb, uint32_t client_id, TDB_DATA indata)
1253 struct ctdb_client_notify_register *notify = (struct ctdb_client_notify_register *)indata.dptr;
1254 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1255 struct ctdb_client_notify_list *nl;
1257 DEBUG(DEBUG_INFO,("Register srvid %llu for client %d\n", (unsigned long long)notify->srvid, client_id));
1259 if (indata.dsize < offsetof(struct ctdb_client_notify_register, notify_data)) {
1260 DEBUG(DEBUG_ERR,(__location__ " Too little data in control : %d\n", (int)indata.dsize));
1264 if (indata.dsize != (notify->len + offsetof(struct ctdb_client_notify_register, notify_data))) {
1265 DEBUG(DEBUG_ERR,(__location__ " Wrong amount of data in control. Got %d, expected %d\n", (int)indata.dsize, (int)(notify->len + offsetof(struct ctdb_client_notify_register, notify_data))));
1270 if (client == NULL) {
1271 DEBUG(DEBUG_ERR,(__location__ " Could not find client parent structure. You can not send this control to a remote node\n"));
1275 for(nl=client->notify; nl; nl=nl->next) {
1276 if (nl->srvid == notify->srvid) {
1281 DEBUG(DEBUG_ERR,(__location__ " Notification for srvid:%llu already exists for this client\n", (unsigned long long)notify->srvid));
1285 nl = talloc(client, struct ctdb_client_notify_list);
1286 CTDB_NO_MEMORY(ctdb, nl);
1288 nl->srvid = notify->srvid;
1289 nl->data.dsize = notify->len;
1290 nl->data.dptr = talloc_size(nl, nl->data.dsize);
1291 CTDB_NO_MEMORY(ctdb, nl->data.dptr);
1292 memcpy(nl->data.dptr, notify->notify_data, nl->data.dsize);
1294 DLIST_ADD(client->notify, nl);
1295 talloc_set_destructor(nl, ctdb_client_notify_destructor);
/*
 * Remove the notification registered by a local client for notify->srvid.
 * The entry is unlinked from client->notify and its destructor is cleared,
 * so the notify message is not broadcast for a deregistered entry.
 * An unknown client or an unregistered srvid is logged as an error.
 * NOTE(review): indata.dsize is not checked in the visible code before
 * notify->srvid is read - presumably the caller validates it; confirm.
 * NOTE(review): the loop exit, the free of the entry and the return
 * statements are not visible in this listing.
 */
1300 int32_t ctdb_control_deregister_notify(struct ctdb_context *ctdb, uint32_t client_id, TDB_DATA indata)
1302 struct ctdb_client_notify_deregister *notify = (struct ctdb_client_notify_deregister *)indata.dptr;
1303 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1304 struct ctdb_client_notify_list *nl;
1306 DEBUG(DEBUG_INFO,("Deregister srvid %llu for client %d\n", (unsigned long long)notify->srvid, client_id));
1308 if (client == NULL) {
1309 DEBUG(DEBUG_ERR,(__location__ " Could not find client parent structure. You can not send this control to a remote node\n"));
1313 for(nl=client->notify; nl; nl=nl->next) {
1314 if (nl->srvid == notify->srvid) {
1319 DEBUG(DEBUG_ERR,(__location__ " No notification for srvid:%llu found for this client\n", (unsigned long long)notify->srvid));
1323 DLIST_REMOVE(client->notify, nl);
1324 talloc_set_destructor(nl, NULL);
/*
 * Walk ctdb->client_pids and return the client of the first entry whose
 * pid equals 'pid'.
 * NOTE(review): the return for the not-found case is not visible in this
 * listing; ctdb_control_process_exists() tests the result against NULL.
 */
1330 struct ctdb_client *ctdb_find_client_by_pid(struct ctdb_context *ctdb, pid_t pid)
1332 struct ctdb_client_pid_list *client_pid;
1334 for (client_pid = ctdb->client_pids; client_pid; client_pid=client_pid->next) {
1335 if (client_pid->pid == pid) {
1336 return client_pid->client;
1343 /* This control is used by samba when probing if a process (of a samba daemon)
1345 Samba does this when it needs/wants to check if a subrecord in one of the
1346 databases is still valid, or if it is stale and can be removed.
1347 If the node is in unhealthy or stopped state we just kill off the samba
1348 process holding this sub-record and return to the calling samba that
1349 the process does not exist.
1350 This allows us to forcefully recall subrecords registered by samba processes
1351 on banned and stopped nodes.
/*
 * Probe for process 'pid'. When this node is banned or stopped and the pid
 * belongs to a connected client, that client is freed and a notice is
 * logged. Otherwise the result of kill(pid, 0) is returned.
 * NOTE(review): the statements between talloc_free(client) and the final
 * kill() are not visible in this listing; there may be an early return on
 * the banned/stopped path - confirm.
 */
1353 int32_t ctdb_control_process_exists(struct ctdb_context *ctdb, pid_t pid)
1355 struct ctdb_client *client;
1357 if (ctdb->nodes[ctdb->pnn]->flags & (NODE_FLAGS_BANNED|NODE_FLAGS_STOPPED)) {
1358 client = ctdb_find_client_by_pid(ctdb, pid);
1359 if (client != NULL) {
1360 DEBUG(DEBUG_NOTICE,(__location__ " Killing client with pid:%d on banned/stopped node\n", (int)pid));
1361 talloc_free(client);
1366 return kill(pid, 0);