4 Copyright (C) Andrew Tridgell 2007
5 Copyright (C) Ronnie Sahlberg 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
24 #include "lib/util/dlinklist.h"
25 #include "system/network.h"
26 #include "system/filesys.h"
27 #include "system/locale.h"
29 #include "../include/ctdb_private.h"
30 #include "lib/util/dlinklist.h"
35 allocate a packet for use in client<->daemon communication
/*
  Allocate a packet and fill in the common request header.

  The buffer comes from talloc_zero_size() under mem_ctx, so every byte
  that is not explicitly set below starts out as zero. Its size is the
  larger of length and slength, rounded up to CTDB_DS_ALIGNMENT, and its
  talloc name is set to the type string.
*/
37 struct ctdb_req_header *_ctdbd_allocate_pkt(struct ctdb_context *ctdb,
39 enum ctdb_operation operation,
40 size_t length, size_t slength,
44 struct ctdb_req_header *hdr;
/* use whichever of the two requested sizes is larger */
46 length = MAX(length, slength);
/* round up to a multiple of CTDB_DS_ALIGNMENT; this mask form relies
   on the constant being a power of two */
47 size = (length+(CTDB_DS_ALIGNMENT-1)) & ~(CTDB_DS_ALIGNMENT-1);
49 hdr = (struct ctdb_req_header *)talloc_zero_size(mem_ctx, size);
51 DEBUG(DEBUG_ERR,("Unable to allocate packet for operation %u of length %u\n",
52 operation, (unsigned)length));
55 talloc_set_name_const(hdr, type);
/* stamp the header fields that are filled in for every packet */
57 hdr->operation = operation;
58 hdr->ctdb_magic = CTDB_MAGIC;
59 hdr->ctdb_version = CTDB_VERSION;
60 hdr->srcnode = ctdb->pnn;
/* NOTE(review): dereferences ctdb->vnn_map - confirm it is guarded
   against NULL before this assignment */
62 hdr->generation = ctdb->vnn_map->generation;
69 local version of ctdb_call
/*
  Run a call function against a record on this node.

  A scratch ctdb_call_info is allocated under ctdb and given its own
  copy of the record data. The function registered on ctdb_db for
  call->call_id is looked up and invoked on it. When updatetdb is set
  the resulting data is written back with ctdb_ltdb_store(). Reply data
  and status are handed back through call.
*/
71 int ctdb_call_local(struct ctdb_db_context *ctdb_db, struct ctdb_call *call,
72 struct ctdb_ltdb_header *header, TALLOC_CTX *mem_ctx,
73 TDB_DATA *data, bool updatetdb)
75 struct ctdb_call_info *c;
76 struct ctdb_registered_call *fn;
77 struct ctdb_context *ctdb = ctdb_db->ctdb;
79 c = talloc(ctdb, struct ctdb_call_info);
80 CTDB_NO_MEMORY(ctdb, c);
83 c->call_data = &call->call_data;
/* the call function is given a duplicate of the record owned by c */
84 c->record_data.dptr = talloc_memdup(c, data->dptr, data->dsize);
85 c->record_data.dsize = data->dsize;
86 CTDB_NO_MEMORY(ctdb, c->record_data.dptr);
/* linear search of the call functions registered on this database */
92 for (fn=ctdb_db->calls;fn;fn=fn->next) {
93 if (fn->id == call->call_id) break;
96 ctdb_set_error(ctdb, "Unknown call id %u\n", call->call_id);
101 if (fn->fn(c) != 0) {
102 ctdb_set_error(ctdb, "ctdb_call %u failed\n", call->call_id);
107 /* we need to force the record to be written out if this was a remote access */
108 if (c->new_data == NULL) {
109 c->new_data = &c->record_data;
112 if (c->new_data && updatetdb) {
113 /* XXX check that we always have the lock here? */
114 if (ctdb_ltdb_store(ctdb_db, call->key, header, *c->new_data) != 0) {
115 ctdb_set_error(ctdb, "ctdb_call tdb_store failed\n");
/* pass the reply buffer to the caller: it is reparented onto call, or
   the reply is cleared when there is none */
122 call->reply_data = *c->reply_data;
124 talloc_steal(call, call->reply_data.dptr);
125 talloc_set_name_const(call->reply_data.dptr, __location__);
127 call->reply_data.dptr = NULL;
128 call->reply_data.dsize = 0;
130 call->status = c->status;
139 queue a packet for sending from client to daemon
/*
  Queue a packet for sending from the client to the daemon.

  Hands hdr->length bytes starting at hdr to ctdb_queue_send() on the
  daemon queue and returns that function's result unchanged.
*/
141 static int ctdb_client_queue_pkt(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
143 return ctdb_queue_send(ctdb->daemon.queue, (uint8_t *)hdr, hdr->length);
148 called when a CTDB_REPLY_CALL packet comes in in the client
150 This packet comes in response to a CTDB_REQ_CALL request packet. It
151 contains any reply data from the call
/*
  Handle a CTDB_REPLY_CALL packet in the client.

  The pending call is looked up by hdr->reqid. Its reply_data is pointed
  at the payload inside the packet and its status is taken from the
  packet. The call is then marked CTDB_CALL_DONE and the async callback,
  if one is set, is invoked.
*/
153 static void ctdb_client_reply_call(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
155 struct ctdb_reply_call *c = (struct ctdb_reply_call *)hdr;
156 struct ctdb_client_call_state *state;
158 state = ctdb_reqid_find(ctdb, hdr->reqid, struct ctdb_client_call_state);
160 DEBUG(DEBUG_ERR,(__location__ " reqid %u not found\n", hdr->reqid));
164 if (hdr->reqid != state->reqid) {
165 /* we found a record but it was the wrong one */
166 DEBUG(DEBUG_ERR, ("Dropped client call reply with reqid:%u\n",hdr->reqid));
/* reply_data points into the packet itself, not at a copy.
   NOTE(review): c->datalen is not checked against hdr->length here */
170 state->call->reply_data.dptr = c->data;
171 state->call->reply_data.dsize = c->datalen;
172 state->call->status = c->status;
/* reparent the packet onto state so the payload referenced above
   stays allocated for as long as state does */
174 talloc_steal(state, c);
176 state->state = CTDB_CALL_DONE;
178 if (state->async.fn) {
179 state->async.fn(state);
183 static void ctdb_client_reply_control(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);
186 this is called in the client, when data comes in from the daemon
/*
  Read callback for the daemon connection.

  args is the ctdb context and data/cnt describe one received packet.
  The packet is checked for a minimum size, for a header length equal
  to the number of bytes received, and for the expected magic and
  version. It is then dispatched on hdr->operation to the reply-call,
  message or reply-control handler.
*/
188 void ctdb_client_read_cb(uint8_t *data, size_t cnt, void *args)
190 struct ctdb_context *ctdb = talloc_get_type(args, struct ctdb_context);
191 struct ctdb_req_header *hdr = (struct ctdb_req_header *)data;
194 /* place the packet as a child of a tmp_ctx. We then use
195 talloc_free() below to free it. If any of the calls want
196 to keep it, then they will steal it somewhere else, and the
197 talloc_free() will be a no-op */
198 tmp_ctx = talloc_new(ctdb);
199 talloc_steal(tmp_ctx, hdr);
202 DEBUG(DEBUG_CRIT,("Daemon has exited - shutting down client\n"));
/* the packet must be at least as large as the common header */
206 if (cnt < sizeof(*hdr)) {
207 DEBUG(DEBUG_CRIT,("Bad packet length %u in client\n", (unsigned)cnt));
/* the length stored in the header must match what was received */
210 if (cnt != hdr->length) {
211 ctdb_set_error(ctdb, "Bad header length %u expected %u in client\n",
212 (unsigned)hdr->length, (unsigned)cnt);
216 if (hdr->ctdb_magic != CTDB_MAGIC) {
217 ctdb_set_error(ctdb, "Non CTDB packet rejected in client\n");
221 if (hdr->ctdb_version != CTDB_VERSION) {
222 ctdb_set_error(ctdb, "Bad CTDB version 0x%x rejected in client\n", hdr->ctdb_version);
226 switch (hdr->operation) {
227 case CTDB_REPLY_CALL:
228 ctdb_client_reply_call(ctdb, hdr);
231 case CTDB_REQ_MESSAGE:
232 ctdb_request_message(ctdb, hdr);
235 case CTDB_REPLY_CONTROL:
236 ctdb_client_reply_control(ctdb, hdr);
240 DEBUG(DEBUG_CRIT,("bogus operation code:%u\n",hdr->operation));
/* releases the packet unless a handler reparented it */
244 talloc_free(tmp_ctx);
248 connect to a unix domain socket
250 int ctdb_socket_connect(struct ctdb_context *ctdb)
252 struct sockaddr_un addr;
254 memset(&addr, 0, sizeof(addr));
255 addr.sun_family = AF_UNIX;
256 strncpy(addr.sun_path, ctdb->daemon.name, sizeof(addr.sun_path));
258 ctdb->daemon.sd = socket(AF_UNIX, SOCK_STREAM, 0);
259 if (ctdb->daemon.sd == -1) {
260 DEBUG(DEBUG_ERR,(__location__ " Failed to open client socket. Errno:%s(%d)\n", strerror(errno), errno));
264 if (connect(ctdb->daemon.sd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
265 close(ctdb->daemon.sd);
266 ctdb->daemon.sd = -1;
267 DEBUG(DEBUG_ERR,(__location__ " Failed to connect client socket to daemon. Errno:%s(%d)\n", strerror(errno), errno));
271 set_nonblocking(ctdb->daemon.sd);
272 set_close_on_exec(ctdb->daemon.sd);
274 ctdb->daemon.queue = ctdb_queue_setup(ctdb, ctdb, ctdb->daemon.sd,
276 ctdb_client_read_cb, ctdb, "to-ctdbd");
/*
  Handle for a record obtained through ctdb_fetch_lock() or
  ctdb_fetch_readonly_lock(). Those functions install
  fetch_lock_destructor on it, which unlocks the record's key on
  ctdb_db when the handle is freed.
*/
281 struct ctdb_record_handle {
/* database the record belongs to */
282 struct ctdb_db_context *ctdb_db;
/* ltdb header read together with the record data */
285 struct ctdb_ltdb_header header;
290 make a recv call to the local ctdb daemon - called from client context
292 This is called when the program wants to wait for a ctdb_call to complete and get the
293 results. This call will block unless the call has already completed.
/*
  Wait for a call started with ctdb_call_send() and collect its result.

  The event loop is run one iteration at a time until state->state is no
  longer below CTDB_CALL_DONE. Any state other than CTDB_CALL_DONE at
  that point is reported as a failure. Otherwise the reply data and
  status are copied into call.
*/
295 int ctdb_call_recv(struct ctdb_client_call_state *state, struct ctdb_call *call)
301 while (state->state < CTDB_CALL_DONE) {
302 event_loop_once(state->ctdb_db->ctdb->ev);
304 if (state->state != CTDB_CALL_DONE) {
305 DEBUG(DEBUG_ERR,(__location__ " ctdb_call_recv failed\n"));
/* the caller receives its own copy of the reply, allocated under the
   database context rather than under state.
   NOTE(review): the talloc_memdup() result is not checked here */
310 if (state->call->reply_data.dsize) {
311 call->reply_data.dptr = talloc_memdup(state->ctdb_db,
312 state->call->reply_data.dptr,
313 state->call->reply_data.dsize);
314 call->reply_data.dsize = state->call->reply_data.dsize;
316 call->reply_data.dptr = NULL;
317 call->reply_data.dsize = 0;
319 call->status = state->call->status;
329 destroy a ctdb_call in client
/*
  talloc destructor for a client call state: removes the state's reqid
  from the request id table of its ctdb context.
*/
331 static int ctdb_client_call_destructor(struct ctdb_client_call_state *state)
333 ctdb_reqid_remove(state->ctdb_db->ctdb, state->reqid);
338 construct an event driven local ctdb_call
340 this is used so that locally processed ctdb_call requests are processed
341 in an event driven manner
/*
  Build a completed call state for a call that is processed locally.

  A state and a copy of the call are allocated under ctdb_db, the state
  is marked CTDB_CALL_DONE, and ctdb_call_local() is run with updatetdb
  set to true. A failure of ctdb_call_local() is only logged at debug
  level.
*/
343 static struct ctdb_client_call_state *ctdb_client_call_local_send(struct ctdb_db_context *ctdb_db,
344 struct ctdb_call *call,
345 struct ctdb_ltdb_header *header,
348 struct ctdb_client_call_state *state;
349 struct ctdb_context *ctdb = ctdb_db->ctdb;
352 state = talloc_zero(ctdb_db, struct ctdb_client_call_state);
353 CTDB_NO_MEMORY_NULL(ctdb, state);
354 state->call = talloc_zero(state, struct ctdb_call);
355 CTDB_NO_MEMORY_NULL(ctdb, state->call);
/* the record buffer becomes a child of state from here on */
357 talloc_steal(state, data->dptr);
359 state->state = CTDB_CALL_DONE;
360 *(state->call) = *call;
361 state->ctdb_db = ctdb_db;
363 ret = ctdb_call_local(ctdb_db, state->call, header, state, data, true);
365 DEBUG(DEBUG_DEBUG,("ctdb_call_local() failed, ignoring return code %d\n", ret));
372 make a ctdb call to the local daemon - async send. Called from client context.
374 This constructs a ctdb_call request and queues it for processing.
375 This call never blocks.
/*
  Start a ctdb call without waiting for the result.

  The local copy of the record is fetched under the chain lock for
  call->key. If the fetch succeeded and this node is the dmaster, the
  call is run locally through ctdb_client_call_local_send(). Otherwise
  a CTDB_REQ_CALL packet carrying the key and the call data is built,
  registered under a new reqid and queued to the daemon, and the state
  is left in CTDB_CALL_WAIT for ctdb_call_recv() to wait on.
*/
377 struct ctdb_client_call_state *ctdb_call_send(struct ctdb_db_context *ctdb_db,
378 struct ctdb_call *call)
380 struct ctdb_client_call_state *state;
381 struct ctdb_context *ctdb = ctdb_db->ctdb;
382 struct ctdb_ltdb_header header;
386 struct ctdb_req_call *c;
388 /* if the domain socket is not yet open, open it */
389 if (ctdb->daemon.sd==-1) {
/* NOTE(review): the result of ctdb_socket_connect() is not checked */
390 ctdb_socket_connect(ctdb);
393 ret = ctdb_ltdb_lock(ctdb_db, call->key);
395 DEBUG(DEBUG_ERR,(__location__ " Failed to get chainlock\n"));
399 ret = ctdb_ltdb_fetch(ctdb_db, call->key, &header, ctdb_db, &data);
/* immediate-migration request for a record with read-only delegations */
401 if ((call->flags & CTDB_IMMEDIATE_MIGRATION) && (header.flags & CTDB_REC_RO_HAVE_DELEGATIONS)) {
/* local path: taken only when the fetch worked and we are the dmaster */
405 if (ret == 0 && header.dmaster == ctdb->pnn) {
406 state = ctdb_client_call_local_send(ctdb_db, call, &header, &data);
407 talloc_free(data.dptr);
408 ctdb_ltdb_unlock(ctdb_db, call->key);
/* remote path: the local copy is not needed, so release lock and data */
412 ctdb_ltdb_unlock(ctdb_db, call->key);
413 talloc_free(data.dptr);
415 state = talloc_zero(ctdb_db, struct ctdb_client_call_state);
417 DEBUG(DEBUG_ERR, (__location__ " failed to allocate state\n"));
420 state->call = talloc_zero(state, struct ctdb_call);
421 if (state->call == NULL) {
422 DEBUG(DEBUG_ERR, (__location__ " failed to allocate state->call\n"));
/* the packet payload is the key immediately followed by the call data */
426 len = offsetof(struct ctdb_req_call, data) + call->key.dsize + call->call_data.dsize;
427 c = ctdbd_allocate_pkt(ctdb, state, CTDB_REQ_CALL, len, struct ctdb_req_call);
429 DEBUG(DEBUG_ERR, (__location__ " failed to allocate packet\n"));
/* register the state under a new reqid; the destructor removes that
   registration again when the state is freed */
433 state->reqid = ctdb_reqid_new(ctdb, state);
434 state->ctdb_db = ctdb_db;
435 talloc_set_destructor(state, ctdb_client_call_destructor);
437 c->hdr.reqid = state->reqid;
438 c->flags = call->flags;
439 c->db_id = ctdb_db->db_id;
440 c->callid = call->call_id;
442 c->keylen = call->key.dsize;
443 c->calldatalen = call->call_data.dsize;
444 memcpy(&c->data[0], call->key.dptr, call->key.dsize);
445 memcpy(&c->data[call->key.dsize],
446 call->call_data.dptr, call->call_data.dsize);
/* keep a copy of the call whose key and call_data point into the
   packet (a child of state) instead of into the caller's buffers */
447 *(state->call) = *call;
448 state->call->call_data.dptr = &c->data[call->key.dsize];
449 state->call->key.dptr = &c->data[0];
451 state->state = CTDB_CALL_WAIT;
/* NOTE(review): the result of queueing the packet is not checked */
454 ctdb_client_queue_pkt(ctdb, &c->hdr);
461 full ctdb_call. Equivalent to a ctdb_call_send() followed by a ctdb_call_recv()
/*
  Synchronous ctdb call: ctdb_call_send() followed by ctdb_call_recv().
  The value returned is that of ctdb_call_recv(). The state returned by
  ctdb_call_send() is passed on without being inspected here.
*/
463 int ctdb_call(struct ctdb_db_context *ctdb_db, struct ctdb_call *call)
465 struct ctdb_client_call_state *state;
467 state = ctdb_call_send(ctdb_db, call);
468 return ctdb_call_recv(state, call);
473 tell the daemon what messaging srvid we will use, and register the message
474 handler function in the client
/*
  Register a message handler for srvid.

  The srvid is first registered with the daemon on the current node
  through CTDB_CONTROL_REGISTER_SRVID. The handler and private_data are
  then registered on the client's own ctdb structure. The return value
  on success is that of ctdb_register_message_handler().
*/
476 int ctdb_client_set_message_handler(struct ctdb_context *ctdb, uint64_t srvid,
477 ctdb_msg_fn_t handler,
483 res = ctdb_control(ctdb, CTDB_CURRENT_NODE, srvid, CTDB_CONTROL_REGISTER_SRVID, 0,
484 tdb_null, NULL, NULL, &status, NULL, NULL);
/* both the transport result and the control status must be zero */
485 if (res != 0 || status != 0) {
486 DEBUG(DEBUG_ERR,("Failed to register srvid %llu\n", (unsigned long long)srvid));
490 /* also need to register the handler with our own ctdb structure */
491 return ctdb_register_message_handler(ctdb, ctdb, srvid, handler, private_data);
495 tell the daemon we no longer want a srvid
/*
  Remove the message handler for srvid.

  The srvid is deregistered from the daemon on the current node through
  CTDB_CONTROL_DEREGISTER_SRVID, and the handler is then removed from
  the client's own ctdb structure.
*/
497 int ctdb_client_remove_message_handler(struct ctdb_context *ctdb, uint64_t srvid, void *private_data)
502 res = ctdb_control(ctdb, CTDB_CURRENT_NODE, srvid, CTDB_CONTROL_DEREGISTER_SRVID, 0,
503 tdb_null, NULL, NULL, &status, NULL, NULL);
/* both the transport result and the control status must be zero */
504 if (res != 0 || status != 0) {
505 DEBUG(DEBUG_ERR,("Failed to deregister srvid %llu\n", (unsigned long long)srvid));
509 /* also need to deregister the handler from our own ctdb structure */
510 ctdb_deregister_message_handler(ctdb, srvid, private_data);
/*
  Query the daemon on the current node about a list of srvids.

  The num ids are sent as the payload of CTDB_CONTROL_CHECK_SRVIDS. The
  reply must contain exactly one byte per id. Byte i of the reply is
  copied to result[i]. The reply buffer is freed on the size-mismatch
  path as well as on the success path.
*/
517 int ctdb_client_check_message_handlers(struct ctdb_context *ctdb, uint64_t *ids, uint32_t num,
520 TDB_DATA indata, outdata;
/* the ids array is sent as-is, without copying */
525 indata.dptr = (uint8_t *)ids;
526 indata.dsize = num * sizeof(*ids);
528 res = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_CHECK_SRVIDS, 0,
529 indata, ctdb, &outdata, &status, NULL, NULL);
530 if (res != 0 || status != 0) {
531 DEBUG(DEBUG_ERR, (__location__ " failed to check srvids\n"));
535 if (outdata.dsize != num*sizeof(uint8_t)) {
536 DEBUG(DEBUG_ERR, (__location__ " expected %lu bytes, received %zi bytes\n",
537 (long unsigned int)num*sizeof(uint8_t),
539 talloc_free(outdata.dptr);
543 for (i=0; i<num; i++) {
544 result[i] = outdata.dptr[i];
547 talloc_free(outdata.dptr);
552 send a message - from client context
/*
  Send a message to node pnn from client context.

  A CTDB_REQ_MESSAGE packet large enough for data is allocated under
  ctdb, addressed to pnn, filled with a copy of data and queued to the
  daemon.
*/
554 int ctdb_client_send_message(struct ctdb_context *ctdb, uint32_t pnn,
555 uint64_t srvid, TDB_DATA data)
557 struct ctdb_req_message *r;
/* fixed part of the packet plus the message payload */
560 len = offsetof(struct ctdb_req_message, data) + data.dsize;
561 r = ctdbd_allocate_pkt(ctdb, ctdb, CTDB_REQ_MESSAGE,
562 len, struct ctdb_req_message);
563 CTDB_NO_MEMORY(ctdb, r);
565 r->hdr.destnode = pnn;
567 r->datalen = data.dsize;
568 memcpy(&r->data[0], data.dptr, data.dsize);
570 res = ctdb_client_queue_pkt(ctdb, &r->hdr);
577 cancel a ctdb_fetch_lock operation, releasing the lock
/*
  talloc destructor for a record handle: unlocks the handle's key on its
  database, so freeing the handle releases the lock.
*/
579 static int fetch_lock_destructor(struct ctdb_record_handle *h)
581 ctdb_ltdb_unlock(h->ctdb_db, h->key);
586 force the migration of a record to this node
/*
  Ask for a record to be migrated to this node.

  Issues a synchronous CTDB_NULL_FUNC call with the
  CTDB_IMMEDIATE_MIGRATION flag and returns the result of ctdb_call().
*/
588 static int ctdb_client_force_migration(struct ctdb_db_context *ctdb_db, TDB_DATA key)
590 struct ctdb_call call;
592 call.call_id = CTDB_NULL_FUNC;
594 call.flags = CTDB_IMMEDIATE_MIGRATION;
595 return ctdb_call(ctdb_db, &call);
599 try to fetch a readonly copy of a record
/*
  Try to fetch a read-only copy of a record.

  Issues a synchronous CTDB_FETCH_WITH_HEADER_FUNC call with the
  CTDB_WANT_READONLY flag. The reply must be at least as large as a
  struct ctdb_ltdb_header. The leading header is duplicated under
  mem_ctx into *hdr and the remaining bytes are duplicated under
  mem_ctx into data.
*/
602 ctdb_client_fetch_readonly(struct ctdb_db_context *ctdb_db, TDB_DATA key, TALLOC_CTX *mem_ctx, struct ctdb_ltdb_header **hdr, TDB_DATA *data)
606 struct ctdb_call call;
609 call.call_id = CTDB_FETCH_WITH_HEADER_FUNC;
610 call.call_data.dptr = NULL;
611 call.call_data.dsize = 0;
613 call.flags = CTDB_WANT_READONLY;
614 ret = ctdb_call(ctdb_db, &call);
/* a reply too short to hold the record header cannot be split */
619 if (call.reply_data.dsize < sizeof(struct ctdb_ltdb_header)) {
623 *hdr = talloc_memdup(mem_ctx, &call.reply_data.dptr[0], sizeof(struct ctdb_ltdb_header));
625 talloc_free(call.reply_data.dptr);
/* whatever follows the header is the record data */
629 data->dsize = call.reply_data.dsize - sizeof(struct ctdb_ltdb_header);
630 data->dptr = talloc_memdup(mem_ctx, &call.reply_data.dptr[sizeof(struct ctdb_ltdb_header)], data->dsize);
631 if (data->dptr == NULL) {
632 talloc_free(call.reply_data.dptr);
641 get a lock on a record, and return the records data. Blocks until it gets the lock
/*
  Get a lock on a record and return its data.

  A handle owning a copy of the key is allocated under mem_ctx. The
  chain lock for the key is taken, fetch_lock_destructor is installed
  on the handle so that freeing it unlocks the record, and the record
  is fetched from the local tdb into data. When the fetch fails or this
  node is not the dmaster, the lock is dropped and the record is
  migrated here with ctdb_client_force_migration().
*/
643 struct ctdb_record_handle *ctdb_fetch_lock(struct ctdb_db_context *ctdb_db, TALLOC_CTX *mem_ctx,
644 TDB_DATA key, TDB_DATA *data)
647 struct ctdb_record_handle *h;
650 procedure is as follows:
652 1) get the chain lock.
653 2) check if we are dmaster
654 3) if we are the dmaster then return handle
655 4) if not dmaster then ask ctdb daemon to make us dmaster, and wait for
657 5) when we get the reply, goto (1)
660 h = talloc_zero(mem_ctx, struct ctdb_record_handle);
665 h->ctdb_db = ctdb_db;
/* the handle keeps its own copy of the key, owned by the handle */
667 h->key.dptr = talloc_memdup(h, key.dptr, key.dsize);
668 if (h->key.dptr == NULL) {
674 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: key=%*.*s\n", (int)key.dsize, (int)key.dsize,
675 (const char *)key.dptr));
678 /* step 1 - get the chain lock */
679 ret = ctdb_ltdb_lock(ctdb_db, key);
681 DEBUG(DEBUG_ERR, (__location__ " failed to lock ltdb record\n"));
686 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: got chain lock\n"));
/* from here on freeing h unlocks the record */
688 talloc_set_destructor(h, fetch_lock_destructor);
690 ret = ctdb_ltdb_fetch(ctdb_db, key, &h->header, h, data);
692 /* when torturing, ensure we test the remote path */
693 if ((ctdb_db->ctdb->flags & CTDB_FLAG_TORTURE) &&
695 h->header.dmaster = (uint32_t)-1;
699 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: done local fetch\n"));
/* not dmaster, or no usable local copy: release the lock and ask for
   the record to be migrated to this node */
701 if (ret != 0 || h->header.dmaster != ctdb_db->ctdb->pnn) {
702 ctdb_ltdb_unlock(ctdb_db, key);
703 ret = ctdb_client_force_migration(ctdb_db, key);
705 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: force_migration failed\n"));
712 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: we are dmaster - done\n"));
717 get a readonly lock on a record, and return the records data. Blocks until it gets the lock
/*
  Get a lock on a record and return its data, optionally settling for a
  read-only copy.

  A handle owning a copy of the key is allocated under mem_ctx. The
  chain lock is taken, fetch_lock_destructor is installed on the handle,
  and the record is read together with its header. The steps after that
  depend on whether this node is the dmaster, on the read-only flags in
  the record header and on read_only. Where the local copy cannot be
  used, the lock is dropped and either a read-only copy is requested
  with ctdb_client_fetch_readonly() or the record is migrated here with
  ctdb_client_force_migration().
*/
719 struct ctdb_record_handle *
720 ctdb_fetch_readonly_lock(
721 struct ctdb_db_context *ctdb_db, TALLOC_CTX *mem_ctx,
722 TDB_DATA key, TDB_DATA *data,
726 struct ctdb_record_handle *h;
727 struct ctdb_ltdb_header *roheader = NULL;
729 h = talloc_zero(mem_ctx, struct ctdb_record_handle);
734 h->ctdb_db = ctdb_db;
/* the handle keeps its own copy of the key, owned by the handle */
736 h->key.dptr = talloc_memdup(h, key.dptr, key.dsize);
737 if (h->key.dptr == NULL) {
748 talloc_free(roheader);
751 talloc_free(data->dptr);
755 /* Lock the record/chain */
756 ret = ctdb_ltdb_lock(ctdb_db, key);
758 DEBUG(DEBUG_ERR, (__location__ " failed to lock ltdb record\n"));
/* from here on freeing h unlocks the record */
763 talloc_set_destructor(h, fetch_lock_destructor);
765 /* Check if record exists yet in the TDB */
766 ret = ctdb_ltdb_fetch_with_header(ctdb_db, key, &h->header, h, data);
768 ctdb_ltdb_unlock(ctdb_db, key);
769 ret = ctdb_client_force_migration(ctdb_db, key);
771 DEBUG(DEBUG_DEBUG,("ctdb_fetch_readonly_lock: force_migration failed\n"));
778 /* if this is a request for read/write and we have delegations
779 we have to revoke all delegations first
782 && (h->header.dmaster == ctdb_db->ctdb->pnn)
783 && (h->header.flags & CTDB_REC_RO_HAVE_DELEGATIONS)) {
784 ctdb_ltdb_unlock(ctdb_db, key);
785 ret = ctdb_client_force_migration(ctdb_db, key);
787 DEBUG(DEBUG_DEBUG,("ctdb_fetch_readonly_lock: force_migration failed\n"));
794 /* if we are dmaster, just return the handle */
795 if (h->header.dmaster == ctdb_db->ctdb->pnn) {
/* read-only request while this node is not the dmaster */
799 if (read_only != 0) {
800 TDB_DATA rodata = {NULL, 0};
/* tests whether the local copy already carries a read-only flag */
802 if ((h->header.flags & CTDB_REC_RO_HAVE_READONLY)
803 || (h->header.flags & CTDB_REC_RO_HAVE_DELEGATIONS)) {
/* the lock is dropped before asking for a read-only copy */
807 ctdb_ltdb_unlock(ctdb_db, key);
808 ret = ctdb_client_fetch_readonly(ctdb_db, key, h, &roheader, &rodata);
810 DEBUG(DEBUG_ERR,("ctdb_fetch_readonly_lock: failed. force migration and try again\n"));
811 ret = ctdb_client_force_migration(ctdb_db, key);
813 DEBUG(DEBUG_DEBUG,("ctdb_fetch_readonly_lock: force_migration failed\n"));
/* a reply without the READONLY flag leads to a full migration */
821 if (!(roheader->flags&CTDB_REC_RO_HAVE_READONLY)) {
822 ret = ctdb_client_force_migration(ctdb_db, key);
824 DEBUG(DEBUG_DEBUG,("ctdb_fetch_readonly_lock: force_migration failed\n"));
/* take the lock again and re-read the record from the local tdb */
832 ret = ctdb_ltdb_lock(ctdb_db, key);
834 DEBUG(DEBUG_ERR, (__location__ " failed to lock ltdb record\n"));
839 ret = ctdb_ltdb_fetch_with_header(ctdb_db, key, &h->header, h, data);
841 ctdb_ltdb_unlock(ctdb_db, key);
843 ret = ctdb_client_force_migration(ctdb_db, key);
845 DEBUG(DEBUG_DEBUG,("ctdb_fetch_readonly_lock: force_migration failed\n"));
856 /* we are not dmaster and this was not a request for a readonly lock
857 * so unlock the record, migrate it and try again
859 ctdb_ltdb_unlock(ctdb_db, key);
860 ret = ctdb_client_force_migration(ctdb_db, key);
862 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: force_migration failed\n"));
870 store some data to the record that was locked with ctdb_fetch_lock()
// Store data into the record held by handle h.
//
// Persistent databases are refused with an error message. For any other
// database the data is written with ctdb_ltdb_store() using the key and
// header kept in the handle, and that function's result is returned.
872 int ctdb_record_store(struct ctdb_record_handle *h, TDB_DATA data)
874 if (h->ctdb_db->persistent) {
875 DEBUG(DEBUG_ERR, (__location__ " ctdb_record_store prohibited for persistent dbs\n"));
879 return ctdb_ltdb_store(h->ctdb_db, h->key, &h->header, data);
883 non-locking fetch of a record
885 int ctdb_fetch(struct ctdb_db_context *ctdb_db, TALLOC_CTX *mem_ctx,
886 TDB_DATA key, TDB_DATA *data)
888 struct ctdb_call call;
891 call.call_id = CTDB_FETCH_FUNC;
892 call.call_data.dptr = NULL;
893 call.call_data.dsize = 0;
896 ret = ctdb_call(ctdb_db, &call);
899 *data = call.reply_data;
900 talloc_steal(mem_ctx, data->dptr);
909 called when a control completes or timesout to invoke the callback
910 function the user provided
// Timed-event handler that completes an asynchronous control.
//
// private_data is the control state. The state is reparented onto a
// temporary context, ctdb_control_recv() is run on it, and the temporary
// context is freed at the end. A failing ctdb_control_recv() is only
// logged at debug level.
912 static void invoke_control_callback(struct event_context *ev, struct timed_event *te,
913 struct timeval t, void *private_data)
915 struct ctdb_client_control_state *state;
916 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
919 state = talloc_get_type(private_data, struct ctdb_client_control_state);
920 talloc_steal(tmp_ctx, state);
922 ret = ctdb_control_recv(state->ctdb, state, state,
927 DEBUG(DEBUG_DEBUG,("ctdb_control_recv() failed, ignoring return code %d\n", ret));
930 talloc_free(tmp_ctx);
934 called when a CTDB_REPLY_CONTROL packet comes in in the client
936 This packet comes in response to a CTDB_REQ_CONTROL request packet. It
937 contains any reply data from the control
// Handle a CTDB_REPLY_CONTROL packet in the client.
//
// The pending control is looked up by hdr->reqid. Its outdata is pointed
// at the payload inside the packet and its status is taken from the
// packet. An error message is duplicated from the bytes that follow the
// payload. The packet is then reparented onto the state, the state is
// marked CTDB_CONTROL_DONE, and when an async callback is registered a
// zero-delay timed event is scheduled to run invoke_control_callback.
939 static void ctdb_client_reply_control(struct ctdb_context *ctdb,
940 struct ctdb_req_header *hdr)
942 struct ctdb_reply_control *c = (struct ctdb_reply_control *)hdr;
943 struct ctdb_client_control_state *state;
945 state = ctdb_reqid_find(ctdb, hdr->reqid, struct ctdb_client_control_state);
947 DEBUG(DEBUG_ERR,(__location__ " reqid %u not found\n", hdr->reqid));
951 if (hdr->reqid != state->reqid) {
952 /* we found a record but it was the wrong one */
953 DEBUG(DEBUG_ERR, ("Dropped orphaned reply control with reqid:%u\n",hdr->reqid));
// outdata points into the packet itself, not at a copy.
// NOTE(review): c->datalen is not checked against hdr->length here
957 state->outdata.dptr = c->data;
958 state->outdata.dsize = c->datalen;
959 state->status = c->status;
// the error text starts right after the payload bytes in the packet
961 state->errormsg = talloc_strndup(state,
962 (char *)&c->data[c->datalen],
966 /* state->outdata now uses resources from c so we dont want c
967 to just dissappear from under us while state is still alive
969 talloc_steal(state, c);
971 state->state = CTDB_CONTROL_DONE;
973 /* if we had a callback registered for this control, pull the response
974 and call the callback.
976 if (state->async.fn) {
977 event_add_timed(ctdb->ev, state, timeval_zero(), invoke_control_callback, state);
983 destroy a ctdb_control in client
// talloc destructor for a client control state: removes the state's
// reqid from the request id table of its ctdb context.
985 static int ctdb_client_control_destructor(struct ctdb_client_control_state *state)
987 ctdb_reqid_remove(state->ctdb, state->reqid);
992 /* time out handler for ctdb_control */
/*
  Timed-event handler that fires when a control has not completed in
  time. private_data is the control state. The timeout is logged with
  the reqid, opcode and destination node, the state is marked
  CTDB_CONTROL_TIMEOUT, and when an async callback is registered a
  zero-delay timed event is scheduled to run invoke_control_callback.
*/
993 static void control_timeout_func(struct event_context *ev, struct timed_event *te,
994 struct timeval t, void *private_data)
996 struct ctdb_client_control_state *state = talloc_get_type(private_data, struct ctdb_client_control_state);
998 DEBUG(DEBUG_ERR,(__location__ " control timed out. reqid:%u opcode:%u "
999 "dstnode:%u\n", state->reqid, state->c->opcode,
1000 state->c->hdr.destnode));
1002 state->state = CTDB_CONTROL_TIMEOUT;
1004 /* if we had a callback registered for this control, pull the response
1005 and call the callback.
1007 if (state->async.fn) {
1008 event_add_timed(state->ctdb->ev, state, timeval_zero(), invoke_control_callback, state);
1012 /* async version of send control request */
/*
  Start a control request without waiting for the reply.

  The daemon socket is opened if it is not open yet. A control state is
  allocated under mem_ctx and registered under a new reqid, with a
  destructor that removes the registration again. A CTDB_REQ_CONTROL
  packet carrying a copy of data is allocated as a child of the state
  and addressed to destnode. If a non-zero timeout is given, a timed
  event running control_timeout_func is armed. The packet is then
  queued to the daemon.
*/
1013 struct ctdb_client_control_state *ctdb_control_send(struct ctdb_context *ctdb,
1014 uint32_t destnode, uint64_t srvid,
1015 uint32_t opcode, uint32_t flags, TDB_DATA data,
1016 TALLOC_CTX *mem_ctx,
1017 struct timeval *timeout,
1020 struct ctdb_client_control_state *state;
1022 struct ctdb_req_control *c;
1029 /* if the domain socket is not yet open, open it */
1030 if (ctdb->daemon.sd==-1) {
/* NOTE(review): the result of ctdb_socket_connect() is not checked */
1031 ctdb_socket_connect(ctdb);
1034 state = talloc_zero(mem_ctx, struct ctdb_client_control_state);
1035 CTDB_NO_MEMORY_NULL(ctdb, state);
1038 state->reqid = ctdb_reqid_new(ctdb, state);
1039 state->state = CTDB_CONTROL_WAIT;
1040 state->errormsg = NULL;
/* freeing the state drops its reqid registration */
1042 talloc_set_destructor(state, ctdb_client_control_destructor);
/* fixed part of the packet plus the control payload */
1044 len = offsetof(struct ctdb_req_control, data) + data.dsize;
1045 c = ctdbd_allocate_pkt(ctdb, state, CTDB_REQ_CONTROL,
1046 len, struct ctdb_req_control);
1048 CTDB_NO_MEMORY_NULL(ctdb, c);
1049 c->hdr.reqid = state->reqid;
1050 c->hdr.destnode = destnode;
1055 c->datalen = data.dsize;
1057 memcpy(&c->data[0], data.dptr, data.dsize);
/* a NULL or all-zero timeout means that no timer is armed */
1061 if (timeout && !timeval_is_zero(timeout)) {
1062 event_add_timed(ctdb->ev, state, *timeout, control_timeout_func, state);
1065 ret = ctdb_client_queue_pkt(ctdb, &(c->hdr));
1071 if (flags & CTDB_CTRL_FLAG_NOREPLY) {
1080 /* async version of receive control reply */
/*
  Wait for a control started with ctdb_control_send() and collect its
  reply.

  The state is reparented onto a temporary context that is freed on
  every exit path visible below. The event loop is run one iteration
  at a time while the state is CTDB_CONTROL_WAIT. Any final state other
  than CTDB_CONTROL_DONE is reported as a failure. On completion the
  error message, a copy of the reply data allocated under mem_ctx, and
  the status are passed back. The state's async callback, when set, is
  invoked before the temporary context is freed.
*/
1081 int ctdb_control_recv(struct ctdb_context *ctdb,
1082 struct ctdb_client_control_state *state,
1083 TALLOC_CTX *mem_ctx,
1084 TDB_DATA *outdata, int32_t *status, char **errormsg)
1086 TALLOC_CTX *tmp_ctx;
1088 if (status != NULL) {
1091 if (errormsg != NULL) {
1095 if (state == NULL) {
1099 /* prevent double free of state */
1100 tmp_ctx = talloc_new(ctdb);
1101 talloc_steal(tmp_ctx, state);
1103 /* loop one event at a time until we either timeout or the control
1106 while (state->state == CTDB_CONTROL_WAIT) {
1107 event_loop_once(ctdb->ev);
1110 if (state->state != CTDB_CONTROL_DONE) {
1111 DEBUG(DEBUG_ERR,(__location__ " ctdb_control_recv failed\n"));
1112 if (state->async.fn) {
1113 state->async.fn(state);
1115 talloc_free(tmp_ctx);
1119 if (state->errormsg) {
1120 DEBUG(DEBUG_ERR,("ctdb_control error: '%s'\n", state->errormsg));
1122 (*errormsg) = talloc_move(mem_ctx, &state->errormsg);
1124 if (state->async.fn) {
1125 state->async.fn(state);
1127 talloc_free(tmp_ctx);
1132 *outdata = state->outdata;
1133 outdata->dptr = talloc_memdup(mem_ctx, outdata->dptr, outdata->dsize);
1137 *status = state->status;
1140 if (state->async.fn) {
1141 state->async.fn(state);
1144 talloc_free(tmp_ctx);
1151 send a ctdb control message
1152 timeout specifies how long we should wait for a reply.
1153 if timeout is NULL we wait indefinitely
// Synchronous control: ctdb_control_send() followed, unless
// CTDB_CTRL_FLAG_NOREPLY is set in flags, by ctdb_control_recv(), whose
// result is returned. outdata, status and the error message are filled
// in by ctdb_control_recv().
1155 int ctdb_control(struct ctdb_context *ctdb, uint32_t destnode, uint64_t srvid,
1156 uint32_t opcode, uint32_t flags, TDB_DATA data,
1157 TALLOC_CTX *mem_ctx, TDB_DATA *outdata, int32_t *status,
1158 struct timeval *timeout,
1161 struct ctdb_client_control_state *state;
1163 state = ctdb_control_send(ctdb, destnode, srvid, opcode,
1164 flags, data, mem_ctx,
1167 /* FIXME: Error conditions in ctdb_control_send return NULL without
1168 * setting errormsg. So, there is no way to distinguish between sucess
1169 * and failure when CTDB_CTRL_FLAG_NOREPLY is set */
1170 if (flags & CTDB_CTRL_FLAG_NOREPLY) {
1171 if (status != NULL) {
1177 return ctdb_control_recv(ctdb, state, mem_ctx, outdata, status,
1185 a process exists call. Returns 0 if process exists, -1 otherwise
/*
  Ask destnode whether a process with the given pid exists.

  The pid is sent as the raw bytes of a pid_t in a synchronous
  CTDB_CONTROL_PROCESS_EXISTS control with no timeout.
*/
1187 int ctdb_ctrl_process_exists(struct ctdb_context *ctdb, uint32_t destnode, pid_t pid)
/* the payload points at the caller's pid; nothing is copied here */
1193 data.dptr = (uint8_t*)&pid;
1194 data.dsize = sizeof(pid);
1196 ret = ctdb_control(ctdb, destnode, 0,
1197 CTDB_CONTROL_PROCESS_EXISTS, 0, data,
1198 NULL, NULL, &status, NULL, NULL);
1200 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for process_exists failed\n"));
1208 get remote statistics
1210 int ctdb_ctrl_statistics(struct ctdb_context *ctdb, uint32_t destnode, struct ctdb_statistics *status)
1216 ret = ctdb_control(ctdb, destnode, 0,
1217 CTDB_CONTROL_STATISTICS, 0, tdb_null,
1218 ctdb, &data, &res, NULL, NULL);
1219 if (ret != 0 || res != 0) {
1220 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for statistics failed\n"));
1224 if (data.dsize != sizeof(struct ctdb_statistics)) {
1225 DEBUG(DEBUG_ERR,(__location__ " Wrong statistics size %u - expected %u\n",
1226 (unsigned)data.dsize, (unsigned)sizeof(struct ctdb_statistics)));
1230 *status = *(struct ctdb_statistics *)data.dptr;
1231 talloc_free(data.dptr);
1239 int ctdb_ctrl_dbstatistics(struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid,
1240 TALLOC_CTX *mem_ctx, struct ctdb_db_statistics **dbstat)
1243 TDB_DATA indata, outdata;
1245 struct ctdb_db_statistics *wire, *s;
1249 indata.dptr = (uint8_t *)&dbid;
1250 indata.dsize = sizeof(dbid);
1252 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_DB_STATISTICS,
1253 0, indata, ctdb, &outdata, &res, NULL, NULL);
1254 if (ret != 0 || res != 0) {
1255 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for dbstatistics failed\n"));
1259 if (outdata.dsize < offsetof(struct ctdb_db_statistics, hot_keys_wire)) {
1260 DEBUG(DEBUG_ERR,(__location__ " Wrong dbstatistics size %zi - expected >= %lu\n",
1262 (long unsigned int)sizeof(struct ctdb_statistics)));
1266 s = talloc_zero(mem_ctx, struct ctdb_db_statistics);
1268 talloc_free(outdata.dptr);
1269 CTDB_NO_MEMORY(ctdb, s);
1272 wire = (struct ctdb_db_statistics *)outdata.dptr;
1274 ptr = &wire->hot_keys_wire[0];
1275 for (i=0; i<wire->num_hot_keys; i++) {
1276 s->hot_keys[i].key.dptr = talloc_size(mem_ctx, s->hot_keys[i].key.dsize);
1277 if (s->hot_keys[i].key.dptr == NULL) {
1278 talloc_free(outdata.dptr);
1279 CTDB_NO_MEMORY(ctdb, s->hot_keys[i].key.dptr);
1282 memcpy(s->hot_keys[i].key.dptr, ptr, s->hot_keys[i].key.dsize);
1283 ptr += wire->hot_keys[i].key.dsize;
1286 talloc_free(outdata.dptr);
1292 shutdown a remote ctdb node
/*
  Ask destnode to shut down.

  Only the send half of the control is performed here:
  CTDB_CONTROL_SHUTDOWN is queued with the given timeout and no reply
  is waited for. A NULL state from ctdb_control_send() is reported as
  a failure.
*/
1294 int ctdb_ctrl_shutdown(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
1296 struct ctdb_client_control_state *state;
/* NOTE(review): the state is allocated with a NULL talloc parent and
   is not freed in the lines shown here */
1298 state = ctdb_control_send(ctdb, destnode, 0,
1299 CTDB_CONTROL_SHUTDOWN, 0, tdb_null,
1300 NULL, &timeout, NULL);
1301 if (state == NULL) {
1302 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for shutdown failed\n"));
1310 get vnn map from a remote node
1312 int ctdb_ctrl_getvnnmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_vnn_map **vnnmap)
1317 struct ctdb_vnn_map_wire *map;
1319 ret = ctdb_control(ctdb, destnode, 0,
1320 CTDB_CONTROL_GETVNNMAP, 0, tdb_null,
1321 mem_ctx, &outdata, &res, &timeout, NULL);
1322 if (ret != 0 || res != 0) {
1323 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getvnnmap failed\n"));
1327 map = (struct ctdb_vnn_map_wire *)outdata.dptr;
1328 if (outdata.dsize < offsetof(struct ctdb_vnn_map_wire, map) ||
1329 outdata.dsize != map->size*sizeof(uint32_t) + offsetof(struct ctdb_vnn_map_wire, map)) {
1330 DEBUG(DEBUG_ERR,("Bad vnn map size received in ctdb_ctrl_getvnnmap\n"));
1334 (*vnnmap) = talloc(mem_ctx, struct ctdb_vnn_map);
1335 CTDB_NO_MEMORY(ctdb, *vnnmap);
1336 (*vnnmap)->generation = map->generation;
1337 (*vnnmap)->size = map->size;
1338 (*vnnmap)->map = talloc_array(*vnnmap, uint32_t, map->size);
1340 CTDB_NO_MEMORY(ctdb, (*vnnmap)->map);
1341 memcpy((*vnnmap)->map, map->map, sizeof(uint32_t)*map->size);
1342 talloc_free(outdata.dptr);
1349 get the recovery mode of a remote node
/*
  Async send half of "get recovery mode": queues a
  CTDB_CONTROL_GET_RECMODE control to destnode with the given timeout
  and returns the control state allocated under mem_ctx.
*/
1351 struct ctdb_client_control_state *
1352 ctdb_ctrl_getrecmode_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
1354 return ctdb_control_send(ctdb, destnode, 0,
1355 CTDB_CONTROL_GET_RECMODE, 0, tdb_null,
1356 mem_ctx, &timeout, NULL);
/*
  Async receive half of "get recovery mode": waits for the control to
  complete. The recovery mode is carried in the control's status value,
  which is stored in *recmode.
*/
1359 int ctdb_ctrl_getrecmode_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *recmode)
/* no reply data or error message is requested, only the status */
1364 ret = ctdb_control_recv(ctdb, state, mem_ctx, NULL, &res, NULL);
1366 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_getrecmode_recv failed\n"));
1371 *recmode = (uint32_t)res;
/*
  Synchronous "get recovery mode": the send half followed by the
  receive half, whose result is returned.
*/
1377 int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t *recmode)
1379 struct ctdb_client_control_state *state;
1381 state = ctdb_ctrl_getrecmode_send(ctdb, mem_ctx, timeout, destnode);
1382 return ctdb_ctrl_getrecmode_recv(ctdb, mem_ctx, state, recmode);
1389 set the recovery mode of a remote node
/*
  Set the recovery mode of destnode.

  recmode is sent as the raw bytes of a uint32_t in a synchronous
  CTDB_CONTROL_SET_RECMODE control with the given timeout. A non-zero
  transport result or control status is reported as a failure.
*/
1391 int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmode)
/* the payload points at the by-value parameter; nothing is copied here */
1397 data.dsize = sizeof(uint32_t);
1398 data.dptr = (unsigned char *)&recmode;
1400 ret = ctdb_control(ctdb, destnode, 0,
1401 CTDB_CONTROL_SET_RECMODE, 0, data,
1402 NULL, NULL, &res, &timeout, NULL);
1403 if (ret != 0 || res != 0) {
1404 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setrecmode failed\n"));
1414 get the recovery master of a remote node
/*
  Async send half of "get recovery master": issues
  CTDB_CONTROL_GET_RECMASTER to destnode with an empty payload and the
  caller's timeout, returning whatever ctdb_control_send() returns.
 */
1416 struct ctdb_client_control_state *
1417 ctdb_ctrl_getrecmaster_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx,
1418 struct timeval timeout, uint32_t destnode)
1420 return ctdb_control_send(ctdb, destnode, 0,
1421 CTDB_CONTROL_GET_RECMASTER, 0, tdb_null,
1422 mem_ctx, &timeout, NULL);
/*
  Async receive half of "get recovery master": collects the reply for
  state through ctdb_control_recv(), logs an error on the failure path and
  stores the control status (res), cast to uint32_t, in *recmaster.
 */
1425 int ctdb_ctrl_getrecmaster_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *recmaster)
1430 ret = ctdb_control_recv(ctdb, state, mem_ctx, NULL, &res, NULL);
1432 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_getrecmaster_recv failed\n"));
1437 *recmaster = (uint32_t)res;
/*
  Synchronous convenience wrapper: sends the request with
  ctdb_ctrl_getrecmaster_send() and immediately waits for the answer with
  ctdb_ctrl_getrecmaster_recv(), returning the latter's result.
 */
1443 int ctdb_ctrl_getrecmaster(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t *recmaster)
1445 struct ctdb_client_control_state *state;
1447 state = ctdb_ctrl_getrecmaster_send(ctdb, mem_ctx, timeout, destnode);
1448 return ctdb_ctrl_getrecmaster_recv(ctdb, mem_ctx, state, recmaster);
1453 set the recovery master of a remote node
/*
  Sends CTDB_CONTROL_SET_RECMASTER to destnode; the payload is the 4-byte
  recmaster value taken directly from the by-value parameter.  An error is
  logged when the control fails or the remote status is non-zero.
 */
1455 int ctdb_ctrl_setrecmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmaster)
1462 data.dsize = sizeof(uint32_t);
1463 data.dptr = (unsigned char *)&recmaster;
1465 ret = ctdb_control(ctdb, destnode, 0,
1466 CTDB_CONTROL_SET_RECMASTER, 0, data,
1467 NULL, NULL, &res, &timeout, NULL);
1468 if (ret != 0 || res != 0) {
1469 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setrecmaster failed\n"));
1478 get a list of databases off a remote node
1480 int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
1481 TALLOC_CTX *mem_ctx, struct ctdb_dbid_map **dbmap)
1487 ret = ctdb_control(ctdb, destnode, 0,
1488 CTDB_CONTROL_GET_DBMAP, 0, tdb_null,
1489 mem_ctx, &outdata, &res, &timeout, NULL);
1490 if (ret != 0 || res != 0) {
1491 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getdbmap failed ret:%d res:%d\n", ret, res));
1495 *dbmap = (struct ctdb_dbid_map *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
1496 talloc_free(outdata.dptr);
1502 get a list of nodes (vnn and flags ) from a remote node
1504 int ctdb_ctrl_getnodemap(struct ctdb_context *ctdb,
1505 struct timeval timeout, uint32_t destnode,
1506 TALLOC_CTX *mem_ctx, struct ctdb_node_map **nodemap)
1512 ret = ctdb_control(ctdb, destnode, 0,
1513 CTDB_CONTROL_GET_NODEMAP, 0, tdb_null,
1514 mem_ctx, &outdata, &res, &timeout, NULL);
1515 if (ret == 0 && res == -1 && outdata.dsize == 0) {
1516 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getnodes failed, falling back to ipv4-only control\n"));
1517 return ctdb_ctrl_getnodemapv4(ctdb, timeout, destnode, mem_ctx, nodemap);
1519 if (ret != 0 || res != 0 || outdata.dsize == 0) {
1520 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getnodes failed ret:%d res:%d\n", ret, res));
1524 *nodemap = (struct ctdb_node_map *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
1525 talloc_free(outdata.dptr);
1531 old style ipv4-only get a list of nodes (vnn and flags ) from a remote node
1533 int ctdb_ctrl_getnodemapv4(struct ctdb_context *ctdb,
1534 struct timeval timeout, uint32_t destnode,
1535 TALLOC_CTX *mem_ctx, struct ctdb_node_map **nodemap)
1539 struct ctdb_node_mapv4 *nodemapv4;
1542 ret = ctdb_control(ctdb, destnode, 0,
1543 CTDB_CONTROL_GET_NODEMAPv4, 0, tdb_null,
1544 mem_ctx, &outdata, &res, &timeout, NULL);
1545 if (ret != 0 || res != 0 || outdata.dsize == 0) {
1546 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getnodesv4 failed ret:%d res:%d\n", ret, res));
1550 nodemapv4 = (struct ctdb_node_mapv4 *)outdata.dptr;
1552 len = offsetof(struct ctdb_node_map, nodes) + nodemapv4->num*sizeof(struct ctdb_node_and_flags);
1553 (*nodemap) = talloc_zero_size(mem_ctx, len);
1554 CTDB_NO_MEMORY(ctdb, (*nodemap));
1556 (*nodemap)->num = nodemapv4->num;
1557 for (i=0; i<nodemapv4->num; i++) {
1558 (*nodemap)->nodes[i].pnn = nodemapv4->nodes[i].pnn;
1559 (*nodemap)->nodes[i].flags = nodemapv4->nodes[i].flags;
1560 (*nodemap)->nodes[i].addr.ip = nodemapv4->nodes[i].sin;
1561 (*nodemap)->nodes[i].addr.sa.sa_family = AF_INET;
1564 talloc_free(outdata.dptr);
1570 drop the transport, reload the nodes file and restart the transport
/*
  Sends CTDB_CONTROL_RELOAD_NODES_FILE to destnode with an empty payload
  and no reply buffer.  An error is logged when the control fails or the
  remote status is non-zero.
 */
1572 int ctdb_ctrl_reload_nodes_file(struct ctdb_context *ctdb,
1573 struct timeval timeout, uint32_t destnode)
1578 ret = ctdb_control(ctdb, destnode, 0,
1579 CTDB_CONTROL_RELOAD_NODES_FILE, 0, tdb_null,
1580 NULL, NULL, &res, &timeout, NULL);
1581 if (ret != 0 || res != 0) {
1582 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for reloadnodesfile failed\n"));
1591 set vnn map on a node
/*
  Serialises vnnmap into a struct ctdb_vnn_map_wire allocated on mem_ctx
  (header up to the map member followed by vnnmap->size 32-bit entries)
  and sends it to destnode with CTDB_CONTROL_SETVNNMAP.  An error is logged
  when the control fails or the remote status is non-zero.

  NOTE(review): confirm that the wire buffer "map" is released on the
  failure path as well as on success.
 */
1593 int ctdb_ctrl_setvnnmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
1594 TALLOC_CTX *mem_ctx, struct ctdb_vnn_map *vnnmap)
1599 struct ctdb_vnn_map_wire *map;
1602 len = offsetof(struct ctdb_vnn_map_wire, map) + sizeof(uint32_t)*vnnmap->size;
1603 map = talloc_size(mem_ctx, len);
1604 CTDB_NO_MEMORY(ctdb, map);
1606 map->generation = vnnmap->generation;
1607 map->size = vnnmap->size;
1608 memcpy(map->map, vnnmap->map, sizeof(uint32_t)*map->size);
1611 data.dptr = (uint8_t *)map;
1613 ret = ctdb_control(ctdb, destnode, 0,
1614 CTDB_CONTROL_SETVNNMAP, 0, data,
1615 NULL, NULL, &res, &timeout, NULL);
1616 if (ret != 0 || res != 0) {
1617 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setvnnmap failed\n"));
1628 async send for pull database
/*
  Async send half of "pull database": builds a struct ctdb_control_pulldb
  request on mem_ctx (returning NULL via CTDB_NO_MEMORY_NULL if that
  allocation fails), fills in lmaster and sends it to destnode with
  CTDB_CONTROL_PULL_DB.  The control state is also allocated on mem_ctx.
 */
1630 struct ctdb_client_control_state *ctdb_ctrl_pulldb_send(
1631 struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid,
1632 uint32_t lmaster, TALLOC_CTX *mem_ctx, struct timeval timeout)
1635 struct ctdb_control_pulldb *pull;
1636 struct ctdb_client_control_state *state;
1638 pull = talloc(mem_ctx, struct ctdb_control_pulldb);
1639 CTDB_NO_MEMORY_NULL(ctdb, pull);
1642 pull->lmaster = lmaster;
1644 indata.dsize = sizeof(struct ctdb_control_pulldb);
1645 indata.dptr = (unsigned char *)pull;
1647 state = ctdb_control_send(ctdb, destnode, 0,
1648 CTDB_CONTROL_PULL_DB, 0, indata,
1649 mem_ctx, &timeout, NULL);
1656 async recv for pull database
/*
  Async receive half of "pull database": collects the reply for state via
  ctdb_control_recv(), placing the returned records in outdata (allocated
  on mem_ctx).  An error is logged when the receive fails or the remote
  status is non-zero.
 */
1658 int ctdb_ctrl_pulldb_recv(
1659 struct ctdb_context *ctdb,
1660 TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state,
1666 ret = ctdb_control_recv(ctdb, state, mem_ctx, outdata, &res, NULL);
1667 if ( (ret != 0) || (res != 0) ){
1668 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_pulldb_recv failed\n"));
1676 pull all keys and records for a specific database on a node
/*
  Synchronous convenience wrapper: sends the pull request with
  ctdb_ctrl_pulldb_send() and immediately waits for the records with
  ctdb_ctrl_pulldb_recv(), returning the latter's result.
 */
1678 int ctdb_ctrl_pulldb(struct ctdb_context *ctdb, uint32_t destnode,
1679 uint32_t dbid, uint32_t lmaster,
1680 TALLOC_CTX *mem_ctx, struct timeval timeout,
1683 struct ctdb_client_control_state *state;
1685 state = ctdb_ctrl_pulldb_send(ctdb, destnode, dbid, lmaster, mem_ctx,
1688 return ctdb_ctrl_pulldb_recv(ctdb, mem_ctx, state, outdata);
1693 change dmaster for all keys in the database to the new value
1695 int ctdb_ctrl_setdmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
1696 TALLOC_CTX *mem_ctx, uint32_t dbid, uint32_t dmaster)
1702 indata.dsize = 2*sizeof(uint32_t);
1703 indata.dptr = (unsigned char *)talloc_array(mem_ctx, uint32_t, 2);
1705 ((uint32_t *)(&indata.dptr[0]))[0] = dbid;
1706 ((uint32_t *)(&indata.dptr[0]))[1] = dmaster;
1708 ret = ctdb_control(ctdb, destnode, 0,
1709 CTDB_CONTROL_SET_DMASTER, 0, indata,
1710 NULL, NULL, &res, &timeout, NULL);
1711 if (ret != 0 || res != 0) {
1712 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setdmaster failed\n"));
1720 ping a node, return number of clients connected
/*
  Sends CTDB_CONTROL_PING to destnode with an empty payload.  No timeout is
  supplied (NULL is passed where other controls pass &timeout); the remote
  status is collected in res.
 */
1722 int ctdb_ctrl_ping(struct ctdb_context *ctdb, uint32_t destnode)
1727 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_PING, 0,
1728 tdb_null, NULL, NULL, &res, NULL, NULL);
/*
  Queries destnode with CTDB_CONTROL_GET_RUNSTATE.  On control failure the
  non-zero one of ret/res is returned.  The reply must be exactly one
  uint32_t; it is copied to *runstate when runstate is not NULL.  The reply
  buffer (allocated on ctdb) is freed on both the bad-size and success
  paths.
 */
1735 int ctdb_ctrl_get_runstate(struct ctdb_context *ctdb,
1736 struct timeval timeout,
1744 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_RUNSTATE, 0,
1745 tdb_null, ctdb, &outdata, &res, &timeout, NULL);
1746 if (ret != 0 || res != 0) {
1747 DEBUG(DEBUG_ERR,("ctdb_control for get_runstate failed\n"));
1748 return ret != 0 ? ret : res;
1751 if (outdata.dsize != sizeof(uint32_t)) {
1752 DEBUG(DEBUG_ERR,("Invalid return data in get_runstate\n"));
1753 talloc_free(outdata.dptr);
1757 if (runstate != NULL) {
1758 *runstate = *(uint32_t *)outdata.dptr;
1760 talloc_free(outdata.dptr);
1766 find the real path to a ltdb
1768 int ctdb_ctrl_getdbpath(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx,
1775 data.dptr = (uint8_t *)&dbid;
1776 data.dsize = sizeof(dbid);
1778 ret = ctdb_control(ctdb, destnode, 0,
1779 CTDB_CONTROL_GETDBPATH, 0, data,
1780 mem_ctx, &data, &res, &timeout, NULL);
1781 if (ret != 0 || res != 0) {
1785 (*path) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize);
1786 if ((*path) == NULL) {
1790 talloc_free(data.dptr);
1796 find the name of a db
1798 int ctdb_ctrl_getdbname(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx,
1805 data.dptr = (uint8_t *)&dbid;
1806 data.dsize = sizeof(dbid);
1808 ret = ctdb_control(ctdb, destnode, 0,
1809 CTDB_CONTROL_GET_DBNAME, 0, data,
1810 mem_ctx, &data, &res, &timeout, NULL);
1811 if (ret != 0 || res != 0) {
1815 (*name) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize);
1816 if ((*name) == NULL) {
1820 talloc_free(data.dptr);
1826 get the health status of a db
1828 int ctdb_ctrl_getdbhealth(struct ctdb_context *ctdb,
1829 struct timeval timeout,
1831 uint32_t dbid, TALLOC_CTX *mem_ctx,
1832 const char **reason)
1838 data.dptr = (uint8_t *)&dbid;
1839 data.dsize = sizeof(dbid);
1841 ret = ctdb_control(ctdb, destnode, 0,
1842 CTDB_CONTROL_DB_GET_HEALTH, 0, data,
1843 mem_ctx, &data, &res, &timeout, NULL);
1844 if (ret != 0 || res != 0) {
1848 if (data.dsize == 0) {
1853 (*reason) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize);
1854 if ((*reason) == NULL) {
1858 talloc_free(data.dptr);
1864 * get db sequence number
1866 int ctdb_ctrl_getdbseqnum(struct ctdb_context *ctdb, struct timeval timeout,
1867 uint32_t destnode, uint32_t dbid, uint64_t *seqnum)
1871 TDB_DATA data, outdata;
1873 data.dptr = (uint8_t *)&dbid;
1874 data.dsize = sizeof(uint64_t); /* This is just wrong */
1876 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_DB_SEQNUM,
1877 0, data, ctdb, &outdata, &res, &timeout, NULL);
1878 if (ret != 0 || res != 0) {
1879 DEBUG(DEBUG_ERR,("ctdb_control for getdbesqnum failed\n"));
1883 if (outdata.dsize != sizeof(uint64_t)) {
1884 DEBUG(DEBUG_ERR,("Invalid return data in get_dbseqnum\n"));
1885 talloc_free(outdata.dptr);
1889 if (seqnum != NULL) {
1890 *seqnum = *(uint64_t *)outdata.dptr;
1892 talloc_free(outdata.dptr);
/*
  Asks destnode to attach a database called name (the payload is the name
  including its terminating NUL).  CTDB_CONTROL_DB_ATTACH_PERSISTENT is
  used when persistent is true, CTDB_CONTROL_DB_ATTACH otherwise; tdb_flags
  travels in the srvid argument of ctdb_control().
 */
1900 int ctdb_ctrl_createdb(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
1901 TALLOC_CTX *mem_ctx, const char *name, bool persistent)
1906 uint64_t tdb_flags = 0;
1908 data.dptr = discard_const(name);
1909 data.dsize = strlen(name)+1;
1911 /* Make sure that volatile databases use jenkins hash */
1913 tdb_flags = TDB_INCOMPATIBLE_HASH;
1916 ret = ctdb_control(ctdb, destnode, tdb_flags,
1917 persistent?CTDB_CONTROL_DB_ATTACH_PERSISTENT:CTDB_CONTROL_DB_ATTACH,
1919 mem_ctx, &data, &res, &timeout, NULL);
1921 if (ret != 0 || res != 0) {
1929 get debug level on a node
1931 int ctdb_ctrl_get_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, int32_t *level)
1937 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_DEBUG, 0, tdb_null,
1938 ctdb, &data, &res, NULL, NULL);
1939 if (ret != 0 || res != 0) {
1942 if (data.dsize != sizeof(int32_t)) {
1943 DEBUG(DEBUG_ERR,("Bad control reply size in ctdb_get_debuglevel (got %u)\n",
1944 (unsigned)data.dsize));
1947 *level = *(int32_t *)data.dptr;
1948 talloc_free(data.dptr);
1953 set debug level on a node
/*
  Sends CTDB_CONTROL_SET_DEBUG to destnode; the payload is the int32_t
  level taken directly from the by-value parameter.  No timeout and no
  reply buffer are supplied.
 */
1955 int ctdb_ctrl_set_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, int32_t level)
1961 data.dptr = (uint8_t *)&level;
1962 data.dsize = sizeof(level);
1964 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SET_DEBUG, 0, data,
1965 NULL, NULL, &res, NULL, NULL);
1966 if (ret != 0 || res != 0) {
1974 get a list of connected nodes
/*
  Fetches the node map from CTDB_CURRENT_NODE and builds an array, on
  mem_ctx, holding the pnn of every node whose flags do not include
  NODE_FLAGS_DISCONNECTED.  The array is sized for map->num entries and
  *num_nodes is used as the write index into it.

  NOTE(review): the node map itself is also allocated on mem_ctx; confirm
  whether it is released here or left to the caller.
 */
1976 uint32_t *ctdb_get_connected_nodes(struct ctdb_context *ctdb,
1977 struct timeval timeout,
1978 TALLOC_CTX *mem_ctx,
1979 uint32_t *num_nodes)
1981 struct ctdb_node_map *map=NULL;
1987 ret = ctdb_ctrl_getnodemap(ctdb, timeout, CTDB_CURRENT_NODE, mem_ctx, &map);
1992 nodes = talloc_array(mem_ctx, uint32_t, map->num);
1993 if (nodes == NULL) {
1997 for (i=0;i<map->num;i++) {
1998 if (!(map->nodes[i].flags & NODE_FLAGS_DISCONNECTED)) {
1999 nodes[*num_nodes] = map->nodes[i].pnn;
/*
  Sends CTDB_CONTROL_STATISTICS_RESET to destnode with an empty payload, no
  timeout and no reply buffer.  An error is logged when the control fails
  or the remote status is non-zero.
 */
2011 int ctdb_statistics_reset(struct ctdb_context *ctdb, uint32_t destnode)
2016 ret = ctdb_control(ctdb, destnode, 0,
2017 CTDB_CONTROL_STATISTICS_RESET, 0, tdb_null,
2018 NULL, NULL, &res, NULL, NULL);
2019 if (ret != 0 || res != 0) {
2020 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for reset statistics failed\n"));
2027 attach to a specific database - client call
/*
  Client-side attach to the database called name.

  Steps visible here: look up an existing handle with ctdb_db_handle();
  allocate a zeroed ctdb_db_context on ctdb and copy the name into it; ask
  the local daemon (CTDB_CURRENT_NODE) to attach, using
  CTDB_CONTROL_DB_ATTACH_PERSISTENT or CTDB_CONTROL_DB_ATTACH and passing
  tdb_flags in the srvid argument; require a reply of exactly one uint32_t
  and store it as db_id; fetch the on-disk path with ctdb_ctrl_getdbpath();
  open the tdb read-write with tdb_wrap_open(); link the context into
  ctdb->db_list and register the three well-known call functions.

  The flags used for the local open are recomputed after the control:
  TDB_DEFAULT for persistent databases and TDB_NOSYNC otherwise, plus
  TDB_NOMMAP when ctdb->valgrinding is set, plus TDB_DISALLOW_NESTING.
  The attach, getdbpath and open failure paths free ctdb_db.
 */
2029 struct ctdb_db_context *ctdb_attach(struct ctdb_context *ctdb,
2030 struct timeval timeout,
2035 struct ctdb_db_context *ctdb_db;
2040 ctdb_db = ctdb_db_handle(ctdb, name);
2045 ctdb_db = talloc_zero(ctdb, struct ctdb_db_context);
2046 CTDB_NO_MEMORY_NULL(ctdb, ctdb_db);
2048 ctdb_db->ctdb = ctdb;
2049 ctdb_db->db_name = talloc_strdup(ctdb_db, name);
2050 CTDB_NO_MEMORY_NULL(ctdb, ctdb_db->db_name);
2052 data.dptr = discard_const(name);
2053 data.dsize = strlen(name)+1;
2055 /* CTDB has switched to using jenkins hash for volatile databases.
2056 * Even if tdb_flags do not explicitly mention TDB_INCOMPATIBLE_HASH,
2060 tdb_flags |= TDB_INCOMPATIBLE_HASH;
2063 /* tell ctdb daemon to attach */
2064 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, tdb_flags,
2065 persistent?CTDB_CONTROL_DB_ATTACH_PERSISTENT:CTDB_CONTROL_DB_ATTACH,
2066 0, data, ctdb_db, &data, &res, NULL, NULL);
2067 if (ret != 0 || res != 0 || data.dsize != sizeof(uint32_t)) {
2068 DEBUG(DEBUG_ERR,("Failed to attach to database '%s'\n", name));
2069 talloc_free(ctdb_db);
2073 ctdb_db->db_id = *(uint32_t *)data.dptr;
2074 talloc_free(data.dptr);
2076 ret = ctdb_ctrl_getdbpath(ctdb, timeout, CTDB_CURRENT_NODE, ctdb_db->db_id, ctdb_db, &ctdb_db->db_path);
2078 DEBUG(DEBUG_ERR,("Failed to get dbpath for database '%s'\n", name));
2079 talloc_free(ctdb_db);
2083 tdb_flags = persistent?TDB_DEFAULT:TDB_NOSYNC;
2084 if (ctdb->valgrinding) {
2085 tdb_flags |= TDB_NOMMAP;
2087 tdb_flags |= TDB_DISALLOW_NESTING;
2089 ctdb_db->ltdb = tdb_wrap_open(ctdb, ctdb_db->db_path, 0, tdb_flags, O_RDWR, 0);
2090 if (ctdb_db->ltdb == NULL) {
2091 ctdb_set_error(ctdb, "Failed to open tdb '%s'\n", ctdb_db->db_path);
2092 talloc_free(ctdb_db);
2096 ctdb_db->persistent = persistent;
2098 DLIST_ADD(ctdb->db_list, ctdb_db);
2100 /* add well known functions */
2101 ctdb_set_call(ctdb_db, ctdb_null_func, CTDB_NULL_FUNC);
2102 ctdb_set_call(ctdb_db, ctdb_fetch_func, CTDB_FETCH_FUNC);
2103 ctdb_set_call(ctdb_db, ctdb_fetch_with_header_func, CTDB_FETCH_WITH_HEADER_FUNC);
2110 setup a call for a database
/*
  Registers call function fn under id for ctdb_db.  The local part
  allocates a struct ctdb_registered_call on ctdb_db and links it into
  ctdb_db->calls.

  NOTE(review): the CTDB_CONTROL_SET_CALL section is marked in-code as no
  longer valid with the separate daemon architecture; confirm whether it is
  still compiled in.
 */
2112 int ctdb_set_call(struct ctdb_db_context *ctdb_db, ctdb_fn_t fn, uint32_t id)
2114 struct ctdb_registered_call *call;
2119 struct ctdb_control_set_call c;
2122 /* this is no longer valid with the separate daemon architecture */
2123 c.db_id = ctdb_db->db_id;
2127 data.dptr = (uint8_t *)&c;
2128 data.dsize = sizeof(c);
2130 ret = ctdb_control(ctdb_db->ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_SET_CALL, 0,
2131 data, NULL, NULL, &status, NULL, NULL);
2132 if (ret != 0 || status != 0) {
2133 DEBUG(DEBUG_ERR,("ctdb_set_call failed for call %u\n", id));
2138 /* also register locally */
2139 call = talloc(ctdb_db, struct ctdb_registered_call);
2143 DLIST_ADD(ctdb_db->calls, call);
/*
  Bookkeeping shared between ctdb_traverse_ext() and traverse_handler():
  fn is the per-record callback and listemptyrecords selects whether
  header-only records are passed to it.
 */
2148 struct traverse_state {
2151 ctdb_traverse_func fn;
2153 bool listemptyrecords;
2157 called on each key during a ctdb_traverse
/*
  Message handler invoked for each record delivered during a traverse.
  p is the struct traverse_state and data.dptr is a struct ctdb_rec_data.
  The message is rejected when it is shorter than a uint32_t or when the
  embedded length field disagrees with the message size.  The key starts
  at d->data[0] and the value directly after it, at d->data[d->keylen].
  A record with empty key and empty value marks the end of the traverse.
  Unless listemptyrecords is set, values that consist of nothing but a
  struct ctdb_ltdb_header are treated as deleted.  Remaining records are
  handed to state->fn.

  NOTE(review): keylen and datalen are not checked against d->length
  before the pointers are formed; confirm the sender is trusted.
 */
2159 static void traverse_handler(struct ctdb_context *ctdb, uint64_t srvid, TDB_DATA data, void *p)
2161 struct traverse_state *state = (struct traverse_state *)p;
2162 struct ctdb_rec_data *d = (struct ctdb_rec_data *)data.dptr;
2165 if (data.dsize < sizeof(uint32_t) ||
2166 d->length != data.dsize) {
2167 DEBUG(DEBUG_ERR,("Bad data size %u in traverse_handler\n", (unsigned)data.dsize));
2172 key.dsize = d->keylen;
2173 key.dptr = &d->data[0];
2174 data.dsize = d->datalen;
2175 data.dptr = &d->data[d->keylen];
2177 if (key.dsize == 0 && data.dsize == 0) {
2178 /* end of traverse */
2183 if (!state->listemptyrecords &&
2184 data.dsize == sizeof(struct ctdb_ltdb_header))
2186 /* empty records are deleted records in ctdb */
2190 if (state->fn(ctdb, key, data, state->private_data) != 0) {
2198 * start a cluster wide traverse, calling the supplied fn on each record
2199 * return the number of records traversed, or -1 on error
2201 * Extended variant with a flag to signal whether empty records should
/*
  Runs a traverse of ctdb_db through the local daemon.  A message handler
  (traverse_handler) is registered under a srvid built from the process id
  with the top four bits set, CTDB_CONTROL_TRAVERSE_START_EXT is sent to
  CTDB_CURRENT_NODE, and the event loop is then run until the handler sets
  state.done.  The handler is removed again both when starting the
  traverse fails and after the loop finishes.
 */
2204 static int ctdb_traverse_ext(struct ctdb_db_context *ctdb_db,
2205 ctdb_traverse_func fn,
2206 bool withemptyrecords,
2210 struct ctdb_traverse_start_ext t;
2213 uint64_t srvid = (getpid() | 0xFLL<<60);
2214 struct traverse_state state;
2218 state.private_data = private_data;
2220 state.listemptyrecords = withemptyrecords;
2222 ret = ctdb_client_set_message_handler(ctdb_db->ctdb, srvid, traverse_handler, &state);
2224 DEBUG(DEBUG_ERR,("Failed to setup traverse handler\n"));
2228 t.db_id = ctdb_db->db_id;
2231 t.withemptyrecords = withemptyrecords;
2233 data.dptr = (uint8_t *)&t;
2234 data.dsize = sizeof(t);
2236 ret = ctdb_control(ctdb_db->ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_TRAVERSE_START_EXT, 0,
2237 data, NULL, NULL, &status, NULL, NULL);
2238 if (ret != 0 || status != 0) {
2239 DEBUG(DEBUG_ERR,("ctdb_traverse_all failed\n"));
2240 ctdb_client_remove_message_handler(ctdb_db->ctdb, srvid, &state);
2244 while (!state.done) {
2245 event_loop_once(ctdb_db->ctdb->ev);
2248 ret = ctdb_client_remove_message_handler(ctdb_db->ctdb, srvid, &state);
2250 DEBUG(DEBUG_ERR,("Failed to remove ctdb_traverse handler\n"));
2258 * start a cluster wide traverse, calling the supplied fn on each record
2259 * return the number of records traversed, or -1 on error
2261 * Standard version which does not list the empty records:
2262 * These are considered deleted.
/*
  Public traverse entry point: forwards to ctdb_traverse_ext() with
  withemptyrecords set to false, so header-only records are skipped.
 */
2264 int ctdb_traverse(struct ctdb_db_context *ctdb_db, ctdb_traverse_func fn, void *private_data)
2266 return ctdb_traverse_ext(ctdb_db, fn, false, private_data);
/* true for printable characters other than double quote and backslash; x is evaluated twice */
2269 #define ISASCII(x) (isprint(x) && !strchr("\"\\", (x)))
2271 called on each key during a catdb
2273 int ctdb_dumpdb_record(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA data, void *p)
2276 struct ctdb_dump_db_context *c = (struct ctdb_dump_db_context *)p;
2278 struct ctdb_ltdb_header *h = (struct ctdb_ltdb_header *)data.dptr;
2280 fprintf(f, "key(%u) = \"", (unsigned)key.dsize);
2281 for (i=0;i<key.dsize;i++) {
2282 if (ISASCII(key.dptr[i])) {
2283 fprintf(f, "%c", key.dptr[i]);
2285 fprintf(f, "\\%02X", key.dptr[i]);
2290 fprintf(f, "dmaster: %u\n", h->dmaster);
2291 fprintf(f, "rsn: %llu\n", (unsigned long long)h->rsn);
2293 if (c->printlmaster && ctdb->vnn_map != NULL) {
2294 fprintf(f, "lmaster: %u\n", ctdb_lmaster(ctdb, &key));
2298 fprintf(f, "hash: 0x%08x\n", ctdb_hash(&key));
2301 if (c->printrecordflags) {
2302 fprintf(f, "flags: 0x%08x", h->flags);
2303 if (h->flags & CTDB_REC_FLAG_MIGRATED_WITH_DATA) printf(" MIGRATED_WITH_DATA");
2304 if (h->flags & CTDB_REC_FLAG_VACUUM_MIGRATED) printf(" VACUUM_MIGRATED");
2305 if (h->flags & CTDB_REC_FLAG_AUTOMATIC) printf(" AUTOMATIC");
2306 if (h->flags & CTDB_REC_RO_HAVE_DELEGATIONS) printf(" RO_HAVE_DELEGATIONS");
2307 if (h->flags & CTDB_REC_RO_HAVE_READONLY) printf(" RO_HAVE_READONLY");
2308 if (h->flags & CTDB_REC_RO_REVOKING_READONLY) printf(" RO_REVOKING_READONLY");
2309 if (h->flags & CTDB_REC_RO_REVOKE_COMPLETE) printf(" RO_REVOKE_COMPLETE");
2313 if (c->printdatasize) {
2314 fprintf(f, "data size: %u\n", (unsigned)data.dsize);
2316 fprintf(f, "data(%u) = \"", (unsigned)(data.dsize - sizeof(*h)));
2317 for (i=sizeof(*h);i<data.dsize;i++) {
2318 if (ISASCII(data.dptr[i])) {
2319 fprintf(f, "%c", data.dptr[i]);
2321 fprintf(f, "\\%02X", data.dptr[i]);
2333 convenience function to list all keys to stdout
/*
  Dumps every record of ctdb_db by traversing it with ctdb_dumpdb_record as
  the callback and ctx as its private data; ctx->printemptyrecords decides
  whether header-only records are included.
 */
2335 int ctdb_dump_db(struct ctdb_db_context *ctdb_db,
2336 struct ctdb_dump_db_context *ctx)
2338 return ctdb_traverse_ext(ctdb_db, ctdb_dumpdb_record,
2339 ctx->printemptyrecords, ctx);
2343 get the pid of a ctdb daemon
/*
  Sends CTDB_CONTROL_GET_PID to destnode with an empty payload and no
  reply buffer; the answer is carried in the control status (res).  An
  error is logged on the failure path.
 */
2345 int ctdb_ctrl_getpid(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *pid)
2350 ret = ctdb_control(ctdb, destnode, 0,
2351 CTDB_CONTROL_GET_PID, 0, tdb_null,
2352 NULL, NULL, &res, &timeout, NULL);
2354 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpid failed\n"));
2365 async freeze send control
/*
  Async send half of "freeze": issues CTDB_CONTROL_FREEZE to destnode with
  an empty payload.  The database priority travels in the srvid argument
  of ctdb_control_send().
 */
2367 struct ctdb_client_control_state *
2368 ctdb_ctrl_freeze_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t priority)
2370 return ctdb_control_send(ctdb, destnode, priority,
2371 CTDB_CONTROL_FREEZE, 0, tdb_null,
2372 mem_ctx, &timeout, NULL);
2376 async freeze recv control
/*
  Async receive half of "freeze": collects the reply for state via
  ctdb_control_recv() and logs an error when the receive fails or the
  remote status is non-zero.
 */
2378 int ctdb_ctrl_freeze_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state)
2383 ret = ctdb_control_recv(ctdb, state, mem_ctx, NULL, &res, NULL);
2384 if ( (ret != 0) || (res != 0) ){
2385 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_freeze_recv failed\n"));
2393 freeze databases of a certain priority
/*
  Synchronously freezes the databases of one priority on destnode: runs
  the send/recv pair on a temporary talloc context created under ctdb and
  frees that context once the reply has been collected.
 */
2395 int ctdb_ctrl_freeze_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t priority)
2397 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
2398 struct ctdb_client_control_state *state;
2401 state = ctdb_ctrl_freeze_send(ctdb, tmp_ctx, timeout, destnode, priority);
2402 ret = ctdb_ctrl_freeze_recv(ctdb, tmp_ctx, state);
2403 talloc_free(tmp_ctx);
2408 /* Freeze all databases */
/*
  Freezes every database priority on destnode by calling
  ctdb_ctrl_freeze_priority() for priorities 1 through NUM_DB_PRIORITIES
  in ascending order, checking each call for a non-zero result.
 */
2409 int ctdb_ctrl_freeze(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2413 for (i=1; i<=NUM_DB_PRIORITIES; i++) {
2414 if (ctdb_ctrl_freeze_priority(ctdb, timeout, destnode, i) != 0) {
2422 thaw databases of a certain priority
/*
  Sends CTDB_CONTROL_THAW to destnode with an empty payload; the database
  priority travels in the srvid argument of ctdb_control().  An error is
  logged when the control fails or the remote status is non-zero.
 */
2424 int ctdb_ctrl_thaw_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t priority)
2429 ret = ctdb_control(ctdb, destnode, priority,
2430 CTDB_CONTROL_THAW, 0, tdb_null,
2431 NULL, NULL, &res, &timeout, NULL);
2432 if (ret != 0 || res != 0) {
2433 DEBUG(DEBUG_ERR,(__location__ " ctdb_control thaw failed\n"));
2440 /* thaw all databases */
/*
  Thaws databases on destnode by calling ctdb_ctrl_thaw_priority() with
  priority 0.  NOTE(review): presumably 0 means "all priorities" on the
  daemon side; confirm.
 */
2441 int ctdb_ctrl_thaw(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2443 return ctdb_ctrl_thaw_priority(ctdb, timeout, destnode, 0);
2447 get pnn of a node, or -1
/*
  Sends CTDB_CONTROL_GET_PNN to destnode with an empty payload and no
  reply buffer; the answer is carried in the control status (res).  An
  error is logged on the failure path.
 */
2449 int ctdb_ctrl_getpnn(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2454 ret = ctdb_control(ctdb, destnode, 0,
2455 CTDB_CONTROL_GET_PNN, 0, tdb_null,
2456 NULL, NULL, &res, &timeout, NULL);
2458 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpnn failed\n"));
2466 get the monitoring mode of a remote node
/*
  Sends CTDB_CONTROL_GET_MONMODE to destnode with an empty payload and no
  reply buffer; the answer is carried in the control status (res).  An
  error is logged on the failure path.
 */
2468 int ctdb_ctrl_getmonmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *monmode)
2473 ret = ctdb_control(ctdb, destnode, 0,
2474 CTDB_CONTROL_GET_MONMODE, 0, tdb_null,
2475 NULL, NULL, &res, &timeout, NULL);
2477 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getmonmode failed\n"));
2488 set the monitoring mode of a remote node to active
/*
  Sends CTDB_CONTROL_ENABLE_MONITOR to destnode with an empty payload.
  Neither a reply buffer nor a status pointer is supplied, so only the
  local result of ctdb_control() is available.
 */
2490 int ctdb_ctrl_enable_monmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2495 ret = ctdb_control(ctdb, destnode, 0,
2496 CTDB_CONTROL_ENABLE_MONITOR, 0, tdb_null,
2497 NULL, NULL,NULL, &timeout, NULL);
2499 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for enable_monitor failed\n"));
2509 set the monitoring mode of a remote node to disable
/*
  Sends CTDB_CONTROL_DISABLE_MONITOR to destnode with an empty payload.
  Neither a reply buffer nor a status pointer is supplied, so only the
  local result of ctdb_control() is available.
 */
2511 int ctdb_ctrl_disable_monmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2516 ret = ctdb_control(ctdb, destnode, 0,
2517 CTDB_CONTROL_DISABLE_MONITOR, 0, tdb_null,
2518 NULL, NULL, NULL, &timeout, NULL);
2520 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for disable_monitor failed\n"));
2532 sent to a node to make it take over an ip address
/*
  Asks destnode to take over a public address.  AF_INET addresses are
  repacked into a struct ctdb_public_ipv4 and sent with
  CTDB_CONTROL_TAKEOVER_IPv4; any other family sends *ip unchanged with
  CTDB_CONTROL_TAKEOVER_IP.  An error is logged when the control fails or
  the remote status is non-zero.
 */
2534 int ctdb_ctrl_takeover_ip(struct ctdb_context *ctdb, struct timeval timeout,
2535 uint32_t destnode, struct ctdb_public_ip *ip)
2538 struct ctdb_public_ipv4 ipv4;
2542 if (ip->addr.sa.sa_family == AF_INET) {
2544 ipv4.sin = ip->addr.ip;
2546 data.dsize = sizeof(ipv4);
2547 data.dptr = (uint8_t *)&ipv4;
2549 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_TAKEOVER_IPv4, 0, data, NULL,
2550 NULL, &res, &timeout, NULL);
2552 data.dsize = sizeof(*ip);
2553 data.dptr = (uint8_t *)ip;
2555 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_TAKEOVER_IP, 0, data, NULL,
2556 NULL, &res, &timeout, NULL);
2559 if (ret != 0 || res != 0) {
2560 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for takeover_ip failed\n"));
2569 sent to a node to make it release an ip address
/*
  Asks destnode to release a public address.  AF_INET addresses are
  repacked into a struct ctdb_public_ipv4 and sent with
  CTDB_CONTROL_RELEASE_IPv4; any other family sends *ip unchanged with
  CTDB_CONTROL_RELEASE_IP.  An error is logged when the control fails or
  the remote status is non-zero.
 */
2571 int ctdb_ctrl_release_ip(struct ctdb_context *ctdb, struct timeval timeout,
2572 uint32_t destnode, struct ctdb_public_ip *ip)
2575 struct ctdb_public_ipv4 ipv4;
2579 if (ip->addr.sa.sa_family == AF_INET) {
2581 ipv4.sin = ip->addr.ip;
2583 data.dsize = sizeof(ipv4);
2584 data.dptr = (uint8_t *)&ipv4;
2586 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_RELEASE_IPv4, 0, data, NULL,
2587 NULL, &res, &timeout, NULL);
2589 data.dsize = sizeof(*ip);
2590 data.dptr = (uint8_t *)ip;
2592 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_RELEASE_IP, 0, data, NULL,
2593 NULL, &res, &timeout, NULL);
2596 if (ret != 0 || res != 0) {
2597 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for release_ip failed\n"));
/*
  Reads one tunable from destnode.  The request is a struct
  ctdb_control_get_tunable allocated on ctdb, carrying the name including
  its terminating NUL; it is freed right after the control returns.  On
  control failure the non-zero one of ret/res is returned.  The reply must
  be exactly one uint32_t, which is stored in *value; the reply buffer is
  freed on both the bad-size and success paths.
 */
2608 int ctdb_ctrl_get_tunable(struct ctdb_context *ctdb,
2609 struct timeval timeout,
2611 const char *name, uint32_t *value)
2613 struct ctdb_control_get_tunable *t;
2614 TDB_DATA data, outdata;
2618 data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(name) + 1;
2619 data.dptr = talloc_size(ctdb, data.dsize);
2620 CTDB_NO_MEMORY(ctdb, data.dptr);
2622 t = (struct ctdb_control_get_tunable *)data.dptr;
2623 t->length = strlen(name)+1;
2624 memcpy(t->name, name, t->length);
2626 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_TUNABLE, 0, data, ctdb,
2627 &outdata, &res, &timeout, NULL);
2628 talloc_free(data.dptr);
2629 if (ret != 0 || res != 0) {
2630 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get_tunable failed\n"));
2631 return ret != 0 ? ret : res;
2634 if (outdata.dsize != sizeof(uint32_t)) {
2635 DEBUG(DEBUG_ERR,("Invalid return data in get_tunable\n"));
2636 talloc_free(outdata.dptr);
2640 *value = *(uint32_t *)outdata.dptr;
2641 talloc_free(outdata.dptr);
/*
  Sets one tunable on destnode.  The request is a struct
  ctdb_control_set_tunable allocated on ctdb, carrying the name including
  its terminating NUL; it is freed right after the control returns.  An
  error is logged when the control fails or the remote status is non-zero.
 */
2649 int ctdb_ctrl_set_tunable(struct ctdb_context *ctdb,
2650 struct timeval timeout,
2652 const char *name, uint32_t value)
2654 struct ctdb_control_set_tunable *t;
2659 data.dsize = offsetof(struct ctdb_control_set_tunable, name) + strlen(name) + 1;
2660 data.dptr = talloc_size(ctdb, data.dsize);
2661 CTDB_NO_MEMORY(ctdb, data.dptr);
2663 t = (struct ctdb_control_set_tunable *)data.dptr;
2664 t->length = strlen(name)+1;
2665 memcpy(t->name, name, t->length);
2668 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SET_TUNABLE, 0, data, NULL,
2669 NULL, &res, &timeout, NULL);
2670 talloc_free(data.dptr);
2671 if (ret != 0 || res != 0) {
2672 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set_tunable failed\n"));
/*
  Fetches the names of all tunables from destnode.  The reply is a struct
  ctdb_control_list_tunable whose length field must fit inside the
  received buffer.  Its data is copied to a NUL-terminated string on
  mem_ctx and split on ':' with strtok_r(); each name is duplicated as a
  child of the growing *list array, which is itself reallocated on mem_ctx
  one slot at a time.

  NOTE(review): when talloc_strndup() fails, CTDB_NO_MEMORY returns before
  outdata.dptr is freed; the early returns inside the loop likewise leave
  the temporary string allocated.  Confirm whether callers always discard
  mem_ctx on failure.
 */
2682 int ctdb_ctrl_list_tunables(struct ctdb_context *ctdb,
2683 struct timeval timeout,
2685 TALLOC_CTX *mem_ctx,
2686 const char ***list, uint32_t *count)
2691 struct ctdb_control_list_tunable *t;
2694 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_LIST_TUNABLES, 0, tdb_null,
2695 mem_ctx, &outdata, &res, &timeout, NULL);
2696 if (ret != 0 || res != 0) {
2697 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for list_tunables failed\n"));
2701 t = (struct ctdb_control_list_tunable *)outdata.dptr;
2702 if (outdata.dsize < offsetof(struct ctdb_control_list_tunable, data) ||
2703 t->length > outdata.dsize-offsetof(struct ctdb_control_list_tunable, data)) {
2704 DEBUG(DEBUG_ERR,("Invalid data in list_tunables reply\n"));
2705 talloc_free(outdata.dptr);
2709 p = talloc_strndup(mem_ctx, (char *)t->data, t->length);
2710 CTDB_NO_MEMORY(ctdb, p);
2712 talloc_free(outdata.dptr);
2717 for (s=strtok_r(p, ":", &ptr); s; s=strtok_r(NULL, ":", &ptr)) {
2718 (*list) = talloc_realloc(mem_ctx, *list, const char *, 1+(*count));
2719 CTDB_NO_MEMORY(ctdb, *list);
2720 (*list)[*count] = talloc_strdup(*list, s);
2721 CTDB_NO_MEMORY(ctdb, (*list)[*count]);
2731 int ctdb_ctrl_get_public_ips_flags(struct ctdb_context *ctdb,
2732 struct timeval timeout, uint32_t destnode,
2733 TALLOC_CTX *mem_ctx,
2735 struct ctdb_all_public_ips **ips)
2741 ret = ctdb_control(ctdb, destnode, 0,
2742 CTDB_CONTROL_GET_PUBLIC_IPS, flags, tdb_null,
2743 mem_ctx, &outdata, &res, &timeout, NULL);
2744 if (ret == 0 && res == -1) {
2745 DEBUG(DEBUG_ERR,(__location__ " ctdb_control to get public ips failed, falling back to ipv4-only version\n"));
2746 return ctdb_ctrl_get_public_ipsv4(ctdb, timeout, destnode, mem_ctx, ips);
2748 if (ret != 0 || res != 0) {
2749 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpublicips failed ret:%d res:%d\n", ret, res));
2753 *ips = (struct ctdb_all_public_ips *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
2754 talloc_free(outdata.dptr);
/*
  Convenience wrapper around ctdb_ctrl_get_public_ips_flags(); returns
  that function's result unchanged.
 */
2759 int ctdb_ctrl_get_public_ips(struct ctdb_context *ctdb,
2760 struct timeval timeout, uint32_t destnode,
2761 TALLOC_CTX *mem_ctx,
2762 struct ctdb_all_public_ips **ips)
2764 return ctdb_ctrl_get_public_ips_flags(ctdb, timeout,
2769 int ctdb_ctrl_get_public_ipsv4(struct ctdb_context *ctdb,
2770 struct timeval timeout, uint32_t destnode,
2771 TALLOC_CTX *mem_ctx, struct ctdb_all_public_ips **ips)
2776 struct ctdb_all_public_ipsv4 *ipsv4;
2778 ret = ctdb_control(ctdb, destnode, 0,
2779 CTDB_CONTROL_GET_PUBLIC_IPSv4, 0, tdb_null,
2780 mem_ctx, &outdata, &res, &timeout, NULL);
2781 if (ret != 0 || res != 0) {
2782 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpublicips failed\n"));
2786 ipsv4 = (struct ctdb_all_public_ipsv4 *)outdata.dptr;
2787 len = offsetof(struct ctdb_all_public_ips, ips) +
2788 ipsv4->num*sizeof(struct ctdb_public_ip);
2789 *ips = talloc_zero_size(mem_ctx, len);
2790 CTDB_NO_MEMORY(ctdb, *ips);
2791 (*ips)->num = ipsv4->num;
2792 for (i=0; i<ipsv4->num; i++) {
2793 (*ips)->ips[i].pnn = ipsv4->ips[i].pnn;
2794 (*ips)->ips[i].addr.ip = ipsv4->ips[i].sin;
2797 talloc_free(outdata.dptr);
2802 int ctdb_ctrl_get_public_ip_info(struct ctdb_context *ctdb,
2803 struct timeval timeout, uint32_t destnode,
2804 TALLOC_CTX *mem_ctx,
2805 const ctdb_sock_addr *addr,
2806 struct ctdb_control_public_ip_info **_info)
2812 struct ctdb_control_public_ip_info *info;
2816 indata.dptr = discard_const_p(uint8_t, addr);
2817 indata.dsize = sizeof(*addr);
2819 ret = ctdb_control(ctdb, destnode, 0,
2820 CTDB_CONTROL_GET_PUBLIC_IP_INFO, 0, indata,
2821 mem_ctx, &outdata, &res, &timeout, NULL);
2822 if (ret != 0 || res != 0) {
2823 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
2824 "failed ret:%d res:%d\n",
2829 len = offsetof(struct ctdb_control_public_ip_info, ifaces);
2830 if (len > outdata.dsize) {
2831 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
2832 "returned invalid data with size %u > %u\n",
2833 (unsigned int)outdata.dsize,
2834 (unsigned int)len));
2835 dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
2839 info = (struct ctdb_control_public_ip_info *)outdata.dptr;
2840 len += info->num*sizeof(struct ctdb_control_iface_info);
2842 if (len > outdata.dsize) {
2843 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
2844 "returned invalid data with size %u > %u\n",
2845 (unsigned int)outdata.dsize,
2846 (unsigned int)len));
2847 dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
2851 /* make sure we null terminate the returned strings */
2852 for (i=0; i < info->num; i++) {
2853 info->ifaces[i].name[CTDB_IFACE_SIZE] = '\0';
2856 *_info = (struct ctdb_control_public_ip_info *)talloc_memdup(mem_ctx,
2859 talloc_free(outdata.dptr);
2860 if (*_info == NULL) {
2861 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
2862 "talloc_memdup size %u failed\n",
2863 (unsigned int)outdata.dsize));
2870 int ctdb_ctrl_get_ifaces(struct ctdb_context *ctdb,
2871 struct timeval timeout, uint32_t destnode,
2872 TALLOC_CTX *mem_ctx,
2873 struct ctdb_control_get_ifaces **_ifaces)
2878 struct ctdb_control_get_ifaces *ifaces;
2882 ret = ctdb_control(ctdb, destnode, 0,
2883 CTDB_CONTROL_GET_IFACES, 0, tdb_null,
2884 mem_ctx, &outdata, &res, &timeout, NULL);
2885 if (ret != 0 || res != 0) {
2886 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
2887 "failed ret:%d res:%d\n",
2892 len = offsetof(struct ctdb_control_get_ifaces, ifaces);
2893 if (len > outdata.dsize) {
2894 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
2895 "returned invalid data with size %u > %u\n",
2896 (unsigned int)outdata.dsize,
2897 (unsigned int)len));
2898 dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
2902 ifaces = (struct ctdb_control_get_ifaces *)outdata.dptr;
2903 len += ifaces->num*sizeof(struct ctdb_control_iface_info);
2905 if (len > outdata.dsize) {
2906 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
2907 "returned invalid data with size %u > %u\n",
2908 (unsigned int)outdata.dsize,
2909 (unsigned int)len));
2910 dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
2914 /* make sure we null terminate the returned strings */
2915 for (i=0; i < ifaces->num; i++) {
2916 ifaces->ifaces[i].name[CTDB_IFACE_SIZE] = '\0';
2919 *_ifaces = (struct ctdb_control_get_ifaces *)talloc_memdup(mem_ctx,
2922 talloc_free(outdata.dptr);
2923 if (*_ifaces == NULL) {
2924 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
2925 "talloc_memdup size %u failed\n",
2926 (unsigned int)outdata.dsize));
2933 int ctdb_ctrl_set_iface_link(struct ctdb_context *ctdb,
2934 struct timeval timeout, uint32_t destnode,
2935 TALLOC_CTX *mem_ctx,
2936 const struct ctdb_control_iface_info *info)
2942 indata.dptr = discard_const_p(uint8_t, info);
2943 indata.dsize = sizeof(*info);
2945 ret = ctdb_control(ctdb, destnode, 0,
2946 CTDB_CONTROL_SET_IFACE_LINK_STATE, 0, indata,
2947 mem_ctx, NULL, &res, &timeout, NULL);
2948 if (ret != 0 || res != 0) {
2949 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set iface link "
2950 "failed ret:%d res:%d\n",
2959 set/clear the permanent disabled bit on a remote node
2961 int ctdb_ctrl_modflags(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
2962 uint32_t set, uint32_t clear)
2966 struct ctdb_node_map *nodemap=NULL;
2967 struct ctdb_node_flag_change c;
2968 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
2973 /* find the recovery master */
2974 ret = ctdb_ctrl_getrecmaster(ctdb, tmp_ctx, timeout, CTDB_CURRENT_NODE, &recmaster);
2976 DEBUG(DEBUG_ERR, (__location__ " Unable to get recmaster from local node\n"));
2977 talloc_free(tmp_ctx);
2982 /* read the node flags from the recmaster */
2983 ret = ctdb_ctrl_getnodemap(ctdb, timeout, recmaster, tmp_ctx, &nodemap);
2985 DEBUG(DEBUG_ERR, (__location__ " Unable to get nodemap from node %u\n", destnode));
2986 talloc_free(tmp_ctx);
2989 if (destnode >= nodemap->num) {
2990 DEBUG(DEBUG_ERR,(__location__ " Nodemap from recmaster does not contain node %d\n", destnode));
2991 talloc_free(tmp_ctx);
2996 c.old_flags = nodemap->nodes[destnode].flags;
2997 c.new_flags = c.old_flags;
2999 c.new_flags &= ~clear;
3001 data.dsize = sizeof(c);
3002 data.dptr = (unsigned char *)&c;
3004 /* send the flags update to all connected nodes */
3005 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
3007 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_MODIFY_FLAGS,
3009 timeout, false, data,
3012 DEBUG(DEBUG_ERR, (__location__ " Unable to update nodeflags on remote nodes\n"));
3014 talloc_free(tmp_ctx);
3018 talloc_free(tmp_ctx);
3026 int ctdb_ctrl_get_all_tunables(struct ctdb_context *ctdb,
3027 struct timeval timeout,
3029 struct ctdb_tunable *tunables)
3035 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_ALL_TUNABLES, 0, tdb_null, ctdb,
3036 &outdata, &res, &timeout, NULL);
3037 if (ret != 0 || res != 0) {
3038 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get all tunables failed\n"));
3042 if (outdata.dsize != sizeof(*tunables)) {
3043 DEBUG(DEBUG_ERR,(__location__ " bad data size %u in ctdb_ctrl_get_all_tunables should be %u\n",
3044 (unsigned)outdata.dsize, (unsigned)sizeof(*tunables)));
3048 *tunables = *(struct ctdb_tunable *)outdata.dptr;
3049 talloc_free(outdata.dptr);
3054 add a public address to a node
3056 int ctdb_ctrl_add_public_ip(struct ctdb_context *ctdb,
3057 struct timeval timeout,
3059 struct ctdb_control_ip_iface *pub)
3065 data.dsize = offsetof(struct ctdb_control_ip_iface, iface) + pub->len;
3066 data.dptr = (unsigned char *)pub;
3068 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_ADD_PUBLIC_IP, 0, data, NULL,
3069 NULL, &res, &timeout, NULL);
3070 if (ret != 0 || res != 0) {
3071 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for add_public_ip failed\n"));
3079 delete a public address from a node
3081 int ctdb_ctrl_del_public_ip(struct ctdb_context *ctdb,
3082 struct timeval timeout,
3084 struct ctdb_control_ip_iface *pub)
3090 data.dsize = offsetof(struct ctdb_control_ip_iface, iface) + pub->len;
3091 data.dptr = (unsigned char *)pub;
3093 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_DEL_PUBLIC_IP, 0, data, NULL,
3094 NULL, &res, &timeout, NULL);
3095 if (ret != 0 || res != 0) {
3096 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for del_public_ip failed\n"));
3104 kill a tcp connection
3106 int ctdb_ctrl_killtcp(struct ctdb_context *ctdb,
3107 struct timeval timeout,
3109 struct ctdb_control_killtcp *killtcp)
3115 data.dsize = sizeof(struct ctdb_control_killtcp);
3116 data.dptr = (unsigned char *)killtcp;
3118 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_KILL_TCP, 0, data, NULL,
3119 NULL, &res, &timeout, NULL);
3120 if (ret != 0 || res != 0) {
3121 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for killtcp failed\n"));
3131 int ctdb_ctrl_gratious_arp(struct ctdb_context *ctdb,
3132 struct timeval timeout,
3134 ctdb_sock_addr *addr,
3140 struct ctdb_control_gratious_arp *gratious_arp;
3141 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
3144 len = strlen(ifname)+1;
3145 gratious_arp = talloc_size(tmp_ctx,
3146 offsetof(struct ctdb_control_gratious_arp, iface) + len);
3147 CTDB_NO_MEMORY(ctdb, gratious_arp);
3149 gratious_arp->addr = *addr;
3150 gratious_arp->len = len;
3151 memcpy(&gratious_arp->iface[0], ifname, len);
3154 data.dsize = offsetof(struct ctdb_control_gratious_arp, iface) + len;
3155 data.dptr = (unsigned char *)gratious_arp;
3157 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SEND_GRATIOUS_ARP, 0, data, NULL,
3158 NULL, &res, &timeout, NULL);
3159 if (ret != 0 || res != 0) {
3160 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for gratious_arp failed\n"));
3161 talloc_free(tmp_ctx);
3165 talloc_free(tmp_ctx);
3170 get a list of all tcp tickles that a node knows about for a particular vnn
3172 int ctdb_ctrl_get_tcp_tickles(struct ctdb_context *ctdb,
3173 struct timeval timeout, uint32_t destnode,
3174 TALLOC_CTX *mem_ctx,
3175 ctdb_sock_addr *addr,
3176 struct ctdb_control_tcp_tickle_list **list)
3179 TDB_DATA data, outdata;
3182 data.dptr = (uint8_t*)addr;
3183 data.dsize = sizeof(ctdb_sock_addr);
3185 ret = ctdb_control(ctdb, destnode, 0,
3186 CTDB_CONTROL_GET_TCP_TICKLE_LIST, 0, data,
3187 mem_ctx, &outdata, &status, NULL, NULL);
3188 if (ret != 0 || status != 0) {
3189 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get tcp tickles failed\n"));
3193 *list = (struct ctdb_control_tcp_tickle_list *)outdata.dptr;
3199 register a server id
3201 int ctdb_ctrl_register_server_id(struct ctdb_context *ctdb,
3202 struct timeval timeout,
3203 struct ctdb_server_id *id)
3209 data.dsize = sizeof(struct ctdb_server_id);
3210 data.dptr = (unsigned char *)id;
3212 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0,
3213 CTDB_CONTROL_REGISTER_SERVER_ID,
3215 NULL, &res, &timeout, NULL);
3216 if (ret != 0 || res != 0) {
3217 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for register server id failed\n"));
3225 unregister a server id
3227 int ctdb_ctrl_unregister_server_id(struct ctdb_context *ctdb,
3228 struct timeval timeout,
3229 struct ctdb_server_id *id)
3235 data.dsize = sizeof(struct ctdb_server_id);
3236 data.dptr = (unsigned char *)id;
3238 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0,
3239 CTDB_CONTROL_UNREGISTER_SERVER_ID,
3241 NULL, &res, &timeout, NULL);
3242 if (ret != 0 || res != 0) {
3243 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for unregister server id failed\n"));
3252 check if a server id exists
3254 if a server id does exist, return *status == 1, otherwise *status == 0
3256 int ctdb_ctrl_check_server_id(struct ctdb_context *ctdb,
3257 struct timeval timeout,
3259 struct ctdb_server_id *id,
3266 data.dsize = sizeof(struct ctdb_server_id);
3267 data.dptr = (unsigned char *)id;
3269 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_CHECK_SERVER_ID,
3271 NULL, &res, &timeout, NULL);
3273 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for check server id failed\n"));
3287 get the list of server ids that are registered on a node
3289 int ctdb_ctrl_get_server_id_list(struct ctdb_context *ctdb,
3290 TALLOC_CTX *mem_ctx,
3291 struct timeval timeout, uint32_t destnode,
3292 struct ctdb_server_id_list **svid_list)
3298 ret = ctdb_control(ctdb, destnode, 0,
3299 CTDB_CONTROL_GET_SERVER_ID_LIST, 0, tdb_null,
3300 mem_ctx, &outdata, &res, &timeout, NULL);
3301 if (ret != 0 || res != 0) {
3302 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get_server_id_list failed\n"));
3306 *svid_list = (struct ctdb_server_id_list *)talloc_steal(mem_ctx, outdata.dptr);
3312 initialise the ctdb daemon for client applications
3314 NOTE: In current code the daemon does not fork. This is for testing purposes only
3315 and to simplify the code.
3317 struct ctdb_context *ctdb_init(struct event_context *ev)
3320 struct ctdb_context *ctdb;
3322 ctdb = talloc_zero(ev, struct ctdb_context);
3324 DEBUG(DEBUG_ERR,(__location__ " talloc_zero failed.\n"));
3328 ctdb->idr = idr_init(ctdb);
3329 /* Wrap early to exercise code. */
3330 ctdb->lastid = INT_MAX-200;
3331 CTDB_NO_MEMORY_NULL(ctdb, ctdb->idr);
3333 ret = ctdb_set_socketname(ctdb, CTDB_PATH);
3335 DEBUG(DEBUG_ERR,(__location__ " ctdb_set_socketname failed.\n"));
3340 ctdb->statistics.statistics_start_time = timeval_current();
3349 void ctdb_set_flags(struct ctdb_context *ctdb, unsigned flags)
3351 ctdb->flags |= flags;
3355 setup the local socket name
3357 int ctdb_set_socketname(struct ctdb_context *ctdb, const char *socketname)
3359 ctdb->daemon.name = talloc_strdup(ctdb, socketname);
3360 CTDB_NO_MEMORY(ctdb, ctdb->daemon.name);
3365 const char *ctdb_get_socketname(struct ctdb_context *ctdb)
3367 return ctdb->daemon.name;
3371 return the pnn of this node
3373 uint32_t ctdb_get_pnn(struct ctdb_context *ctdb)
3380 get the uptime of a remote node
3382 struct ctdb_client_control_state *
3383 ctdb_ctrl_uptime_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
3385 return ctdb_control_send(ctdb, destnode, 0,
3386 CTDB_CONTROL_UPTIME, 0, tdb_null,
3387 mem_ctx, &timeout, NULL);
3390 int ctdb_ctrl_uptime_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, struct ctdb_uptime **uptime)
3396 ret = ctdb_control_recv(ctdb, state, mem_ctx, &outdata, &res, NULL);
3397 if (ret != 0 || res != 0) {
3398 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_uptime_recv failed\n"));
3402 *uptime = (struct ctdb_uptime *)talloc_steal(mem_ctx, outdata.dptr);
3407 int ctdb_ctrl_uptime(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, struct ctdb_uptime **uptime)
3409 struct ctdb_client_control_state *state;
3411 state = ctdb_ctrl_uptime_send(ctdb, mem_ctx, timeout, destnode);
3412 return ctdb_ctrl_uptime_recv(ctdb, mem_ctx, state, uptime);
3416 send a control to execute the "recovered" event script on a node
3418 int ctdb_ctrl_end_recovery(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
3423 ret = ctdb_control(ctdb, destnode, 0,
3424 CTDB_CONTROL_END_RECOVERY, 0, tdb_null,
3425 NULL, NULL, &status, &timeout, NULL);
3426 if (ret != 0 || status != 0) {
3427 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for end_recovery failed\n"));
3435 callback for the async helpers used when sending the same control
3436 to multiple nodes in parallell.
3438 static void async_callback(struct ctdb_client_control_state *state)
3440 struct client_async_data *data = talloc_get_type(state->async.private_data, struct client_async_data);
3441 struct ctdb_context *ctdb = talloc_get_type(state->ctdb, struct ctdb_context);
3445 uint32_t destnode = state->c->hdr.destnode;
3447 /* one more node has responded with recmode data */
3450 /* if we failed to push the db, then return an error and let
3451 the main loop try again.
3453 if (state->state != CTDB_CONTROL_DONE) {
3454 if ( !data->dont_log_errors) {
3455 DEBUG(DEBUG_ERR,("Async operation failed with state %d, opcode:%u\n", state->state, data->opcode));
3458 if (state->state == CTDB_CONTROL_TIMEOUT) {
3463 if (data->fail_callback) {
3464 data->fail_callback(ctdb, destnode, res, outdata,
3465 data->callback_data);
3470 state->async.fn = NULL;
3472 ret = ctdb_control_recv(ctdb, state, data, &outdata, &res, NULL);
3473 if ((ret != 0) || (res != 0)) {
3474 if ( !data->dont_log_errors) {
3475 DEBUG(DEBUG_ERR,("Async operation failed with ret=%d res=%d opcode=%u\n", ret, (int)res, data->opcode));
3478 if (data->fail_callback) {
3479 data->fail_callback(ctdb, destnode, res, outdata,
3480 data->callback_data);
3483 if ((ret == 0) && (data->callback != NULL)) {
3484 data->callback(ctdb, destnode, res, outdata,
3485 data->callback_data);
3490 void ctdb_client_async_add(struct client_async_data *data, struct ctdb_client_control_state *state)
3492 /* set up the callback functions */
3493 state->async.fn = async_callback;
3494 state->async.private_data = data;
3496 /* one more control to wait for to complete */
3501 /* wait for up to the maximum number of seconds allowed
3502 or until all nodes we expect a response from has replied
3504 int ctdb_client_async_wait(struct ctdb_context *ctdb, struct client_async_data *data)
3506 while (data->count > 0) {
3507 event_loop_once(ctdb->ev);
3509 if (data->fail_count != 0) {
3510 if (!data->dont_log_errors) {
3511 DEBUG(DEBUG_ERR,("Async wait failed - fail_count=%u\n",
3521 perform a simple control on the listed nodes
3522 The control cannot return data
3524 int ctdb_client_async_control(struct ctdb_context *ctdb,
3525 enum ctdb_controls opcode,
3528 struct timeval timeout,
3529 bool dont_log_errors,
3531 client_async_callback client_callback,
3532 client_async_callback fail_callback,
3533 void *callback_data)
3535 struct client_async_data *async_data;
3536 struct ctdb_client_control_state *state;
3539 async_data = talloc_zero(ctdb, struct client_async_data);
3540 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
3541 async_data->dont_log_errors = dont_log_errors;
3542 async_data->callback = client_callback;
3543 async_data->fail_callback = fail_callback;
3544 async_data->callback_data = callback_data;
3545 async_data->opcode = opcode;
3547 num_nodes = talloc_get_size(nodes) / sizeof(uint32_t);
3549 /* loop over all nodes and send an async control to each of them */
3550 for (j=0; j<num_nodes; j++) {
3551 uint32_t pnn = nodes[j];
3553 state = ctdb_control_send(ctdb, pnn, srvid, opcode,
3554 0, data, async_data, &timeout, NULL);
3555 if (state == NULL) {
3556 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control %u\n", (unsigned)opcode));
3557 talloc_free(async_data);
3561 ctdb_client_async_add(async_data, state);
3564 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3565 talloc_free(async_data);
3569 talloc_free(async_data);
3573 uint32_t *list_of_vnnmap_nodes(struct ctdb_context *ctdb,
3574 struct ctdb_vnn_map *vnn_map,
3575 TALLOC_CTX *mem_ctx,
3578 int i, j, num_nodes;
3581 for (i=num_nodes=0;i<vnn_map->size;i++) {
3582 if (vnn_map->map[i] == ctdb->pnn && !include_self) {
3588 nodes = talloc_array(mem_ctx, uint32_t, num_nodes);
3589 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
3591 for (i=j=0;i<vnn_map->size;i++) {
3592 if (vnn_map->map[i] == ctdb->pnn && !include_self) {
3595 nodes[j++] = vnn_map->map[i];
3601 /* Get list of nodes not including those with flags specified by mask.
3602 * If exclude_pnn is not -1 then exclude that pnn from the list.
3604 uint32_t *list_of_nodes(struct ctdb_context *ctdb,
3605 struct ctdb_node_map *node_map,
3606 TALLOC_CTX *mem_ctx,
3610 int i, j, num_nodes;
3613 for (i=num_nodes=0;i<node_map->num;i++) {
3614 if (node_map->nodes[i].flags & mask) {
3617 if (node_map->nodes[i].pnn == exclude_pnn) {
3623 nodes = talloc_array(mem_ctx, uint32_t, num_nodes);
3624 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
3626 for (i=j=0;i<node_map->num;i++) {
3627 if (node_map->nodes[i].flags & mask) {
3630 if (node_map->nodes[i].pnn == exclude_pnn) {
3633 nodes[j++] = node_map->nodes[i].pnn;
3639 uint32_t *list_of_active_nodes(struct ctdb_context *ctdb,
3640 struct ctdb_node_map *node_map,
3641 TALLOC_CTX *mem_ctx,
3644 return list_of_nodes(ctdb, node_map, mem_ctx, NODE_FLAGS_INACTIVE,
3645 include_self ? -1 : ctdb->pnn);
3648 uint32_t *list_of_connected_nodes(struct ctdb_context *ctdb,
3649 struct ctdb_node_map *node_map,
3650 TALLOC_CTX *mem_ctx,
3653 return list_of_nodes(ctdb, node_map, mem_ctx, NODE_FLAGS_DISCONNECTED,
3654 include_self ? -1 : ctdb->pnn);
3658 this is used to test if a pnn lock exists and if it exists will return
3659 the number of connections that pnn has reported or -1 if that recovery
3660 daemon is not running.
3663 ctdb_read_pnn_lock(int fd, int32_t pnn)
3668 lock.l_type = F_WRLCK;
3669 lock.l_whence = SEEK_SET;
3674 if (fcntl(fd, F_GETLK, &lock) != 0) {
3675 DEBUG(DEBUG_ERR, (__location__ " F_GETLK failed with %s\n", strerror(errno)));
3679 if (lock.l_type == F_UNLCK) {
3683 if (pread(fd, &c, 1, pnn) == -1) {
3684 DEBUG(DEBUG_CRIT,(__location__ " failed read pnn count - %s\n", strerror(errno)));
3692 get capabilities of a remote node
3694 struct ctdb_client_control_state *
3695 ctdb_ctrl_getcapabilities_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
3697 return ctdb_control_send(ctdb, destnode, 0,
3698 CTDB_CONTROL_GET_CAPABILITIES, 0, tdb_null,
3699 mem_ctx, &timeout, NULL);
3702 int ctdb_ctrl_getcapabilities_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *capabilities)
3708 ret = ctdb_control_recv(ctdb, state, mem_ctx, &outdata, &res, NULL);
3709 if ( (ret != 0) || (res != 0) ) {
3710 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_getcapabilities_recv failed\n"));
3715 *capabilities = *((uint32_t *)outdata.dptr);
3721 int ctdb_ctrl_getcapabilities(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *capabilities)
3723 struct ctdb_client_control_state *state;
3724 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
3727 state = ctdb_ctrl_getcapabilities_send(ctdb, tmp_ctx, timeout, destnode);
3728 ret = ctdb_ctrl_getcapabilities_recv(ctdb, tmp_ctx, state, capabilities);
3729 talloc_free(tmp_ctx);
3740 static struct server_id server_id_get(struct ctdb_context *ctdb, uint32_t reqid)
3742 struct server_id id;
3746 id.vnn = ctdb_get_pnn(ctdb);
3747 id.unique_id = id.vnn;
3748 id.unique_id = (id.unique_id << 32) | reqid;
3753 static bool server_id_equal(struct server_id *id1, struct server_id *id2)
3755 if (id1->pid != id2->pid) {
3759 if (id1->task_id != id2->task_id) {
3763 if (id1->vnn != id2->vnn) {
3767 if (id1->unique_id != id2->unique_id) {
3774 static bool server_id_exists(struct ctdb_context *ctdb, struct server_id *id)
3776 struct ctdb_server_id sid;
3780 sid.type = SERVER_TYPE_SAMBA;
3782 sid.server_id = id->pid;
3784 ret = ctdb_ctrl_check_server_id(ctdb, timeval_current_ofs(3,0),
3785 id->vnn, &sid, &result);
3787 /* If control times out, assume server_id exists. */
3800 * check whether a transaction is active on a given db on a given node
3802 int32_t ctdb_ctrl_transaction_active(struct ctdb_context *ctdb,
3810 indata.dptr = (uint8_t *)&db_id;
3811 indata.dsize = sizeof(db_id);
3813 ret = ctdb_control(ctdb, destnode, 0,
3814 CTDB_CONTROL_TRANS2_ACTIVE,
3815 0, indata, NULL, NULL, &status,
3819 DEBUG(DEBUG_ERR, (__location__ " ctdb control for transaction_active failed\n"));
/*
 * client-side state of one transaction on a persistent database
 */
struct ctdb_transaction_handle {
	/* the database the transaction runs on */
	struct ctdb_db_context *ctdb_db;
	/* true while a failed commit is being replayed; no new entries
	 * are appended to m_all in that mode */
	bool in_replay;
	/*
	 * operations performed under the transaction are recorded twice:
	 * - m_all holds every read and every write, in order,
	 * - m_write holds only the writes
	 */
	struct ctdb_marshall_buffer *m_all;
	struct ctdb_marshall_buffer *m_write;
};
3839 /* start a transaction on a database */
3840 static int ctdb_transaction_destructor(struct ctdb_transaction_handle *h)
3842 tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
3846 /* start a transaction on a database */
3847 static int ctdb_transaction_fetch_start(struct ctdb_transaction_handle *h)
3849 struct ctdb_record_handle *rh;
3852 struct ctdb_ltdb_header header;
3853 TALLOC_CTX *tmp_ctx;
3854 const char *keyname = CTDB_TRANSACTION_LOCK_KEY;
3856 struct ctdb_db_context *ctdb_db = h->ctdb_db;
3860 key.dptr = discard_const(keyname);
3861 key.dsize = strlen(keyname);
3863 if (!ctdb_db->persistent) {
3864 DEBUG(DEBUG_ERR,(__location__ " Attempted transaction on non-persistent database\n"));
3869 tmp_ctx = talloc_new(h);
3871 rh = ctdb_fetch_lock(ctdb_db, tmp_ctx, key, NULL);
3873 DEBUG(DEBUG_ERR,(__location__ " Failed to fetch_lock database\n"));
3874 talloc_free(tmp_ctx);
3878 status = ctdb_ctrl_transaction_active(ctdb_db->ctdb,
3882 unsigned long int usec = (1000 + random()) % 100000;
3883 DEBUG(DEBUG_DEBUG, (__location__ " transaction is active "
3884 "on db_id[0x%08x]. waiting for %lu "
3886 ctdb_db->db_id, usec));
3887 talloc_free(tmp_ctx);
3893 * store the pid in the database:
3894 * it is not enough that the node is dmaster...
3897 data.dptr = (unsigned char *)&pid;
3898 data.dsize = sizeof(pid_t);
3900 rh->header.dmaster = ctdb_db->ctdb->pnn;
3901 ret = ctdb_ltdb_store(ctdb_db, key, &(rh->header), data);
3903 DEBUG(DEBUG_ERR, (__location__ " Failed to store pid in "
3904 "transaction record\n"));
3905 talloc_free(tmp_ctx);
3911 ret = tdb_transaction_start(ctdb_db->ltdb->tdb);
3913 DEBUG(DEBUG_ERR,(__location__ " Failed to start tdb transaction\n"));
3914 talloc_free(tmp_ctx);
3918 ret = ctdb_ltdb_fetch(ctdb_db, key, &header, tmp_ctx, &data);
3920 DEBUG(DEBUG_ERR,(__location__ " Failed to re-fetch transaction "
3921 "lock record inside transaction\n"));
3922 tdb_transaction_cancel(ctdb_db->ltdb->tdb);
3923 talloc_free(tmp_ctx);
3927 if (header.dmaster != ctdb_db->ctdb->pnn) {
3928 DEBUG(DEBUG_DEBUG,(__location__ " not dmaster any more on "
3929 "transaction lock record\n"));
3930 tdb_transaction_cancel(ctdb_db->ltdb->tdb);
3931 talloc_free(tmp_ctx);
3935 if ((data.dsize != sizeof(pid_t)) || (*(pid_t *)(data.dptr) != pid)) {
3936 DEBUG(DEBUG_DEBUG, (__location__ " my pid is not stored in "
3937 "the transaction lock record\n"));
3938 tdb_transaction_cancel(ctdb_db->ltdb->tdb);
3939 talloc_free(tmp_ctx);
3943 talloc_free(tmp_ctx);
3949 /* start a transaction on a database */
3950 struct ctdb_transaction_handle *ctdb_transaction_start(struct ctdb_db_context *ctdb_db,
3951 TALLOC_CTX *mem_ctx)
3953 struct ctdb_transaction_handle *h;
3956 h = talloc_zero(mem_ctx, struct ctdb_transaction_handle);
3958 DEBUG(DEBUG_ERR,(__location__ " oom for transaction handle\n"));
3962 h->ctdb_db = ctdb_db;
3964 ret = ctdb_transaction_fetch_start(h);
3970 talloc_set_destructor(h, ctdb_transaction_destructor);
3978 fetch a record inside a transaction
3980 int ctdb_transaction_fetch(struct ctdb_transaction_handle *h,
3981 TALLOC_CTX *mem_ctx,
3982 TDB_DATA key, TDB_DATA *data)
3984 struct ctdb_ltdb_header header;
3987 ZERO_STRUCT(header);
3989 ret = ctdb_ltdb_fetch(h->ctdb_db, key, &header, mem_ctx, data);
3990 if (ret == -1 && header.dmaster == (uint32_t)-1) {
3991 /* record doesn't exist yet */
4000 if (!h->in_replay) {
4001 h->m_all = ctdb_marshall_add(h, h->m_all, h->ctdb_db->db_id, 1, key, NULL, *data);
4002 if (h->m_all == NULL) {
4003 DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
4012 stores a record inside a transaction
4014 int ctdb_transaction_store(struct ctdb_transaction_handle *h,
4015 TDB_DATA key, TDB_DATA data)
4017 TALLOC_CTX *tmp_ctx = talloc_new(h);
4018 struct ctdb_ltdb_header header;
4022 ZERO_STRUCT(header);
4024 /* we need the header so we can update the RSN */
4025 ret = ctdb_ltdb_fetch(h->ctdb_db, key, &header, tmp_ctx, &olddata);
4026 if (ret == -1 && header.dmaster == (uint32_t)-1) {
4027 /* the record doesn't exist - create one with us as dmaster.
4028 This is only safe because we are in a transaction and this
4029 is a persistent database */
4030 ZERO_STRUCT(header);
4031 } else if (ret != 0) {
4032 DEBUG(DEBUG_ERR,(__location__ " Failed to fetch record\n"));
4033 talloc_free(tmp_ctx);
4037 if (data.dsize == olddata.dsize &&
4038 memcmp(data.dptr, olddata.dptr, data.dsize) == 0) {
4039 /* save writing the same data */
4040 talloc_free(tmp_ctx);
4044 header.dmaster = h->ctdb_db->ctdb->pnn;
4047 if (!h->in_replay) {
4048 h->m_all = ctdb_marshall_add(h, h->m_all, h->ctdb_db->db_id, 0, key, NULL, data);
4049 if (h->m_all == NULL) {
4050 DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
4051 talloc_free(tmp_ctx);
4056 h->m_write = ctdb_marshall_add(h, h->m_write, h->ctdb_db->db_id, 0, key, &header, data);
4057 if (h->m_write == NULL) {
4058 DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
4059 talloc_free(tmp_ctx);
4063 ret = ctdb_ltdb_store(h->ctdb_db, key, &header, data);
4065 talloc_free(tmp_ctx);
4071 replay a transaction
4073 static int ctdb_replay_transaction(struct ctdb_transaction_handle *h)
4076 struct ctdb_rec_data *rec = NULL;
4078 h->in_replay = true;
4079 talloc_free(h->m_write);
4082 ret = ctdb_transaction_fetch_start(h);
4087 for (i=0;i<h->m_all->count;i++) {
4090 rec = ctdb_marshall_loop_next(h->m_all, rec, NULL, NULL, &key, &data);
4092 DEBUG(DEBUG_ERR, (__location__ " Out of records in ctdb_replay_transaction?\n"));
4096 if (rec->reqid == 0) {
4098 if (ctdb_transaction_store(h, key, data) != 0) {
4103 TALLOC_CTX *tmp_ctx = talloc_new(h);
4105 if (ctdb_transaction_fetch(h, tmp_ctx, key, &data2) != 0) {
4106 talloc_free(tmp_ctx);
4109 if (data2.dsize != data.dsize ||
4110 memcmp(data2.dptr, data.dptr, data.dsize) != 0) {
4111 /* the record has changed on us - we have to give up */
4112 talloc_free(tmp_ctx);
4115 talloc_free(tmp_ctx);
4122 tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
4128 commit a transaction
4130 int ctdb_transaction_commit(struct ctdb_transaction_handle *h)
4134 struct ctdb_context *ctdb = h->ctdb_db->ctdb;
4135 struct timeval timeout;
4136 enum ctdb_controls failure_control = CTDB_CONTROL_TRANS2_ERROR;
4138 talloc_set_destructor(h, NULL);
4140 /* our commit strategy is quite complex.
4142 - we first try to commit the changes to all other nodes
4144 - if that works, then we commit locally and we are done
4146 - if a commit on another node fails, then we need to cancel
4147 the transaction, then restart the transaction (thus
4148 opening a window of time for a pending recovery to
4149 complete), then replay the transaction, checking all the
4150 reads and writes (checking that reads give the same data,
4151 and writes succeed). Then we retry the transaction to the
4156 if (h->m_write == NULL) {
4157 /* no changes were made */
4158 tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
4163 /* tell ctdbd to commit to the other nodes */
4164 timeout = timeval_current_ofs(1, 0);
4165 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
4166 retries==0?CTDB_CONTROL_TRANS2_COMMIT:CTDB_CONTROL_TRANS2_COMMIT_RETRY, 0,
4167 ctdb_marshall_finish(h->m_write), NULL, NULL, &status,
4169 if (ret != 0 || status != 0) {
4170 tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
4171 DEBUG(DEBUG_NOTICE, (__location__ " transaction commit%s failed"
4172 ", retrying after 1 second...\n",
4173 (retries==0)?"":"retry "));
4177 failure_control = CTDB_CONTROL_TRANS2_ERROR;
4179 /* work out what error code we will give if we
4180 have to fail the operation */
4181 switch ((enum ctdb_trans2_commit_error)status) {
4182 case CTDB_TRANS2_COMMIT_SUCCESS:
4183 case CTDB_TRANS2_COMMIT_SOMEFAIL:
4184 case CTDB_TRANS2_COMMIT_TIMEOUT:
4185 failure_control = CTDB_CONTROL_TRANS2_ERROR;
4187 case CTDB_TRANS2_COMMIT_ALLFAIL:
4188 failure_control = CTDB_CONTROL_TRANS2_FINISHED;
4193 if (++retries == 100) {
4194 DEBUG(DEBUG_ERR,(__location__ " Giving up transaction on db 0x%08x after %d retries failure_control=%u\n",
4195 h->ctdb_db->db_id, retries, (unsigned)failure_control));
4196 ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
4197 failure_control, CTDB_CTRL_FLAG_NOREPLY,
4198 tdb_null, NULL, NULL, NULL, NULL, NULL);
4203 if (ctdb_replay_transaction(h) != 0) {
4204 DEBUG(DEBUG_ERR, (__location__ " Failed to replay "
4205 "transaction on db 0x%08x, "
4206 "failure control =%u\n",
4208 (unsigned)failure_control));
4209 ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
4210 failure_control, CTDB_CTRL_FLAG_NOREPLY,
4211 tdb_null, NULL, NULL, NULL, NULL, NULL);
4217 failure_control = CTDB_CONTROL_TRANS2_ERROR;
4220 /* do the real commit locally */
4221 ret = tdb_transaction_commit(h->ctdb_db->ltdb->tdb);
4223 DEBUG(DEBUG_ERR, (__location__ " Failed to commit transaction "
4224 "on db id 0x%08x locally, "
4225 "failure_control=%u\n",
4227 (unsigned)failure_control));
4228 ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
4229 failure_control, CTDB_CTRL_FLAG_NOREPLY,
4230 tdb_null, NULL, NULL, NULL, NULL, NULL);
4235 /* tell ctdbd that we are finished with our local commit */
4236 ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
4237 CTDB_CONTROL_TRANS2_FINISHED, CTDB_CTRL_FLAG_NOREPLY,
4238 tdb_null, NULL, NULL, NULL, NULL, NULL);
4244 recovery daemon ping to main daemon
4246 int ctdb_ctrl_recd_ping(struct ctdb_context *ctdb)
4251 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_RECD_PING, 0, tdb_null,
4252 ctdb, NULL, &res, NULL, NULL);
4253 if (ret != 0 || res != 0) {
4254 DEBUG(DEBUG_ERR,("Failed to send recd ping\n"));
4261 /* When forking the main daemon and the child process needs to connect
4262 * back to the daemon as a client process, this function can be used
4263 * to change the ctdb context from daemon into client mode. The child
4264 * process must be created using ctdb_fork() and not fork() -
4265 * ctdb_fork() does some necessary housekeeping.
4267 int switch_from_server_to_client(struct ctdb_context *ctdb, const char *fmt, ...)
4272 /* Add extra information so we can identify this in the logs */
4274 debug_extra = talloc_strdup_append(talloc_vasprintf(NULL, fmt, ap), ":");
4277 /* get a new event context */
4278 ctdb->ev = event_context_init(ctdb);
4279 tevent_loop_allow_nesting(ctdb->ev);
4281 /* Connect to main CTDB daemon */
4282 ret = ctdb_socket_connect(ctdb);
4284 DEBUG(DEBUG_ALERT, (__location__ " Failed to init ctdb client\n"));
4288 ctdb->can_send_controls = true;
4294 get the status of running the monitor eventscripts: NULL means never run.
/*
 * Fetch the event script status for one call type from destnode using
 * CTDB_CONTROL_GET_EVENT_SCRIPT_STATUS.
 *
 * ctdb     - context the control is sent on
 * timeout  - passed by address to ctdb_control()
 * destnode - node the control is sent to
 * mem_ctx  - talloc parent for the reply buffer and for *scripts
 * type     - event script call type, sent as a uint32_t payload
 * scripts  - out: talloc_memdup() copy of the reply payload
 *
 * A non-zero ret or res is logged at DEBUG_ERR together with both
 * values.
 *
 * NOTE(review): the body of the empty-reply branch (outdata.dsize == 0)
 * and the return statements are not visible in this view; presumably
 * *scripts is set to NULL for an empty reply - confirm.
 */
4296 int ctdb_ctrl_getscriptstatus(struct ctdb_context *ctdb,
4297 struct timeval timeout, uint32_t destnode,
4298 TALLOC_CTX *mem_ctx, enum ctdb_eventscript_call type,
4299 struct ctdb_scripts_wire **scripts)
4302 TDB_DATA outdata, indata;
/* copy the enum into a uint32_t so the payload has a fixed size */
4304 uint32_t uinttype = type;
4306 indata.dptr = (uint8_t *)&uinttype;
4307 indata.dsize = sizeof(uinttype);
4309 ret = ctdb_control(ctdb, destnode, 0,
4310 CTDB_CONTROL_GET_EVENT_SCRIPT_STATUS, 0, indata,
4311 mem_ctx, &outdata, &res, &timeout, NULL);
4312 if (ret != 0 || res != 0) {
4313 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getscriptstatus failed ret:%d res:%d\n", ret, res));
4317 if (outdata.dsize == 0) {
/*
 * Duplicate the reply onto mem_ctx, then release the buffer that
 * ctdb_control() handed back.
 * NOTE(review): no NULL check on the talloc_memdup() result is visible.
 */
4320 *scripts = (struct ctdb_scripts_wire *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
4321 talloc_free(outdata.dptr);
4328 tell the main daemon how long it took to lock the reclock file
/*
 * Report a latency value to CTDB_CURRENT_NODE using
 * CTDB_CONTROL_RECD_RECLOCK_LATENCY.
 *
 * ctdb    - context the control is sent on (also used as the
 *           memory-context argument)
 * timeout - NOTE(review): not used by any visible line; NULL is passed
 *           in the timeout slot of ctdb_control() - confirm intent
 * latency - value to report; the raw bytes of the double are the
 *           payload
 *
 * A non-zero ret or res is logged at DEBUG_ERR.
 *
 * NOTE(review): the return statements are not visible in this view;
 * presumably -1 on failure and 0 on success - confirm.
 */
4330 int ctdb_ctrl_report_recd_lock_latency(struct ctdb_context *ctdb, struct timeval timeout, double latency)
/* payload is the in-memory representation of the double parameter */
4336 data.dptr = (uint8_t *)&latency;
4337 data.dsize = sizeof(latency);
4339 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_RECD_RECLOCK_LATENCY, 0, data,
4340 ctdb, NULL, &res, NULL, NULL);
4341 if (ret != 0 || res != 0) {
4342 DEBUG(DEBUG_ERR,("Failed to send recd reclock latency\n"));
4350 get the name of the reclock file
/*
 * Query destnode with CTDB_CONTROL_GET_RECLOCK_FILE and hand the reply
 * back as a string allocated on mem_ctx.
 *
 * ctdb     - context the control is sent on
 * timeout  - passed by address to ctdb_control()
 * destnode - node the control is sent to
 * mem_ctx  - talloc parent for the reply buffer and for *name
 *
 * NOTE(review): the rest of the parameter list, the failure branch, the
 * empty-reply branch (data.dsize == 0) and the return statements are
 * not visible in this view - confirm.
 */
4352 int ctdb_ctrl_getreclock(struct ctdb_context *ctdb, struct timeval timeout,
4353 uint32_t destnode, TALLOC_CTX *mem_ctx,
4360 ret = ctdb_control(ctdb, destnode, 0,
4361 CTDB_CONTROL_GET_RECLOCK_FILE, 0, tdb_null,
4362 mem_ctx, &data, &res, &timeout, NULL);
4363 if (ret != 0 || res != 0) {
4367 if (data.dsize == 0) {
/*
 * The reply is treated as a C string.
 * NOTE(review): assumes the payload is NUL-terminated - confirm against
 * the sender.
 */
4370 *name = talloc_strdup(mem_ctx, discard_const(data.dptr));
/* the copy is owned by mem_ctx; drop the original reply buffer */
4372 talloc_free(data.dptr);
4378 set the reclock filename for a node
/*
 * Send CTDB_CONTROL_SET_RECLOCK_FILE to destnode.
 *
 * ctdb     - context the control is sent on
 * timeout  - passed by address to ctdb_control()
 * destnode - node the control is sent to
 * reclock  - file name to send, or NULL
 *
 * For a non-NULL name the payload is the string including its
 * terminating NUL. No reply payload is requested. A non-zero ret or
 * res is logged at DEBUG_ERR.
 *
 * NOTE(review): the body of the reclock == NULL branch and the return
 * statements are not visible in this view; presumably an empty payload
 * is sent for NULL - confirm.
 */
4380 int ctdb_ctrl_setreclock(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, const char *reclock)
4386 if (reclock == NULL) {
/* + 1 so the terminating NUL travels with the name */
4390 data.dsize = strlen(reclock) + 1;
4391 data.dptr = discard_const(reclock);
4394 ret = ctdb_control(ctdb, destnode, 0,
4395 CTDB_CONTROL_SET_RECLOCK_FILE, 0, data,
4396 NULL, NULL, &res, &timeout, NULL);
4397 if (ret != 0 || res != 0) {
4398 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setreclock failed\n"));
/*
 * Send CTDB_CONTROL_STOP_NODE to destnode with an empty payload.
 *
 * ctdb     - context the control is sent on (also used as the
 *            memory-context argument)
 * timeout  - passed by address to ctdb_control()
 * destnode - node the control is sent to
 *
 * A non-zero ret or a non-zero control status (res) is logged at
 * DEBUG_ERR.
 *
 * NOTE(review): the return statements are not visible in this view;
 * presumably -1 on failure and 0 on success - confirm.
 */
4408 int ctdb_ctrl_stop_node(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
4413 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_STOP_NODE, 0, tdb_null,
4414 ctdb, NULL, &res, &timeout, NULL);
4415 if (ret != 0 || res != 0) {
4416 DEBUG(DEBUG_ERR,("Failed to stop node\n"));
/*
 * Send CTDB_CONTROL_CONTINUE_NODE to destnode with an empty payload.
 *
 * ctdb     - context the control is sent on (also used as the
 *            memory-context argument)
 * timeout  - passed by address to ctdb_control()
 * destnode - node the control is sent to
 *
 * Unlike ctdb_ctrl_stop_node(), NULL is passed in the status slot, so
 * only the return value of ctdb_control() is available here.
 *
 * NOTE(review): the failure test and the return statements are not
 * visible in this view - confirm.
 */
4426 int ctdb_ctrl_continue_node(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
4430 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_CONTINUE_NODE, 0, tdb_null,
4431 ctdb, NULL, NULL, &timeout, NULL);
4433 DEBUG(DEBUG_ERR,("Failed to continue node\n"));
4441 set the natgw state for a node
/*
 * Send CTDB_CONTROL_SET_NATGWSTATE to destnode.
 *
 * ctdb       - context the control is sent on
 * timeout    - passed by address to ctdb_control()
 * destnode   - node the control is sent to
 * natgwstate - value to send; the payload is this uint32_t
 *
 * No reply payload is requested. A non-zero ret or res is logged at
 * DEBUG_ERR.
 *
 * NOTE(review): the return statements are not visible in this view;
 * presumably -1 on failure and 0 on success - confirm.
 */
4443 int ctdb_ctrl_setnatgwstate(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t natgwstate)
/* payload points at the by-value parameter */
4449 data.dsize = sizeof(natgwstate);
4450 data.dptr = (uint8_t *)&natgwstate;
4452 ret = ctdb_control(ctdb, destnode, 0,
4453 CTDB_CONTROL_SET_NATGWSTATE, 0, data,
4454 NULL, NULL, &res, &timeout, NULL);
4455 if (ret != 0 || res != 0) {
4456 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setnatgwstate failed\n"));
4464 set the lmaster role for a node
/*
 * Send CTDB_CONTROL_SET_LMASTERROLE to destnode.
 *
 * ctdb        - context the control is sent on
 * timeout     - passed by address to ctdb_control()
 * destnode    - node the control is sent to
 * lmasterrole - value to send; the payload is this uint32_t
 *
 * No reply payload is requested. A non-zero ret or res is logged at
 * DEBUG_ERR.
 *
 * NOTE(review): the return statements are not visible in this view;
 * presumably -1 on failure and 0 on success - confirm.
 */
4466 int ctdb_ctrl_setlmasterrole(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t lmasterrole)
/* payload points at the by-value parameter */
4472 data.dsize = sizeof(lmasterrole);
4473 data.dptr = (uint8_t *)&lmasterrole;
4475 ret = ctdb_control(ctdb, destnode, 0,
4476 CTDB_CONTROL_SET_LMASTERROLE, 0, data,
4477 NULL, NULL, &res, &timeout, NULL);
4478 if (ret != 0 || res != 0) {
4479 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setlmasterrole failed\n"));
4487 set the recmaster role for a node
/*
 * Send CTDB_CONTROL_SET_RECMASTERROLE to destnode.
 *
 * ctdb          - context the control is sent on
 * timeout       - passed by address to ctdb_control()
 * destnode      - node the control is sent to
 * recmasterrole - value to send; the payload is this uint32_t
 *
 * No reply payload is requested. A non-zero ret or res is logged at
 * DEBUG_ERR.
 *
 * NOTE(review): the return statements are not visible in this view;
 * presumably -1 on failure and 0 on success - confirm.
 */
4489 int ctdb_ctrl_setrecmasterrole(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmasterrole)
/* payload points at the by-value parameter */
4495 data.dsize = sizeof(recmasterrole);
4496 data.dptr = (uint8_t *)&recmasterrole;
4498 ret = ctdb_control(ctdb, destnode, 0,
4499 CTDB_CONTROL_SET_RECMASTERROLE, 0, data,
4500 NULL, NULL, &res, &timeout, NULL);
4501 if (ret != 0 || res != 0) {
4502 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setrecmasterrole failed\n"));
4509 /* enable an eventscript
/*
 * Send CTDB_CONTROL_ENABLE_SCRIPT to destnode.
 *
 * ctdb     - context the control is sent on
 * timeout  - passed by address to ctdb_control()
 * destnode - node the control is sent to
 * script   - script name; sent as the payload including its
 *            terminating NUL
 *
 * No reply payload is requested. A non-zero ret or res is logged at
 * DEBUG_ERR.
 *
 * NOTE(review): no NULL check on script is visible before strlen(),
 * and the return statements are not visible in this view - confirm.
 */
4511 int ctdb_ctrl_enablescript(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, const char *script)
/* + 1 so the terminating NUL travels with the name */
4517 data.dsize = strlen(script) + 1;
4518 data.dptr = discard_const(script);
4520 ret = ctdb_control(ctdb, destnode, 0,
4521 CTDB_CONTROL_ENABLE_SCRIPT, 0, data,
4522 NULL, NULL, &res, &timeout, NULL);
4523 if (ret != 0 || res != 0) {
4524 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for enablescript failed\n"));
4531 /* disable an eventscript
/*
 * Send CTDB_CONTROL_DISABLE_SCRIPT to destnode.
 *
 * ctdb     - context the control is sent on
 * timeout  - passed by address to ctdb_control()
 * destnode - node the control is sent to
 * script   - script name; sent as the payload including its
 *            terminating NUL
 *
 * No reply payload is requested. A non-zero ret or res is logged at
 * DEBUG_ERR.
 *
 * NOTE(review): no NULL check on script is visible before strlen(),
 * and the return statements are not visible in this view - confirm.
 */
4533 int ctdb_ctrl_disablescript(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, const char *script)
/* + 1 so the terminating NUL travels with the name */
4539 data.dsize = strlen(script) + 1;
4540 data.dptr = discard_const(script);
4542 ret = ctdb_control(ctdb, destnode, 0,
4543 CTDB_CONTROL_DISABLE_SCRIPT, 0, data,
4544 NULL, NULL, &res, &timeout, NULL);
4545 if (ret != 0 || res != 0) {
4546 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for disablescript failed\n"));
/*
 * Send CTDB_CONTROL_SET_BAN_STATE to destnode.
 *
 * ctdb     - context the control is sent on
 * timeout  - passed by address to ctdb_control()
 * destnode - node the control is sent to
 * bantime  - structure to send; the payload is sizeof(*bantime) bytes
 *            read directly from the caller's struct
 *
 * No reply payload is requested. A non-zero ret or res is logged at
 * DEBUG_ERR.
 *
 * NOTE(review): the return statements are not visible in this view;
 * presumably -1 on failure and 0 on success - confirm.
 */
4554 int ctdb_ctrl_set_ban(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, struct ctdb_ban_time *bantime)
4560 data.dsize = sizeof(*bantime);
4561 data.dptr = (uint8_t *)bantime;
4563 ret = ctdb_control(ctdb, destnode, 0,
4564 CTDB_CONTROL_SET_BAN_STATE, 0, data,
4565 NULL, NULL, &res, &timeout, NULL);
4566 if (ret != 0 || res != 0) {
4567 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set ban state failed\n"));
/*
 * Query destnode with CTDB_CONTROL_GET_BAN_STATE and return the reply
 * as a struct ctdb_ban_time owned by mem_ctx.
 *
 * ctdb     - context the control is sent on
 * timeout  - passed by address to ctdb_control()
 * destnode - node the control is sent to
 * mem_ctx  - talloc context that receives ownership of the reply
 * bantime  - out: the reply buffer, cast to struct ctdb_ban_time
 *
 * The reply is first allocated on a temporary talloc context which is
 * freed on both the failure and the success path.
 *
 * NOTE(review): the return statements are not visible in this view;
 * presumably -1 on failure and 0 on success - confirm.
 */
4575 int ctdb_ctrl_get_ban(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_ban_time **bantime)
4580 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
4582 ret = ctdb_control(ctdb, destnode, 0,
4583 CTDB_CONTROL_GET_BAN_STATE, 0, tdb_null,
4584 tmp_ctx, &outdata, &res, &timeout, NULL);
4585 if (ret != 0 || res != 0) {
/*
 * NOTE(review): the message says "set ban state" although this is the
 * GET_BAN_STATE path - looks like a copy-paste slip; confirm before
 * changing the log text.
 */
4586 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set ban state failed\n"));
4587 talloc_free(tmp_ctx);
/* move the reply to the caller's context before tmp_ctx is freed */
4591 *bantime = (struct ctdb_ban_time *)talloc_steal(mem_ctx, outdata.dptr);
4592 talloc_free(tmp_ctx);
/*
 * Send CTDB_CONTROL_SET_DB_PRIORITY to destnode.
 *
 * ctdb     - context the control is sent on
 * timeout  - passed by address to ctdb_control()
 * destnode - node the control is sent to
 * db_prio  - structure to send; the payload is sizeof(*db_prio) bytes
 *            read directly from the caller's struct
 *
 * A temporary talloc context is passed as the memory context and freed
 * on both paths; no reply payload is requested. A non-zero ret or res
 * is logged at DEBUG_ERR.
 *
 * NOTE(review): the return statements are not visible in this view;
 * presumably -1 on failure and 0 on success - confirm.
 */
4598 int ctdb_ctrl_set_db_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, struct ctdb_db_priority *db_prio)
4603 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
4605 data.dptr = (uint8_t*)db_prio;
4606 data.dsize = sizeof(*db_prio);
4608 ret = ctdb_control(ctdb, destnode, 0,
4609 CTDB_CONTROL_SET_DB_PRIORITY, 0, data,
4610 tmp_ctx, NULL, &res, &timeout, NULL);
4611 if (ret != 0 || res != 0) {
4612 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set_db_priority failed\n"));
4613 talloc_free(tmp_ctx);
4617 talloc_free(tmp_ctx);
/*
 * Send CTDB_CONTROL_GET_DB_PRIORITY for db_id to destnode.
 *
 * ctdb     - context the control is sent on
 * timeout  - passed by address to ctdb_control()
 * destnode - node the control is sent to
 * db_id    - database id; the payload is this uint32_t
 * priority - out parameter
 *
 * A temporary talloc context is passed as the memory context and freed
 * on both paths; no reply payload is requested.
 *
 * NOTE(review): only a negative res counts as failure here, which
 * suggests the priority is carried in the control status; the lines
 * that would store it into *priority and the return statements are not
 * visible in this view - confirm.
 */
4622 int ctdb_ctrl_get_db_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t db_id, uint32_t *priority)
4627 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
4629 data.dptr = (uint8_t*)&db_id;
4630 data.dsize = sizeof(db_id);
4632 ret = ctdb_control(ctdb, destnode, 0,
4633 CTDB_CONTROL_GET_DB_PRIORITY, 0, data,
4634 tmp_ctx, NULL, &res, &timeout, NULL);
/* note: res < 0, not res != 0, unlike the setters in this file */
4635 if (ret != 0 || res < 0) {
4636 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get_db_priority failed\n"));
4637 talloc_free(tmp_ctx);
4645 talloc_free(tmp_ctx);
/*
 * Query destnode with CTDB_CONTROL_GET_STAT_HISTORY and return a copy
 * of the reply allocated on mem_ctx.
 *
 * ctdb     - context the control is sent on
 * timeout  - passed by address to ctdb_control()
 * destnode - node the control is sent to
 * mem_ctx  - talloc parent for the reply buffer and for *stats
 * stats    - out: talloc_memdup() copy of the reply payload
 *
 * A non-zero ret, a non-zero res or an empty reply is treated as
 * failure and logged at DEBUG_ERR with ret and res.
 *
 * NOTE(review): the return statements are not visible in this view;
 * presumably -1 on failure and 0 on success - confirm.
 */
4650 int ctdb_ctrl_getstathistory(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_statistics_wire **stats)
4656 ret = ctdb_control(ctdb, destnode, 0,
4657 CTDB_CONTROL_GET_STAT_HISTORY, 0, tdb_null,
4658 mem_ctx, &outdata, &res, &timeout, NULL);
4659 if (ret != 0 || res != 0 || outdata.dsize == 0) {
4660 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getstathistory failed ret:%d res:%d\n", ret, res));
/*
 * Duplicate the reply onto mem_ctx, then release the original buffer.
 * NOTE(review): no NULL check on the talloc_memdup() result is visible.
 */
4664 *stats = (struct ctdb_statistics_wire *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
4665 talloc_free(outdata.dptr);
/*
 * Return a struct ctdb_ltdb_header pointer for the record handle h.
 *
 * NOTE(review): the body is not visible in this view; presumably this
 * returns the header kept inside the handle - confirm, including who
 * owns the returned pointer.
 */
4670 struct ctdb_ltdb_header *ctdb_header_from_record_handle(struct ctdb_record_handle *h)
/*
 * Start an asynchronous CTDB_CONTROL_UPDATE_RECORD control to destnode
 * carrying one marshalled record.
 *
 * ctdb     - context the control is sent on
 * mem_ctx  - talloc parent for the marshall buffer and the memory
 *            context handed to ctdb_control_send()
 * timeout  - passed by address to ctdb_control_send()
 * destnode - node the control is sent to
 * ctdb_db  - database the record belongs to; its db_id is written into
 *            the marshall buffer
 * key, header, data - record contents given to ctdb_marshall_record()
 *
 * The payload is a struct ctdb_marshall_buffer grown so that the
 * marshalled record is copied in at offsetof(..., data).
 *
 * NOTE(review): the NULL tests, the cleanup on the error paths, any
 * update of the buffer's record count and the final return are not
 * visible in this view; presumably the handle from ctdb_control_send()
 * is returned and NULL on failure - confirm.
 */
4680 struct ctdb_client_control_state *
4681 ctdb_ctrl_updaterecord_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, struct ctdb_db_context *ctdb_db, TDB_DATA key, struct ctdb_ltdb_header *header, TDB_DATA data)
4683 struct ctdb_client_control_state *handle;
4684 struct ctdb_marshall_buffer *m;
4685 struct ctdb_rec_data *rec;
/* zeroed buffer header; the record bytes are appended further down */
4688 m = talloc_zero(mem_ctx, struct ctdb_marshall_buffer);
4690 DEBUG(DEBUG_ERR, ("Failed to allocate marshall buffer for update record\n"));
4694 m->db_id = ctdb_db->db_id;
/* rec is allocated with m as its talloc parent */
4696 rec = ctdb_marshall_record(m, 0, key, header, data);
4698 DEBUG(DEBUG_ERR,("Failed to marshall record for update record\n"));
/*
 * Grow m to hold the fixed header plus rec->length bytes.
 * NOTE(review): the result overwrites m, so if the realloc fails the
 * old buffer can no longer be reached through m - confirm the error
 * path copes with that.
 */
4702 m = talloc_realloc_size(mem_ctx, m, rec->length + offsetof(struct ctdb_marshall_buffer, data));
4704 DEBUG(DEBUG_CRIT,(__location__ " Failed to expand recdata\n"));
/* place the marshalled record directly after the buffer header */
4709 memcpy((uint8_t *)m + offsetof(struct ctdb_marshall_buffer, data), rec, rec->length);
/* the whole talloc chunk backing m is the control payload */
4712 outdata.dptr = (uint8_t *)m;
4713 outdata.dsize = talloc_get_size(m);
4715 handle = ctdb_control_send(ctdb, destnode, 0,
4716 CTDB_CONTROL_UPDATE_RECORD, 0, outdata,
4717 mem_ctx, &timeout, NULL);
/*
 * Collect the result of a control started with
 * ctdb_ctrl_updaterecord_send().
 *
 * ctdb  - context the control was sent on
 * state - control state to collect the result from; it is also passed
 *         as the third argument of ctdb_control_recv()
 *
 * No reply payload is requested; only the status (res) is collected.
 * A non-zero ret or res is logged at DEBUG_ERR.
 *
 * NOTE(review): the return statements are not visible in this view;
 * presumably -1 on failure and 0 on success - confirm.
 */
4722 int ctdb_ctrl_updaterecord_recv(struct ctdb_context *ctdb, struct ctdb_client_control_state *state)
4727 ret = ctdb_control_recv(ctdb, state, state, NULL, &res, NULL);
4728 if ( (ret != 0) || (res != 0) ){
4729 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_update_record_recv failed\n"));
/*
 * Synchronous wrapper: start the update-record control with
 * ctdb_ctrl_updaterecord_send() and return whatever
 * ctdb_ctrl_updaterecord_recv() reports for it.
 *
 * All parameters are forwarded unchanged to the send function.
 *
 * NOTE(review): the state returned by the send call is handed to the
 * recv call without a visible NULL check - confirm the recv side
 * tolerates a NULL state.
 */
4737 ctdb_ctrl_updaterecord(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, struct ctdb_db_context *ctdb_db, TDB_DATA key, struct ctdb_ltdb_header *header, TDB_DATA data)
4739 struct ctdb_client_control_state *state;
4741 state = ctdb_ctrl_updaterecord_send(ctdb, mem_ctx, timeout, destnode, ctdb_db, key, header, data);
4742 return ctdb_ctrl_updaterecord_recv(ctdb, state);
4751 set a database to be readonly
/*
 * Start an asynchronous CTDB_CONTROL_SET_DB_READONLY control to
 * destnode and return the handle from ctdb_control_send().
 *
 * ctdb     - context the control is sent on
 * destnode - node the control is sent to
 * dbid     - database id; the payload is this uint32_t
 *
 * NOTE(review): the remaining arguments of the ctdb_control_send() call
 * are not visible in this view - confirm.
 */
4753 struct ctdb_client_control_state *
4754 ctdb_ctrl_set_db_readonly_send(struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid)
/* payload points at the by-value parameter */
4758 data.dptr = (uint8_t *)&dbid;
4759 data.dsize = sizeof(dbid);
4761 return ctdb_control_send(ctdb, destnode, 0,
4762 CTDB_CONTROL_SET_DB_READONLY, 0, data,
/*
 * Collect the result of a control started with
 * ctdb_ctrl_set_db_readonly_send().
 *
 * ctdb  - context the control was sent on; also passed as the third
 *         argument of ctdb_control_recv()
 * state - control state to collect the result from
 *
 * No reply payload is requested; only the status (res) is collected.
 * A non-zero ret or res is logged at DEBUG_ERR with both values.
 *
 * NOTE(review): the return statements are not visible in this view;
 * presumably -1 on failure and 0 on success - confirm.
 */
4766 int ctdb_ctrl_set_db_readonly_recv(struct ctdb_context *ctdb, struct ctdb_client_control_state *state)
4771 ret = ctdb_control_recv(ctdb, state, ctdb, NULL, &res, NULL);
4772 if (ret != 0 || res != 0) {
4773 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_set_db_readonly_recv failed ret:%d res:%d\n", ret, res));
/*
 * Synchronous wrapper: start the set-db-readonly control with
 * ctdb_ctrl_set_db_readonly_send() and return the result of
 * ctdb_ctrl_set_db_readonly_recv().
 *
 * ctdb     - context the control is sent on
 * destnode - node the control is sent to
 * dbid     - id of the database to change
 *
 * NOTE(review): the state returned by the send call is handed to the
 * recv call without a visible NULL check - confirm the recv side
 * tolerates a NULL state.
 */
4780 int ctdb_ctrl_set_db_readonly(struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid)
4782 struct ctdb_client_control_state *state;
4784 state = ctdb_ctrl_set_db_readonly_send(ctdb, destnode, dbid);
4785 return ctdb_ctrl_set_db_readonly_recv(ctdb, state);
4789 set a database to be sticky
/*
 * Start an asynchronous CTDB_CONTROL_SET_DB_STICKY control to destnode
 * and return the handle from ctdb_control_send().
 *
 * ctdb     - context the control is sent on
 * destnode - node the control is sent to
 * dbid     - database id; the payload is this uint32_t
 *
 * NOTE(review): the remaining arguments of the ctdb_control_send() call
 * are not visible in this view - confirm.
 */
4791 struct ctdb_client_control_state *
4792 ctdb_ctrl_set_db_sticky_send(struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid)
/* payload points at the by-value parameter */
4796 data.dptr = (uint8_t *)&dbid;
4797 data.dsize = sizeof(dbid);
4799 return ctdb_control_send(ctdb, destnode, 0,
4800 CTDB_CONTROL_SET_DB_STICKY, 0, data,
/*
 * Collect the result of a control started with
 * ctdb_ctrl_set_db_sticky_send().
 *
 * ctdb  - context the control was sent on; also passed as the third
 *         argument of ctdb_control_recv()
 * state - control state to collect the result from
 *
 * No reply payload is requested; only the status (res) is collected.
 * A non-zero ret or res is logged at DEBUG_ERR with both values.
 *
 * NOTE(review): the return statements are not visible in this view;
 * presumably -1 on failure and 0 on success - confirm.
 */
4804 int ctdb_ctrl_set_db_sticky_recv(struct ctdb_context *ctdb, struct ctdb_client_control_state *state)
4809 ret = ctdb_control_recv(ctdb, state, ctdb, NULL, &res, NULL);
4810 if (ret != 0 || res != 0) {
4811 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_set_db_sticky_recv failed ret:%d res:%d\n", ret, res));
/*
 * Synchronous wrapper: start the set-db-sticky control with
 * ctdb_ctrl_set_db_sticky_send() and return the result of
 * ctdb_ctrl_set_db_sticky_recv().
 *
 * ctdb     - context the control is sent on
 * destnode - node the control is sent to
 * dbid     - id of the database to change
 *
 * NOTE(review): the state returned by the send call is handed to the
 * recv call without a visible NULL check - confirm the recv side
 * tolerates a NULL state.
 */
4818 int ctdb_ctrl_set_db_sticky(struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid)
4820 struct ctdb_client_control_state *state;
4822 state = ctdb_ctrl_set_db_sticky_send(ctdb, destnode, dbid);
4823 return ctdb_ctrl_set_db_sticky_recv(ctdb, state);