4 Copyright (C) Andrew Tridgell 2007
5 Copyright (C) Ronnie Sahlberg 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
23 #include "lib/tdb/include/tdb.h"
24 #include "lib/util/dlinklist.h"
25 #include "lib/tevent/tevent.h"
26 #include "system/network.h"
27 #include "system/filesys.h"
28 #include "system/locale.h"
30 #include "../include/ctdb_private.h"
31 #include "lib/util/dlinklist.h"
36 allocate a packet for use in client<->daemon communication
// Allocate a packet for client<->daemon traffic and fill in its header.
// The requested length is raised to at least `slength`, and the allocation
// size is that length rounded up with the CTDB_DS_ALIGNMENT mask.
// Only the first `slength` bytes are zeroed; the talloc name of the packet
// is set to `type`. Header fields filled here: operation, magic, version,
// source node (ctdb->pnn) and the generation of ctdb->vnn_map.
38 struct ctdb_req_header *_ctdbd_allocate_pkt(struct ctdb_context *ctdb,
40 enum ctdb_operation operation,
41 size_t length, size_t slength,
45 struct ctdb_req_header *hdr;
47 length = MAX(length, slength);
48 size = (length+(CTDB_DS_ALIGNMENT-1)) & ~(CTDB_DS_ALIGNMENT-1);
50 hdr = (struct ctdb_req_header *)talloc_size(mem_ctx, size);
52 DEBUG(DEBUG_ERR,("Unable to allocate packet for operation %u of length %u\n",
53 operation, (unsigned)length));
56 talloc_set_name_const(hdr, type);
57 memset(hdr, 0, slength);
59 hdr->operation = operation;
60 hdr->ctdb_magic = CTDB_MAGIC;
61 hdr->ctdb_version = CTDB_VERSION;
62 hdr->srcnode = ctdb->pnn;
64 hdr->generation = ctdb->vnn_map->generation;
71 local version of ctdb_call
// Run a registered call function against a record on this node.
// The record data is duplicated into a fresh ctdb_call_info, the handler
// whose id equals call->call_id is looked up in ctdb_db->calls and invoked.
// Data selected through c->new_data is written back with ctdb_ltdb_store();
// reply data is handed to `call` (re-parented onto it with talloc_steal)
// and the handler's status is copied into call->status.
73 int ctdb_call_local(struct ctdb_db_context *ctdb_db, struct ctdb_call *call,
74 struct ctdb_ltdb_header *header, TALLOC_CTX *mem_ctx,
77 struct ctdb_call_info *c;
78 struct ctdb_registered_call *fn;
79 struct ctdb_context *ctdb = ctdb_db->ctdb;
81 c = talloc(ctdb, struct ctdb_call_info);
82 CTDB_NO_MEMORY(ctdb, c);
85 c->call_data = &call->call_data;
86 c->record_data.dptr = talloc_memdup(c, data->dptr, data->dsize);
87 c->record_data.dsize = data->dsize;
88 CTDB_NO_MEMORY(ctdb, c->record_data.dptr);
// linear search of the registered calls for this database
93 for (fn=ctdb_db->calls;fn;fn=fn->next) {
94 if (fn->id == call->call_id) break;
97 ctdb_set_error(ctdb, "Unknown call id %u\n", call->call_id);
102 if (fn->fn(c) != 0) {
103 ctdb_set_error(ctdb, "ctdb_call %u failed\n", call->call_id);
108 /* we need to force the record to be written out if this was a remote access */
109 if (c->new_data == NULL) {
110 c->new_data = &c->record_data;
114 /* XXX check that we always have the lock here? */
115 if (ctdb_ltdb_store(ctdb_db, call->key, header, *c->new_data) != 0) {
116 ctdb_set_error(ctdb, "ctdb_call tdb_store failed\n");
123 call->reply_data = *c->reply_data;
125 talloc_steal(call, call->reply_data.dptr);
126 talloc_set_name_const(call->reply_data.dptr, __location__);
128 call->reply_data.dptr = NULL;
129 call->reply_data.dsize = 0;
131 call->status = c->status;
140 queue a packet for sending from client to daemon
// Send a packet to the daemon: queues hdr->length bytes starting at `hdr`
// on ctdb->daemon.queue and returns the result of ctdb_queue_send().
142 static int ctdb_client_queue_pkt(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
144 return ctdb_queue_send(ctdb->daemon.queue, (uint8_t *)hdr, hdr->length);
149 called when a CTDB_REPLY_CALL packet comes in on the client
151 This packet comes in response to a CTDB_REQ_CALL request packet. It
152 contains any reply data from the call
// Handle a CTDB_REPLY_CALL packet on the client side.
// The pending call state is looked up by hdr->reqid; the call's reply data
// is pointed at the payload inside the packet and the status is copied.
// The packet is re-parented onto the state (talloc_steal) because the reply
// data points into it. The state is then marked CTDB_CALL_DONE and the
// async callback, if set, is invoked.
154 static void ctdb_client_reply_call(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
156 struct ctdb_reply_call *c = (struct ctdb_reply_call *)hdr;
157 struct ctdb_client_call_state *state;
159 state = ctdb_reqid_find(ctdb, hdr->reqid, struct ctdb_client_call_state);
161 DEBUG(DEBUG_ERR,(__location__ " reqid %u not found\n", hdr->reqid));
165 if (hdr->reqid != state->reqid) {
166 /* we found a record but it was the wrong one */
167 DEBUG(DEBUG_ERR, ("Dropped client call reply with reqid:%u\n",hdr->reqid));
171 state->call->reply_data.dptr = c->data;
172 state->call->reply_data.dsize = c->datalen;
173 state->call->status = c->status;
175 talloc_steal(state, c);
177 state->state = CTDB_CALL_DONE;
179 if (state->async.fn) {
180 state->async.fn(state);
// Forward declaration: ctdb_client_read_cb() dispatches to this handler,
// which is defined further down in this file.
184 static void ctdb_client_reply_control(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);
187 this is called in the client, when data comes in from the daemon
// Read callback installed on the daemon queue by ctdb_socket_connect().
// `data`/`cnt` describe the received packet and `args` is the ctdb context.
// The header is validated (at least sizeof(*hdr) bytes, length field equal
// to cnt, magic, version) and the packet is dispatched on hdr->operation
// to the reply-call, message or reply-control handler.
189 static void ctdb_client_read_cb(uint8_t *data, size_t cnt, void *args)
191 struct ctdb_context *ctdb = talloc_get_type(args, struct ctdb_context);
192 struct ctdb_req_header *hdr = (struct ctdb_req_header *)data;
195 /* place the packet as a child of a tmp_ctx. We then use
196 talloc_free() below to free it. If any of the calls want
197 to keep it, then they will steal it somewhere else, and the
198 talloc_free() will be a no-op */
199 tmp_ctx = talloc_new(ctdb);
200 talloc_steal(tmp_ctx, hdr);
203 DEBUG(DEBUG_INFO,("Daemon has exited - shutting down client\n"));
207 if (cnt < sizeof(*hdr)) {
208 DEBUG(DEBUG_CRIT,("Bad packet length %u in client\n", (unsigned)cnt));
211 if (cnt != hdr->length) {
212 ctdb_set_error(ctdb, "Bad header length %u expected %u in client\n",
213 (unsigned)hdr->length, (unsigned)cnt);
217 if (hdr->ctdb_magic != CTDB_MAGIC) {
218 ctdb_set_error(ctdb, "Non CTDB packet rejected in client\n");
222 if (hdr->ctdb_version != CTDB_VERSION) {
223 ctdb_set_error(ctdb, "Bad CTDB version 0x%x rejected in client\n", hdr->ctdb_version);
227 switch (hdr->operation) {
228 case CTDB_REPLY_CALL:
229 ctdb_client_reply_call(ctdb, hdr);
232 case CTDB_REQ_MESSAGE:
233 ctdb_request_message(ctdb, hdr);
236 case CTDB_REPLY_CONTROL:
237 ctdb_client_reply_control(ctdb, hdr);
241 DEBUG(DEBUG_CRIT,("bogus operation code:%u\n",hdr->operation));
245 talloc_free(tmp_ctx);
249 connect to a unix domain socket
251 int ctdb_socket_connect(struct ctdb_context *ctdb)
253 struct sockaddr_un addr;
255 memset(&addr, 0, sizeof(addr));
256 addr.sun_family = AF_UNIX;
257 strncpy(addr.sun_path, ctdb->daemon.name, sizeof(addr.sun_path));
259 ctdb->daemon.sd = socket(AF_UNIX, SOCK_STREAM, 0);
260 if (ctdb->daemon.sd == -1) {
261 DEBUG(DEBUG_ERR,(__location__ " Failed to open client socket. Errno:%s(%d)\n", strerror(errno), errno));
265 set_nonblocking(ctdb->daemon.sd);
266 set_close_on_exec(ctdb->daemon.sd);
268 if (connect(ctdb->daemon.sd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
269 close(ctdb->daemon.sd);
270 ctdb->daemon.sd = -1;
271 DEBUG(DEBUG_ERR,(__location__ " Failed to connect client socket to daemon. Errno:%s(%d)\n", strerror(errno), errno));
275 ctdb->daemon.queue = ctdb_queue_setup(ctdb, ctdb, ctdb->daemon.sd,
277 ctdb_client_read_cb, ctdb, "to-ctdbd");
// Handle returned by ctdb_fetch_lock(). It records the database the record
// belongs to and the record's ltdb header; ctdb_record_store() writes back
// through these fields.
282 struct ctdb_record_handle {
283 struct ctdb_db_context *ctdb_db;
286 struct ctdb_ltdb_header header;
291 make a recv call to the local ctdb daemon - called from client context
293 This is called when the program wants to wait for a ctdb_call to complete and get the
294 results. This call will block unless the call has already completed.
// Wait for a call started with ctdb_call_send() and collect its result.
// The event loop is run until state->state reaches at least
// CTDB_CALL_DONE; any final state other than CTDB_CALL_DONE is logged as a
// failure. Reply data, when present, is duplicated onto state->ctdb_db and
// returned in call->reply_data; call->status receives the call's status.
296 int ctdb_call_recv(struct ctdb_client_call_state *state, struct ctdb_call *call)
302 while (state->state < CTDB_CALL_DONE) {
303 event_loop_once(state->ctdb_db->ctdb->ev);
305 if (state->state != CTDB_CALL_DONE) {
306 DEBUG(DEBUG_ERR,(__location__ " ctdb_call_recv failed\n"));
311 if (state->call->reply_data.dsize) {
312 call->reply_data.dptr = talloc_memdup(state->ctdb_db,
313 state->call->reply_data.dptr,
314 state->call->reply_data.dsize);
315 call->reply_data.dsize = state->call->reply_data.dsize;
317 call->reply_data.dptr = NULL;
318 call->reply_data.dsize = 0;
320 call->status = state->call->status;
330 destroy a ctdb_call in client
// talloc destructor for a client call state (installed in ctdb_call_send()):
// removes state->reqid from the context's request-id table.
332 static int ctdb_client_call_destructor(struct ctdb_client_call_state *state)
334 ctdb_reqid_remove(state->ctdb_db->ctdb, state->reqid);
339 construct an event driven local ctdb_call
341 this is used so that locally processed ctdb_call requests are processed
342 in an event driven manner
// Build a call state for a call that is served locally.
// The state is allocated on ctdb_db, created directly in CTDB_CALL_DONE,
// `call` is copied into state->call, data->dptr is re-parented onto the
// state, and ctdb_call_local() is run immediately with the state as its
// memory context.
344 static struct ctdb_client_call_state *ctdb_client_call_local_send(struct ctdb_db_context *ctdb_db,
345 struct ctdb_call *call,
346 struct ctdb_ltdb_header *header,
349 struct ctdb_client_call_state *state;
350 struct ctdb_context *ctdb = ctdb_db->ctdb;
353 state = talloc_zero(ctdb_db, struct ctdb_client_call_state);
354 CTDB_NO_MEMORY_NULL(ctdb, state);
355 state->call = talloc_zero(state, struct ctdb_call);
356 CTDB_NO_MEMORY_NULL(ctdb, state->call);
358 talloc_steal(state, data->dptr);
360 state->state = CTDB_CALL_DONE;
361 *(state->call) = *call;
362 state->ctdb_db = ctdb_db;
364 ret = ctdb_call_local(ctdb_db, state->call, header, state, data);
370 make a ctdb call to the local daemon - async send. Called from client context.
372 This constructs a ctdb_call request and queues it for processing.
373 This call never blocks.
// Start a call without blocking. Called from client context.
// Under the record's chain lock the local copy is fetched; if the fetch
// succeeds and this node is the dmaster, the call is served locally via
// ctdb_client_call_local_send(). Otherwise a CTDB_REQ_CALL packet carrying
// the key followed by the call data is built, a state in CTDB_CALL_WAIT is
// registered under a new request id, and the packet is queued to the
// daemon.
375 struct ctdb_client_call_state *ctdb_call_send(struct ctdb_db_context *ctdb_db,
376 struct ctdb_call *call)
378 struct ctdb_client_call_state *state;
379 struct ctdb_context *ctdb = ctdb_db->ctdb;
380 struct ctdb_ltdb_header header;
384 struct ctdb_req_call *c;
386 /* if the domain socket is not yet open, open it */
387 if (ctdb->daemon.sd==-1) {
// NOTE(review): the result of ctdb_socket_connect() is not checked here
388 ctdb_socket_connect(ctdb);
391 ret = ctdb_ltdb_lock(ctdb_db, call->key);
393 DEBUG(DEBUG_ERR,(__location__ " Failed to get chainlock\n"));
397 ret = ctdb_ltdb_fetch(ctdb_db, call->key, &header, ctdb_db, &data);
399 if (ret == 0 && header.dmaster == ctdb->pnn) {
400 state = ctdb_client_call_local_send(ctdb_db, call, &header, &data);
401 talloc_free(data.dptr);
402 ctdb_ltdb_unlock(ctdb_db, call->key);
406 ctdb_ltdb_unlock(ctdb_db, call->key);
407 talloc_free(data.dptr);
409 state = talloc_zero(ctdb_db, struct ctdb_client_call_state);
411 DEBUG(DEBUG_ERR, (__location__ " failed to allocate state\n"));
414 state->call = talloc_zero(state, struct ctdb_call);
415 if (state->call == NULL) {
416 DEBUG(DEBUG_ERR, (__location__ " failed to allocate state->call\n"));
// packet payload layout: key bytes first, call data immediately after
420 len = offsetof(struct ctdb_req_call, data) + call->key.dsize + call->call_data.dsize;
421 c = ctdbd_allocate_pkt(ctdb, state, CTDB_REQ_CALL, len, struct ctdb_req_call);
423 DEBUG(DEBUG_ERR, (__location__ " failed to allocate packet\n"));
427 state->reqid = ctdb_reqid_new(ctdb, state);
428 state->ctdb_db = ctdb_db;
429 talloc_set_destructor(state, ctdb_client_call_destructor);
431 c->hdr.reqid = state->reqid;
432 c->flags = call->flags;
433 c->db_id = ctdb_db->db_id;
434 c->callid = call->call_id;
436 c->keylen = call->key.dsize;
437 c->calldatalen = call->call_data.dsize;
438 memcpy(&c->data[0], call->key.dptr, call->key.dsize);
439 memcpy(&c->data[call->key.dsize],
440 call->call_data.dptr, call->call_data.dsize);
// the saved copy of the call points at the key and call data inside the
// packet, not at the caller's buffers
441 *(state->call) = *call;
442 state->call->call_data.dptr = &c->data[call->key.dsize];
443 state->call->key.dptr = &c->data[0];
445 state->state = CTDB_CALL_WAIT;
// NOTE(review): the return value of ctdb_client_queue_pkt() is discarded
448 ctdb_client_queue_pkt(ctdb, &c->hdr);
455 full ctdb_call. Equivalent to a ctdb_call_send() followed by a ctdb_call_recv()
// Synchronous call: ctdb_call_send() followed by ctdb_call_recv() on the
// returned state. The return value is that of ctdb_call_recv().
457 int ctdb_call(struct ctdb_db_context *ctdb_db, struct ctdb_call *call)
459 struct ctdb_client_call_state *state;
461 state = ctdb_call_send(ctdb_db, call);
462 return ctdb_call_recv(state, call);
467 tell the daemon what messaging srvid we will use, and register the message
468 handler function in the client
// Register interest in `srvid`. The daemon is told first, through
// CTDB_CONTROL_REGISTER_SRVID sent to CTDB_CURRENT_NODE; a control failure
// or non-zero status is logged. `handler` is then registered in this
// client's own ctdb context and that registration's result is returned.
470 int ctdb_client_set_message_handler(struct ctdb_context *ctdb, uint64_t srvid,
471 ctdb_msg_fn_t handler,
478 res = ctdb_control(ctdb, CTDB_CURRENT_NODE, srvid, CTDB_CONTROL_REGISTER_SRVID, 0,
479 tdb_null, NULL, NULL, &status, NULL, NULL);
480 if (res != 0 || status != 0) {
481 DEBUG(DEBUG_ERR,("Failed to register srvid %llu\n", (unsigned long long)srvid));
485 /* also need to register the handler with our own ctdb structure */
486 return ctdb_register_message_handler(ctdb, ctdb, srvid, handler, private_data);
490 tell the daemon we no longer want a srvid
// Drop interest in `srvid`. The daemon is told first, through
// CTDB_CONTROL_DEREGISTER_SRVID sent to CTDB_CURRENT_NODE; a control
// failure or non-zero status is logged. The handler is then removed from
// this client's own ctdb context.
492 int ctdb_client_remove_message_handler(struct ctdb_context *ctdb, uint64_t srvid, void *private_data)
497 res = ctdb_control(ctdb, CTDB_CURRENT_NODE, srvid, CTDB_CONTROL_DEREGISTER_SRVID, 0,
498 tdb_null, NULL, NULL, &status, NULL, NULL);
499 if (res != 0 || status != 0) {
500 DEBUG(DEBUG_ERR,("Failed to deregister srvid %llu\n", (unsigned long long)srvid));
504 /* also need to deregister the handler from our own ctdb structure */
505 ctdb_deregister_message_handler(ctdb, srvid, private_data);
511 send a message - from client context
// Send `data` as a CTDB_REQ_MESSAGE addressed to node `pnn`.
// A packet sized for the header plus data.dsize is allocated on the ctdb
// context, the payload is copied into it, and it is queued to the daemon.
513 int ctdb_client_send_message(struct ctdb_context *ctdb, uint32_t pnn,
514 uint64_t srvid, TDB_DATA data)
516 struct ctdb_req_message *r;
519 len = offsetof(struct ctdb_req_message, data) + data.dsize;
520 r = ctdbd_allocate_pkt(ctdb, ctdb, CTDB_REQ_MESSAGE,
521 len, struct ctdb_req_message);
522 CTDB_NO_MEMORY(ctdb, r);
524 r->hdr.destnode = pnn;
526 r->datalen = data.dsize;
527 memcpy(&r->data[0], data.dptr, data.dsize);
529 res = ctdb_client_queue_pkt(ctdb, &r->hdr);
540 cancel a ctdb_fetch_lock operation, releasing the lock
// talloc destructor for a record handle (installed in ctdb_fetch_lock()):
// releases the chain lock on h->key.
542 static int fetch_lock_destructor(struct ctdb_record_handle *h)
544 ctdb_ltdb_unlock(h->ctdb_db, h->key);
549 force the migration of a record to this node
// Request migration of a record to this node by issuing a CTDB_NULL_FUNC
// call with the CTDB_IMMEDIATE_MIGRATION flag. Goes through the
// synchronous ctdb_call() and returns its result.
551 static int ctdb_client_force_migration(struct ctdb_db_context *ctdb_db, TDB_DATA key)
553 struct ctdb_call call;
555 call.call_id = CTDB_NULL_FUNC;
557 call.flags = CTDB_IMMEDIATE_MIGRATION;
558 return ctdb_call(ctdb_db, &call);
562 get a lock on a record, and return the records data. Blocks until it gets the lock
// Lock a record and return a handle for it together with the record data.
// The handle is allocated on mem_ctx and owns a private copy of the key.
// Each attempt takes the chain lock and fetches the local copy; when the
// fetch fails or another node is dmaster, the lock is dropped and a
// migration is requested (see the numbered procedure in the body). Once the
// lock is held, the handle's destructor is what releases it.
564 struct ctdb_record_handle *ctdb_fetch_lock(struct ctdb_db_context *ctdb_db, TALLOC_CTX *mem_ctx,
565 TDB_DATA key, TDB_DATA *data)
568 struct ctdb_record_handle *h;
571 procedure is as follows:
573 1) get the chain lock.
574 2) check if we are dmaster
575 3) if we are the dmaster then return handle
576 4) if not dmaster then ask ctdb daemon to make us dmaster, and wait for
578 5) when we get the reply, goto (1)
581 h = talloc_zero(mem_ctx, struct ctdb_record_handle);
586 h->ctdb_db = ctdb_db;
588 h->key.dptr = talloc_memdup(h, key.dptr, key.dsize);
589 if (h->key.dptr == NULL) {
595 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: key=%*.*s\n", (int)key.dsize, (int)key.dsize,
596 (const char *)key.dptr));
599 /* step 1 - get the chain lock */
600 ret = ctdb_ltdb_lock(ctdb_db, key);
602 DEBUG(DEBUG_ERR, (__location__ " failed to lock ltdb record\n"));
607 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: got chain lock\n"));
609 talloc_set_destructor(h, fetch_lock_destructor);
611 ret = ctdb_ltdb_fetch(ctdb_db, key, &h->header, h, data);
613 /* when torturing, ensure we test the remote path */
614 if ((ctdb_db->ctdb->flags & CTDB_FLAG_TORTURE) &&
616 h->header.dmaster = (uint32_t)-1;
620 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: done local fetch\n"));
622 if (ret != 0 || h->header.dmaster != ctdb_db->ctdb->pnn) {
623 ctdb_ltdb_unlock(ctdb_db, key);
624 ret = ctdb_client_force_migration(ctdb_db, key);
626 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: force_migration failed\n"));
633 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: we are dmaster - done\n"));
638 store some data to the record that was locked with ctdb_fetch_lock()
// Store `data` into the record held by handle `h`.
// Persistent databases are refused with an error log; otherwise the write
// is delegated to ctdb_ltdb_store() using the handle's key and header.
640 int ctdb_record_store(struct ctdb_record_handle *h, TDB_DATA data)
642 if (h->ctdb_db->persistent) {
643 DEBUG(DEBUG_ERR, (__location__ " ctdb_record_store prohibited for persistent dbs\n"));
647 return ctdb_ltdb_store(h->ctdb_db, h->key, &h->header, data);
651 non-locking fetch of a record
// Fetch a record without taking the record lock, by running
// CTDB_FETCH_FUNC through ctdb_call() with no call data. The reply data is
// returned in *data and re-parented onto mem_ctx.
653 int ctdb_fetch(struct ctdb_db_context *ctdb_db, TALLOC_CTX *mem_ctx,
654 TDB_DATA key, TDB_DATA *data)
656 struct ctdb_call call;
659 call.call_id = CTDB_FETCH_FUNC;
660 call.call_data.dptr = NULL;
661 call.call_data.dsize = 0;
663 ret = ctdb_call(ctdb_db, &call);
666 *data = call.reply_data;
667 talloc_steal(mem_ctx, data->dptr);
676 called when a control completes or times out to invoke the callback
677 function the user provided
// Timed-event handler that completes an asynchronous control.
// `private_data` is the control state. It is moved under a temporary talloc
// context, ctdb_control_recv() is run on it, and the temporary context is
// then freed.
679 static void invoke_control_callback(struct event_context *ev, struct timed_event *te,
680 struct timeval t, void *private_data)
682 struct ctdb_client_control_state *state;
683 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
686 state = talloc_get_type(private_data, struct ctdb_client_control_state);
687 talloc_steal(tmp_ctx, state);
689 ret = ctdb_control_recv(state->ctdb, state, state,
694 talloc_free(tmp_ctx);
698 called when a CTDB_REPLY_CONTROL packet comes in on the client
700 This packet comes in response to a CTDB_REQ_CONTROL request packet. It
701 contains any reply data from the control
// Handle a CTDB_REPLY_CONTROL packet on the client side.
// The pending control is looked up by hdr->reqid and the reply's data
// pointer, length and status are recorded in the state. The error string
// is read from the bytes that follow the data. The packet is re-parented
// onto the state because state->outdata points into it. When an async
// callback is set, invoke_control_callback() is scheduled as a
// zero-timeout timed event.
703 static void ctdb_client_reply_control(struct ctdb_context *ctdb,
704 struct ctdb_req_header *hdr)
706 struct ctdb_reply_control *c = (struct ctdb_reply_control *)hdr;
707 struct ctdb_client_control_state *state;
709 state = ctdb_reqid_find(ctdb, hdr->reqid, struct ctdb_client_control_state);
711 DEBUG(DEBUG_ERR,(__location__ " reqid %u not found\n", hdr->reqid));
715 if (hdr->reqid != state->reqid) {
716 /* we found a record but it was the wrong one */
717 DEBUG(DEBUG_ERR, ("Dropped orphaned reply control with reqid:%u\n",hdr->reqid));
721 state->outdata.dptr = c->data;
722 state->outdata.dsize = c->datalen;
723 state->status = c->status;
725 state->errormsg = talloc_strndup(state,
726 (char *)&c->data[c->datalen],
730 /* state->outdata now uses resources from c so we dont want c
731 to just dissappear from under us while state is still alive
733 talloc_steal(state, c);
735 state->state = CTDB_CONTROL_DONE;
737 /* if we had a callback registered for this control, pull the response
738 and call the callback.
740 if (state->async.fn) {
741 event_add_timed(ctdb->ev, state, timeval_zero(), invoke_control_callback, state);
747 destroy a ctdb_control in client
// talloc destructor for a control state (installed in ctdb_control_send()):
// removes state->reqid from the context's request-id table.
749 static int ctdb_control_destructor(struct ctdb_client_control_state *state)
751 ctdb_reqid_remove(state->ctdb, state->reqid);
756 /* time out handler for ctdb_control */
// Timed-event handler armed by ctdb_control_send() for a control's timeout.
// Logs the request id, opcode and destination node, then marks the state
// CTDB_CONTROL_TIMEOUT. When an async callback is set,
// invoke_control_callback() is scheduled as a zero-timeout timed event.
757 static void control_timeout_func(struct event_context *ev, struct timed_event *te,
758 struct timeval t, void *private_data)
760 struct ctdb_client_control_state *state = talloc_get_type(private_data, struct ctdb_client_control_state);
762 DEBUG(DEBUG_ERR,(__location__ " control timed out. reqid:%u opcode:%u "
763 "dstnode:%u\n", state->reqid, state->c->opcode,
764 state->c->hdr.destnode));
766 state->state = CTDB_CONTROL_TIMEOUT;
768 /* if we had a callback registered for this control, pull the response
769 and call the callback.
771 if (state->async.fn) {
772 event_add_timed(state->ctdb->ev, state, timeval_zero(), invoke_control_callback, state);
776 /* async version of send control request */
// Start a control without waiting for the reply.
// The state is allocated on mem_ctx, given a new request id and put in
// CTDB_CONTROL_WAIT. A CTDB_REQ_CONTROL packet sized for the header plus
// data.dsize is allocated as a child of the state and `data` is copied
// into it. A timeout event is armed only when `timeout` is non-NULL and
// non-zero. The packet is then queued to the daemon.
777 struct ctdb_client_control_state *ctdb_control_send(struct ctdb_context *ctdb,
778 uint32_t destnode, uint64_t srvid,
779 uint32_t opcode, uint32_t flags, TDB_DATA data,
781 struct timeval *timeout,
784 struct ctdb_client_control_state *state;
786 struct ctdb_req_control *c;
793 /* if the domain socket is not yet open, open it */
794 if (ctdb->daemon.sd==-1) {
// NOTE(review): the result of ctdb_socket_connect() is not checked here
795 ctdb_socket_connect(ctdb);
798 state = talloc_zero(mem_ctx, struct ctdb_client_control_state);
799 CTDB_NO_MEMORY_NULL(ctdb, state);
802 state->reqid = ctdb_reqid_new(ctdb, state);
803 state->state = CTDB_CONTROL_WAIT;
804 state->errormsg = NULL;
806 talloc_set_destructor(state, ctdb_control_destructor);
808 len = offsetof(struct ctdb_req_control, data) + data.dsize;
809 c = ctdbd_allocate_pkt(ctdb, state, CTDB_REQ_CONTROL,
810 len, struct ctdb_req_control);
812 CTDB_NO_MEMORY_NULL(ctdb, c);
813 c->hdr.reqid = state->reqid;
814 c->hdr.destnode = destnode;
819 c->datalen = data.dsize;
821 memcpy(&c->data[0], data.dptr, data.dsize);
825 if (timeout && !timeval_is_zero(timeout)) {
826 event_add_timed(ctdb->ev, state, *timeout, control_timeout_func, state);
829 ret = ctdb_client_queue_pkt(ctdb, &(c->hdr));
835 if (flags & CTDB_CTRL_FLAG_NOREPLY) {
844 /* async version of receive control reply */
// Wait for a control started with ctdb_control_send() and collect its
// results. The event loop is run while the state is CTDB_CONTROL_WAIT.
// Results: *outdata is a copy of the reply data allocated on mem_ctx,
// *status is the reply status, and the error message, if any, is moved to
// mem_ctx. The async callback, if set, is invoked before each exit visible
// here. The state is moved under a temporary talloc context that is freed
// on every exit path visible here, so `state` appears to be consumed by
// this call.
845 int ctdb_control_recv(struct ctdb_context *ctdb,
846 struct ctdb_client_control_state *state,
848 TDB_DATA *outdata, int32_t *status, char **errormsg)
852 if (status != NULL) {
855 if (errormsg != NULL) {
863 /* prevent double free of state */
864 tmp_ctx = talloc_new(ctdb);
865 talloc_steal(tmp_ctx, state);
867 /* loop one event at a time until we either timeout or the control
870 while (state->state == CTDB_CONTROL_WAIT) {
871 event_loop_once(ctdb->ev);
874 if (state->state != CTDB_CONTROL_DONE) {
875 DEBUG(DEBUG_ERR,(__location__ " ctdb_control_recv failed\n"));
876 if (state->async.fn) {
877 state->async.fn(state);
879 talloc_free(tmp_ctx);
883 if (state->errormsg) {
884 DEBUG(DEBUG_ERR,("ctdb_control error: '%s'\n", state->errormsg));
886 (*errormsg) = talloc_move(mem_ctx, &state->errormsg);
888 if (state->async.fn) {
889 state->async.fn(state);
891 talloc_free(tmp_ctx);
896 *outdata = state->outdata;
897 outdata->dptr = talloc_memdup(mem_ctx, outdata->dptr, outdata->dsize);
901 *status = state->status;
904 if (state->async.fn) {
905 state->async.fn(state);
908 talloc_free(tmp_ctx);
915 send a ctdb control message
916 timeout specifies how long we should wait for a reply.
917 if timeout is NULL we wait indefinitely
// Synchronous control: ctdb_control_send() followed by ctdb_control_recv()
// on the returned state. The return value is that of ctdb_control_recv().
919 int ctdb_control(struct ctdb_context *ctdb, uint32_t destnode, uint64_t srvid,
920 uint32_t opcode, uint32_t flags, TDB_DATA data,
921 TALLOC_CTX *mem_ctx, TDB_DATA *outdata, int32_t *status,
922 struct timeval *timeout,
925 struct ctdb_client_control_state *state;
927 state = ctdb_control_send(ctdb, destnode, srvid, opcode,
928 flags, data, mem_ctx,
930 return ctdb_control_recv(ctdb, state, mem_ctx, outdata, status,
938 a process exists call. Returns 0 if process exists, -1 otherwise
// Ask destnode about process `pid` by sending
// CTDB_CONTROL_PROCESS_EXISTS with the pid as the request payload.
// No timeout is passed to ctdb_control().
940 int ctdb_ctrl_process_exists(struct ctdb_context *ctdb, uint32_t destnode, pid_t pid)
946 data.dptr = (uint8_t*)&pid;
947 data.dsize = sizeof(pid);
949 ret = ctdb_control(ctdb, destnode, 0,
950 CTDB_CONTROL_PROCESS_EXISTS, 0, data,
951 NULL, NULL, &status, NULL, NULL);
953 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for process_exists failed\n"));
961 get remote statistics
// Fetch statistics from destnode with CTDB_CONTROL_STATISTICS.
// The reply must be exactly sizeof(struct ctdb_statistics) bytes. It is
// copied into *status and the reply buffer is then freed.
963 int ctdb_ctrl_statistics(struct ctdb_context *ctdb, uint32_t destnode, struct ctdb_statistics *status)
969 ret = ctdb_control(ctdb, destnode, 0,
970 CTDB_CONTROL_STATISTICS, 0, tdb_null,
971 ctdb, &data, &res, NULL, NULL);
972 if (ret != 0 || res != 0) {
973 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for statistics failed\n"));
977 if (data.dsize != sizeof(struct ctdb_statistics)) {
978 DEBUG(DEBUG_ERR,(__location__ " Wrong statistics size %u - expected %u\n",
979 (unsigned)data.dsize, (unsigned)sizeof(struct ctdb_statistics)));
983 *status = *(struct ctdb_statistics *)data.dptr;
984 talloc_free(data.dptr);
990 shutdown a remote ctdb node
// Send CTDB_CONTROL_SHUTDOWN to destnode with the given timeout.
// Only ctdb_control_send() is used here; no ctdb_control_recv() call on the
// returned state is visible in this function.
992 int ctdb_ctrl_shutdown(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
994 struct ctdb_client_control_state *state;
996 state = ctdb_control_send(ctdb, destnode, 0,
997 CTDB_CONTROL_SHUTDOWN, 0, tdb_null,
998 NULL, &timeout, NULL);
1000 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for shutdown failed\n"));
1008 get vnn map from a remote node
// Fetch the vnn map of destnode with CTDB_CONTROL_GETVNNMAP.
// The wire reply is validated: it must be at least as large as the fixed
// part and exactly as large as the fixed part plus map->size 32-bit
// entries. It is then converted into a struct ctdb_vnn_map allocated on
// mem_ctx and returned through *vnnmap, and the reply buffer is freed.
1010 int ctdb_ctrl_getvnnmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_vnn_map **vnnmap)
1015 struct ctdb_vnn_map_wire *map;
1017 ret = ctdb_control(ctdb, destnode, 0,
1018 CTDB_CONTROL_GETVNNMAP, 0, tdb_null,
1019 mem_ctx, &outdata, &res, &timeout, NULL);
1020 if (ret != 0 || res != 0) {
1021 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getvnnmap failed\n"));
1025 map = (struct ctdb_vnn_map_wire *)outdata.dptr;
// the first test guards the map->size read in the second
1026 if (outdata.dsize < offsetof(struct ctdb_vnn_map_wire, map) ||
1027 outdata.dsize != map->size*sizeof(uint32_t) + offsetof(struct ctdb_vnn_map_wire, map)) {
1028 DEBUG(DEBUG_ERR,("Bad vnn map size received in ctdb_ctrl_getvnnmap\n"));
1032 (*vnnmap) = talloc(mem_ctx, struct ctdb_vnn_map);
1033 CTDB_NO_MEMORY(ctdb, *vnnmap);
1034 (*vnnmap)->generation = map->generation;
1035 (*vnnmap)->size = map->size;
1036 (*vnnmap)->map = talloc_array(*vnnmap, uint32_t, map->size);
1038 CTDB_NO_MEMORY(ctdb, (*vnnmap)->map);
1039 memcpy((*vnnmap)->map, map->map, sizeof(uint32_t)*map->size);
1040 talloc_free(outdata.dptr);
1047 get the recovery mode of a remote node
// Async send half of getrecmode: issues CTDB_CONTROL_GET_RECMODE to
// destnode with the given timeout and returns the pending control state,
// which is allocated on mem_ctx by ctdb_control_send().
1049 struct ctdb_client_control_state *
1050 ctdb_ctrl_getrecmode_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
1052 return ctdb_control_send(ctdb, destnode, 0,
1053 CTDB_CONTROL_GET_RECMODE, 0, tdb_null,
1054 mem_ctx, &timeout, NULL);
// Async receive half of getrecmode: waits via ctdb_control_recv() and
// stores the control's status value, cast to uint32_t, in *recmode.
1057 int ctdb_ctrl_getrecmode_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *recmode)
1062 ret = ctdb_control_recv(ctdb, state, mem_ctx, NULL, &res, NULL);
1064 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_getrecmode_recv failed\n"));
1069 *recmode = (uint32_t)res;
// Synchronous getrecmode: the _send half followed by the _recv half.
1075 int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t *recmode)
1077 struct ctdb_client_control_state *state;
1079 state = ctdb_ctrl_getrecmode_send(ctdb, mem_ctx, timeout, destnode);
1080 return ctdb_ctrl_getrecmode_recv(ctdb, mem_ctx, state, recmode);
1087 set the recovery mode of a remote node
// Set the recovery mode of destnode: `recmode` is sent as a 32-bit payload
// in CTDB_CONTROL_SET_RECMODE. A control failure or non-zero status is
// logged.
1089 int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmode)
1095 data.dsize = sizeof(uint32_t);
1096 data.dptr = (unsigned char *)&recmode;
1098 ret = ctdb_control(ctdb, destnode, 0,
1099 CTDB_CONTROL_SET_RECMODE, 0, data,
1100 NULL, NULL, &res, &timeout, NULL);
1101 if (ret != 0 || res != 0) {
1102 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setrecmode failed\n"));
1112 get the recovery master of a remote node
// Async send half of getrecmaster: issues CTDB_CONTROL_GET_RECMASTER to
// destnode with the given timeout and returns the pending control state,
// which is allocated on mem_ctx by ctdb_control_send().
1114 struct ctdb_client_control_state *
1115 ctdb_ctrl_getrecmaster_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx,
1116 struct timeval timeout, uint32_t destnode)
1118 return ctdb_control_send(ctdb, destnode, 0,
1119 CTDB_CONTROL_GET_RECMASTER, 0, tdb_null,
1120 mem_ctx, &timeout, NULL);
// Async receive half of getrecmaster: waits via ctdb_control_recv() and
// stores the control's status value, cast to uint32_t, in *recmaster.
1123 int ctdb_ctrl_getrecmaster_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *recmaster)
1128 ret = ctdb_control_recv(ctdb, state, mem_ctx, NULL, &res, NULL);
1130 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_getrecmaster_recv failed\n"));
1135 *recmaster = (uint32_t)res;
// Synchronous getrecmaster: the _send half followed by the _recv half.
1141 int ctdb_ctrl_getrecmaster(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t *recmaster)
1143 struct ctdb_client_control_state *state;
1145 state = ctdb_ctrl_getrecmaster_send(ctdb, mem_ctx, timeout, destnode);
1146 return ctdb_ctrl_getrecmaster_recv(ctdb, mem_ctx, state, recmaster);
1151 set the recovery master of a remote node
// Set the recovery master on destnode: `recmaster` is sent as a 32-bit
// payload in CTDB_CONTROL_SET_RECMASTER. A control failure or non-zero
// status is logged.
1153 int ctdb_ctrl_setrecmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmaster)
1160 data.dsize = sizeof(uint32_t);
1161 data.dptr = (unsigned char *)&recmaster;
1163 ret = ctdb_control(ctdb, destnode, 0,
1164 CTDB_CONTROL_SET_RECMASTER, 0, data,
1165 NULL, NULL, &res, &timeout, NULL);
1166 if (ret != 0 || res != 0) {
1167 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setrecmaster failed\n"));
1176 get a list of databases off a remote node
1178 int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
1179 TALLOC_CTX *mem_ctx, struct ctdb_dbid_map **dbmap)
1185 ret = ctdb_control(ctdb, destnode, 0,
1186 CTDB_CONTROL_GET_DBMAP, 0, tdb_null,
1187 mem_ctx, &outdata, &res, &timeout, NULL);
1188 if (ret != 0 || res != 0) {
1189 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getdbmap failed ret:%d res:%d\n", ret, res));
1193 *dbmap = (struct ctdb_dbid_map *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
1194 talloc_free(outdata.dptr);
1200 get a list of nodes (vnn and flags ) from a remote node
1202 int ctdb_ctrl_getnodemap(struct ctdb_context *ctdb,
1203 struct timeval timeout, uint32_t destnode,
1204 TALLOC_CTX *mem_ctx, struct ctdb_node_map **nodemap)
1210 ret = ctdb_control(ctdb, destnode, 0,
1211 CTDB_CONTROL_GET_NODEMAP, 0, tdb_null,
1212 mem_ctx, &outdata, &res, &timeout, NULL);
1213 if (ret == 0 && res == -1 && outdata.dsize == 0) {
1214 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getnodes failed, falling back to ipv4-only control\n"));
1215 return ctdb_ctrl_getnodemapv4(ctdb, timeout, destnode, mem_ctx, nodemap);
1217 if (ret != 0 || res != 0 || outdata.dsize == 0) {
1218 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getnodes failed ret:%d res:%d\n", ret, res));
1222 *nodemap = (struct ctdb_node_map *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
1223 talloc_free(outdata.dptr);
1229 old style ipv4-only get a list of nodes (vnn and flags ) from a remote node
// IPv4-only variant of getnodemap: issues CTDB_CONTROL_GET_NODEMAPv4 and
// converts the reply into a zero-initialised struct ctdb_node_map allocated
// on mem_ctx. For each node the pnn, flags and IPv4 address are copied and
// the address family is set to AF_INET. The reply buffer is then freed.
// NOTE(review): no check of nodemapv4->num against outdata.dsize is visible
// here — confirm the reply length is validated before the loop.
1231 int ctdb_ctrl_getnodemapv4(struct ctdb_context *ctdb,
1232 struct timeval timeout, uint32_t destnode,
1233 TALLOC_CTX *mem_ctx, struct ctdb_node_map **nodemap)
1237 struct ctdb_node_mapv4 *nodemapv4;
1240 ret = ctdb_control(ctdb, destnode, 0,
1241 CTDB_CONTROL_GET_NODEMAPv4, 0, tdb_null,
1242 mem_ctx, &outdata, &res, &timeout, NULL);
1243 if (ret != 0 || res != 0 || outdata.dsize == 0) {
1244 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getnodesv4 failed ret:%d res:%d\n", ret, res));
1248 nodemapv4 = (struct ctdb_node_mapv4 *)outdata.dptr;
1250 len = offsetof(struct ctdb_node_map, nodes) + nodemapv4->num*sizeof(struct ctdb_node_and_flags);
1251 (*nodemap) = talloc_zero_size(mem_ctx, len);
1252 CTDB_NO_MEMORY(ctdb, (*nodemap));
1254 (*nodemap)->num = nodemapv4->num;
1255 for (i=0; i<nodemapv4->num; i++) {
1256 (*nodemap)->nodes[i].pnn = nodemapv4->nodes[i].pnn;
1257 (*nodemap)->nodes[i].flags = nodemapv4->nodes[i].flags;
1258 (*nodemap)->nodes[i].addr.ip = nodemapv4->nodes[i].sin;
1259 (*nodemap)->nodes[i].addr.sa.sa_family = AF_INET;
1262 talloc_free(outdata.dptr);
1268 drop the transport, reload the nodes file and restart the transport
// Send CTDB_CONTROL_RELOAD_NODES_FILE (no payload) to destnode with the
// given timeout. A control failure or non-zero status is logged.
1270 int ctdb_ctrl_reload_nodes_file(struct ctdb_context *ctdb,
1271 struct timeval timeout, uint32_t destnode)
1276 ret = ctdb_control(ctdb, destnode, 0,
1277 CTDB_CONTROL_RELOAD_NODES_FILE, 0, tdb_null,
1278 NULL, NULL, &res, &timeout, NULL);
1279 if (ret != 0 || res != 0) {
1280 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for reloadnodesfile failed\n"));
1289 set vnn map on a node
// Push `vnnmap` to destnode. The map is serialised into a
// struct ctdb_vnn_map_wire allocated on mem_ctx (generation, size, then
// `size` 32-bit entries) and sent with CTDB_CONTROL_SETVNNMAP.
// A control failure or non-zero status is logged.
1291 int ctdb_ctrl_setvnnmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
1292 TALLOC_CTX *mem_ctx, struct ctdb_vnn_map *vnnmap)
1297 struct ctdb_vnn_map_wire *map;
1300 len = offsetof(struct ctdb_vnn_map_wire, map) + sizeof(uint32_t)*vnnmap->size;
1301 map = talloc_size(mem_ctx, len);
1302 CTDB_NO_MEMORY(ctdb, map);
1304 map->generation = vnnmap->generation;
1305 map->size = vnnmap->size;
1306 memcpy(map->map, vnnmap->map, sizeof(uint32_t)*map->size);
1309 data.dptr = (uint8_t *)map;
1311 ret = ctdb_control(ctdb, destnode, 0,
1312 CTDB_CONTROL_SETVNNMAP, 0, data,
1313 NULL, NULL, &res, &timeout, NULL);
1314 if (ret != 0 || res != 0) {
1315 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setvnnmap failed\n"));
1326 async send for pull database
// Async send half of pulldb: builds a struct ctdb_control_pulldb request on
// mem_ctx and issues CTDB_CONTROL_PULL_DB to destnode with the given
// timeout. The pending control state is allocated on mem_ctx by
// ctdb_control_send().
1328 struct ctdb_client_control_state *ctdb_ctrl_pulldb_send(
1329 struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid,
1330 uint32_t lmaster, TALLOC_CTX *mem_ctx, struct timeval timeout)
1333 struct ctdb_control_pulldb *pull;
1334 struct ctdb_client_control_state *state;
1336 pull = talloc(mem_ctx, struct ctdb_control_pulldb);
1337 CTDB_NO_MEMORY_NULL(ctdb, pull);
1340 pull->lmaster = lmaster;
1342 indata.dsize = sizeof(struct ctdb_control_pulldb);
1343 indata.dptr = (unsigned char *)pull;
1345 state = ctdb_control_send(ctdb, destnode, 0,
1346 CTDB_CONTROL_PULL_DB, 0, indata,
1347 mem_ctx, &timeout, NULL);
1354 async recv for pull database
// Async receive half of pulldb: waits via ctdb_control_recv(), which
// places the reply data, allocated on mem_ctx, in *outdata. A control
// failure or non-zero status is logged.
1356 int ctdb_ctrl_pulldb_recv(
1357 struct ctdb_context *ctdb,
1358 TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state,
1364 ret = ctdb_control_recv(ctdb, state, mem_ctx, outdata, &res, NULL);
1365 if ( (ret != 0) || (res != 0) ){
1366 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_pulldb_recv failed\n"));
1374 pull all keys and records for a specific database on a node
// Synchronous pulldb: the _send half followed by the _recv half.
1376 int ctdb_ctrl_pulldb(struct ctdb_context *ctdb, uint32_t destnode,
1377 uint32_t dbid, uint32_t lmaster,
1378 TALLOC_CTX *mem_ctx, struct timeval timeout,
1381 struct ctdb_client_control_state *state;
1383 state = ctdb_ctrl_pulldb_send(ctdb, destnode, dbid, lmaster, mem_ctx,
1386 return ctdb_ctrl_pulldb_recv(ctdb, mem_ctx, state, outdata);
1391 change dmaster for all keys in the database to the new value
/*
 * Send CTDB_CONTROL_SET_DMASTER to destnode.
 *
 * The payload is two uint32_t values allocated on mem_ctx: dbid followed
 * by dmaster.  A non-zero ret or res is logged as a failure.
 *
 * NOTE(review): no NULL check of the talloc_array() result is visible
 * before the two stores below, and no release of the payload is visible
 * either -- confirm against the complete file.
 */
1393 int ctdb_ctrl_setdmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
1394 TALLOC_CTX *mem_ctx, uint32_t dbid, uint32_t dmaster)
1400 indata.dsize = 2*sizeof(uint32_t);
1401 indata.dptr = (unsigned char *)talloc_array(mem_ctx, uint32_t, 2);
1403 ((uint32_t *)(&indata.dptr[0]))[0] = dbid;
1404 ((uint32_t *)(&indata.dptr[0]))[1] = dmaster;
1406 ret = ctdb_control(ctdb, destnode, 0,
1407 CTDB_CONTROL_SET_DMASTER, 0, indata,
1408 NULL, NULL, &res, &timeout, NULL);
1409 if (ret != 0 || res != 0) {
1410 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setdmaster failed\n"));
1418 ping a node, return number of clients connected
/*
 * Send CTDB_CONTROL_PING to destnode with an empty payload (tdb_null).
 * NULL is passed in the argument position where other callers in this
 * file pass &timeout.  The status is captured in res; how it is turned
 * into the return value is not visible in this excerpt.
 */
1420 int ctdb_ctrl_ping(struct ctdb_context *ctdb, uint32_t destnode)
1425 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_PING, 0,
1426 tdb_null, NULL, NULL, &res, NULL, NULL);
1434 find the real path to a ltdb
/*
 * Ask destnode for the path of database dbid (CTDB_CONTROL_GETDBPATH).
 *
 * The request payload is the dbid itself.  The reply, allocated on
 * mem_ctx, is copied into *path with talloc_strndup() bounded by the
 * reply size, after which the reply buffer is freed.
 *
 * NOTE(review): no talloc_free(data.dptr) is visible on the path where
 * talloc_strndup() returns NULL -- confirm against the complete file.
 */
1436 int ctdb_ctrl_getdbpath(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx,
1443 data.dptr = (uint8_t *)&dbid;
1444 data.dsize = sizeof(dbid);
1446 ret = ctdb_control(ctdb, destnode, 0,
1447 CTDB_CONTROL_GETDBPATH, 0, data,
1448 mem_ctx, &data, &res, &timeout, NULL);
1449 if (ret != 0 || res != 0) {
1453 (*path) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize);
1454 if ((*path) == NULL) {
1458 talloc_free(data.dptr);
1464 find the name of a db
/*
 * Ask destnode for the name of database dbid (CTDB_CONTROL_GET_DBNAME).
 *
 * The request payload is the dbid itself.  The reply, allocated on
 * mem_ctx, is copied into *name with talloc_strndup() bounded by the
 * reply size, after which the reply buffer is freed.
 *
 * NOTE(review): no talloc_free(data.dptr) is visible on the path where
 * talloc_strndup() returns NULL -- confirm against the complete file.
 */
1466 int ctdb_ctrl_getdbname(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx,
1473 data.dptr = (uint8_t *)&dbid;
1474 data.dsize = sizeof(dbid);
1476 ret = ctdb_control(ctdb, destnode, 0,
1477 CTDB_CONTROL_GET_DBNAME, 0, data,
1478 mem_ctx, &data, &res, &timeout, NULL);
1479 if (ret != 0 || res != 0) {
1483 (*name) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize);
1484 if ((*name) == NULL) {
1488 talloc_free(data.dptr);
1494 get the health status of a db
/*
 * Query the health of database dbid on destnode
 * (CTDB_CONTROL_DB_GET_HEALTH).
 *
 * The request payload is the dbid.  An empty reply (data.dsize == 0) is
 * handled as a separate case whose body is not visible in this excerpt;
 * otherwise the reply is copied into *reason with talloc_strndup() on
 * mem_ctx and the reply buffer is freed.
 */
1496 int ctdb_ctrl_getdbhealth(struct ctdb_context *ctdb,
1497 struct timeval timeout,
1499 uint32_t dbid, TALLOC_CTX *mem_ctx,
1500 const char **reason)
1506 data.dptr = (uint8_t *)&dbid;
1507 data.dsize = sizeof(dbid);
1509 ret = ctdb_control(ctdb, destnode, 0,
1510 CTDB_CONTROL_DB_GET_HEALTH, 0, data,
1511 mem_ctx, &data, &res, &timeout, NULL);
1512 if (ret != 0 || res != 0) {
1516 if (data.dsize == 0) {
1521 (*reason) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize);
1522 if ((*reason) == NULL) {
1526 talloc_free(data.dptr);
/*
 * Ask destnode to attach the database called name.
 *
 * The payload is name including its terminating NUL.  persistent selects
 * CTDB_CONTROL_DB_ATTACH_PERSISTENT instead of CTDB_CONTROL_DB_ATTACH.
 * The reply is received into data on mem_ctx; what is done with it and
 * the return statements are not visible in this excerpt.
 */
1534 int ctdb_ctrl_createdb(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
1535 TALLOC_CTX *mem_ctx, const char *name, bool persistent)
1541 data.dptr = discard_const(name);
1542 data.dsize = strlen(name)+1;
1544 ret = ctdb_control(ctdb, destnode, 0,
1545 persistent?CTDB_CONTROL_DB_ATTACH_PERSISTENT:CTDB_CONTROL_DB_ATTACH,
1547 mem_ctx, &data, &res, &timeout, NULL);
1549 if (ret != 0 || res != 0) {
1557 get debug level on a node
/*
 * Read the debug level of destnode (CTDB_CONTROL_GET_DEBUG).
 *
 * The reply is allocated on ctdb and must be exactly sizeof(int32_t)
 * bytes; its value is stored in *level and the reply buffer is freed.
 *
 * NOTE(review): on the bad-size path only the DEBUG call is visible; no
 * talloc_free(data.dptr) appears there although the reply is parented to
 * ctdb -- confirm against the complete file.
 */
1559 int ctdb_ctrl_get_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, int32_t *level)
1565 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_DEBUG, 0, tdb_null,
1566 ctdb, &data, &res, NULL, NULL);
1567 if (ret != 0 || res != 0) {
1570 if (data.dsize != sizeof(int32_t)) {
1571 DEBUG(DEBUG_ERR,("Bad control reply size in ctdb_get_debuglevel (got %u)\n",
1572 (unsigned)data.dsize));
1575 *level = *(int32_t *)data.dptr;
1576 talloc_free(data.dptr);
1581 set debug level on a node
/*
 * Set the debug level of destnode (CTDB_CONTROL_SET_DEBUG).  The payload
 * is the int32_t level passed by the caller; no reply data is requested
 * and NULL is passed in the timeout position.
 */
1583 int ctdb_ctrl_set_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, int32_t level)
1589 data.dptr = (uint8_t *)&level;
1590 data.dsize = sizeof(level);
1592 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SET_DEBUG, 0, data,
1593 NULL, NULL, &res, NULL, NULL);
1594 if (ret != 0 || res != 0) {
1602 get a list of connected nodes
/*
 * Build an array of the pnns of all nodes that are not flagged
 * NODE_FLAGS_DISCONNECTED.
 *
 * The node map is fetched from CTDB_CURRENT_NODE onto mem_ctx, and the
 * result array (map->num slots) is also allocated on mem_ctx.
 * *num_nodes is used as the index of the next slot to fill.
 *
 * NOTE(review): the initialisation and increment of *num_nodes, the
 * error returns and any release of the node map are not visible in this
 * excerpt.
 */
1604 uint32_t *ctdb_get_connected_nodes(struct ctdb_context *ctdb,
1605 struct timeval timeout,
1606 TALLOC_CTX *mem_ctx,
1607 uint32_t *num_nodes)
1609 struct ctdb_node_map *map=NULL;
1615 ret = ctdb_ctrl_getnodemap(ctdb, timeout, CTDB_CURRENT_NODE, mem_ctx, &map);
1620 nodes = talloc_array(mem_ctx, uint32_t, map->num);
1621 if (nodes == NULL) {
1625 for (i=0;i<map->num;i++) {
1626 if (!(map->nodes[i].flags & NODE_FLAGS_DISCONNECTED)) {
1627 nodes[*num_nodes] = map->nodes[i].pnn;
/*
 * Send CTDB_CONTROL_STATISTICS_RESET to destnode with an empty payload
 * and NULL in the timeout position; a non-zero ret or res is logged as a
 * failure.
 */
1639 int ctdb_statistics_reset(struct ctdb_context *ctdb, uint32_t destnode)
1644 ret = ctdb_control(ctdb, destnode, 0,
1645 CTDB_CONTROL_STATISTICS_RESET, 0, tdb_null,
1646 NULL, NULL, &res, NULL, NULL);
1647 if (ret != 0 || res != 0) {
1648 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for reset statistics failed\n"));
1655 this is the dummy null procedure that all databases support
/*
 * Call handler that ctdb_attach() registers under CTDB_NULL_FUNC.  Its
 * body is not visible in this excerpt.
 */
1657 static int ctdb_null_func(struct ctdb_call_info *call)
1663 this is a plain fetch procedure that all databases support
/*
 * Call handler that ctdb_attach() registers under CTDB_FETCH_FUNC: it
 * points call->reply_data at the record's own record_data, so the record
 * contents become the reply without being copied.
 */
1665 static int ctdb_fetch_func(struct ctdb_call_info *call)
1667 call->reply_data = &call->record_data;
1672 attach to a specific database - client call
/*
 * Attach this client to the database called name.
 *
 * Steps visible here:
 *   - look for an existing handle with ctdb_db_handle();
 *   - allocate a zeroed ctdb_db_context on ctdb and record the name;
 *   - ask the local daemon (CTDB_CURRENT_NODE) to attach, using the
 *     persistent or non-persistent attach control; the caller's
 *     tdb_flags value is passed as the third ctdb_control() argument;
 *   - take the database id from the 4-byte reply;
 *   - fetch the database path with a 2 second timeout;
 *   - open the local tdb read/write, with TDB_DEFAULT for persistent
 *     databases and TDB_NOSYNC otherwise, plus TDB_NOMMAP when
 *     ctdb->valgrinding is set, and always TDB_DISALLOW_NESTING;
 *   - link the context into ctdb->db_list and register the two
 *     well-known call handlers.
 * Each failure shown frees ctdb_db after reporting the error.
 *
 * NOTE(review): the handling of an already-attached database and all
 * return statements are not visible in this excerpt.
 */
1674 struct ctdb_db_context *ctdb_attach(struct ctdb_context *ctdb, const char *name, bool persistent, uint32_t tdb_flags)
1676 struct ctdb_db_context *ctdb_db;
1681 ctdb_db = ctdb_db_handle(ctdb, name);
1686 ctdb_db = talloc_zero(ctdb, struct ctdb_db_context);
1687 CTDB_NO_MEMORY_NULL(ctdb, ctdb_db);
1689 ctdb_db->ctdb = ctdb;
1690 ctdb_db->db_name = talloc_strdup(ctdb_db, name);
1691 CTDB_NO_MEMORY_NULL(ctdb, ctdb_db->db_name);
1693 data.dptr = discard_const(name);
1694 data.dsize = strlen(name)+1;
1696 /* tell ctdb daemon to attach */
1697 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, tdb_flags,
1698 persistent?CTDB_CONTROL_DB_ATTACH_PERSISTENT:CTDB_CONTROL_DB_ATTACH,
1699 0, data, ctdb_db, &data, &res, NULL, NULL);
1700 if (ret != 0 || res != 0 || data.dsize != sizeof(uint32_t)) {
1701 DEBUG(DEBUG_ERR,("Failed to attach to database '%s'\n", name));
1702 talloc_free(ctdb_db);
1706 ctdb_db->db_id = *(uint32_t *)data.dptr;
1707 talloc_free(data.dptr);
1709 ret = ctdb_ctrl_getdbpath(ctdb, timeval_current_ofs(2, 0), CTDB_CURRENT_NODE, ctdb_db->db_id, ctdb_db, &ctdb_db->db_path);
1711 DEBUG(DEBUG_ERR,("Failed to get dbpath for database '%s'\n", name));
1712 talloc_free(ctdb_db);
1716 tdb_flags = persistent?TDB_DEFAULT:TDB_NOSYNC;
1717 if (ctdb->valgrinding) {
1718 tdb_flags |= TDB_NOMMAP;
1720 tdb_flags |= TDB_DISALLOW_NESTING;
1722 ctdb_db->ltdb = tdb_wrap_open(ctdb, ctdb_db->db_path, 0, tdb_flags, O_RDWR, 0);
1723 if (ctdb_db->ltdb == NULL) {
1724 ctdb_set_error(ctdb, "Failed to open tdb '%s'\n", ctdb_db->db_path);
1725 talloc_free(ctdb_db);
1729 ctdb_db->persistent = persistent;
1731 DLIST_ADD(ctdb->db_list, ctdb_db);
1733 /* add well known functions */
1734 ctdb_set_call(ctdb_db, ctdb_null_func, CTDB_NULL_FUNC);
1735 ctdb_set_call(ctdb_db, ctdb_fetch_func, CTDB_FETCH_FUNC);
1742 setup a call for a database
/*
 * Register call handler fn under id for ctdb_db.
 *
 * The visible code contains a CTDB_CONTROL_SET_CALL request to the local
 * daemon followed by a local registration: a ctdb_registered_call is
 * allocated on ctdb_db and linked into ctdb_db->calls.
 *
 * NOTE(review): the inline comment states the control path is no longer
 * valid; whether that section is compiled out by preprocessor lines
 * missing from this excerpt could not be determined -- confirm against
 * the complete file.
 */
1744 int ctdb_set_call(struct ctdb_db_context *ctdb_db, ctdb_fn_t fn, uint32_t id)
1746 struct ctdb_registered_call *call;
1751 struct ctdb_control_set_call c;
1754 /* this is no longer valid with the separate daemon architecture */
1755 c.db_id = ctdb_db->db_id;
1759 data.dptr = (uint8_t *)&c;
1760 data.dsize = sizeof(c);
1762 ret = ctdb_control(ctdb_db->ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_SET_CALL, 0,
1763 data, NULL, NULL, &status, NULL, NULL);
1764 if (ret != 0 || status != 0) {
1765 DEBUG(DEBUG_ERR,("ctdb_set_call failed for call %u\n", id));
1770 /* also register locally */
1771 call = talloc(ctdb_db, struct ctdb_registered_call);
1775 DLIST_ADD(ctdb_db->calls, call);
/*
 * State shared between ctdb_traverse() and traverse_handler().  fn is the
 * per-record callback.  The code in this file also uses members named
 * done and private_data, whose declarations are not visible in this
 * excerpt.
 */
1780 struct traverse_state {
1783 ctdb_traverse_func fn;
1788 called on each key during a ctdb_traverse
/*
 * Message handler invoked for each record delivered during a traverse.
 *
 * The message is interpreted as a struct ctdb_rec_data.  It is rejected
 * when it is shorter than a uint32_t or when its length field differs
 * from the message size.  The key is the first keylen bytes of d->data
 * and the value the datalen bytes that follow.  An empty key with empty
 * data marks the end of the traverse; a value that is exactly one
 * struct ctdb_ltdb_header long is treated as a deleted record; anything
 * else is passed to the user callback.
 *
 * NOTE(review): keylen and datalen are used as offsets and sizes without
 * a visible check that they fit inside the message -- confirm whether
 * the sender is trusted to keep them consistent with length.
 */
1790 static void traverse_handler(struct ctdb_context *ctdb, uint64_t srvid, TDB_DATA data, void *p)
1792 struct traverse_state *state = (struct traverse_state *)p;
1793 struct ctdb_rec_data *d = (struct ctdb_rec_data *)data.dptr;
1796 if (data.dsize < sizeof(uint32_t) ||
1797 d->length != data.dsize) {
1798 DEBUG(DEBUG_ERR,("Bad data size %u in traverse_handler\n", (unsigned)data.dsize));
1803 key.dsize = d->keylen;
1804 key.dptr = &d->data[0];
1805 data.dsize = d->datalen;
1806 data.dptr = &d->data[d->keylen];
1808 if (key.dsize == 0 && data.dsize == 0) {
1809 /* end of traverse */
1814 if (data.dsize == sizeof(struct ctdb_ltdb_header)) {
1815 /* empty records are deleted records in ctdb */
1819 if (state->fn(ctdb, key, data, state->private_data) != 0) {
1828 start a cluster wide traverse, calling the supplied fn on each record
1829 return the number of records traversed, or -1 on error
/*
 * Run a traverse of ctdb_db, calling fn for each record.
 *
 * A message handler is registered under a srvid built from getpid() with
 * the top four bits set, CTDB_CONTROL_TRAVERSE_START is sent to the
 * local daemon, and the event loop is run until traverse_handler() sets
 * state.done.  The handler is removed again both when the start control
 * fails and after the loop finishes.
 *
 * NOTE(review): the remaining fields of the start request, the
 * initialisation of state and the return statements are not visible in
 * this excerpt.
 */
1831 int ctdb_traverse(struct ctdb_db_context *ctdb_db, ctdb_traverse_func fn, void *private_data)
1834 struct ctdb_traverse_start t;
1837 uint64_t srvid = (getpid() | 0xFLL<<60);
1838 struct traverse_state state;
1842 state.private_data = private_data;
1845 ret = ctdb_client_set_message_handler(ctdb_db->ctdb, srvid, traverse_handler, &state);
1847 DEBUG(DEBUG_ERR,("Failed to setup traverse handler\n"));
1851 t.db_id = ctdb_db->db_id;
1855 data.dptr = (uint8_t *)&t;
1856 data.dsize = sizeof(t);
1858 ret = ctdb_control(ctdb_db->ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_TRAVERSE_START, 0,
1859 data, NULL, NULL, &status, NULL, NULL);
1860 if (ret != 0 || status != 0) {
1861 DEBUG(DEBUG_ERR,("ctdb_traverse_all failed\n"));
1862 ctdb_client_remove_message_handler(ctdb_db->ctdb, srvid, &state);
1866 while (!state.done) {
1867 event_loop_once(ctdb_db->ctdb->ev);
1870 ret = ctdb_client_remove_message_handler(ctdb_db->ctdb, srvid, &state);
1872 DEBUG(DEBUG_ERR,("Failed to remove ctdb_traverse handler\n"));
/*
 * True when x lies in the range 32..127.  The argument is parenthesised
 * so that compound expressions (for example a conditional expression)
 * expand with the intended precedence.  x is evaluated twice, so it must
 * not have side effects.
 */
#define ISASCII(x) (((x)>31)&&((x)<128))
1881 called on each key during a catdb
/*
 * Traverse callback that prints one record to the FILE passed as p.
 *
 * The key is printed byte by byte: bytes accepted by ISASCII() are
 * written as characters and all others as a backslash followed by two
 * hex digits.  The start of data is read as a struct ctdb_ltdb_header
 * and its dmaster and rsn fields are printed, followed by the remaining
 * bytes of data in the same escaped form.
 *
 * NOTE(review): no check that data.dsize is at least sizeof(*h) is
 * visible before the header is read and before the unsigned subtraction
 * data.dsize - sizeof(*h) -- confirm that callers guarantee this.
 */
1883 int ctdb_dumpdb_record(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA data, void *p)
1886 FILE *f = (FILE *)p;
1887 struct ctdb_ltdb_header *h = (struct ctdb_ltdb_header *)data.dptr;
1889 fprintf(f, "key(%u) = \"", (unsigned)key.dsize);
1890 for (i=0;i<key.dsize;i++) {
1891 if (ISASCII(key.dptr[i])) {
1892 fprintf(f, "%c", key.dptr[i]);
1894 fprintf(f, "\\%02X", key.dptr[i]);
1899 fprintf(f, "dmaster: %u\n", h->dmaster);
1900 fprintf(f, "rsn: %llu\n", (unsigned long long)h->rsn);
1902 fprintf(f, "data(%u) = \"", (unsigned)(data.dsize - sizeof(*h)));
1903 for (i=sizeof(*h);i<data.dsize;i++) {
1904 if (ISASCII(data.dptr[i])) {
1905 fprintf(f, "%c", data.dptr[i]);
1907 fprintf(f, "\\%02X", data.dptr[i]);
1918 convenience function to list all keys to stdout
/*
 * Dump every record of ctdb_db to f by running ctdb_traverse() with
 * ctdb_dumpdb_record() as the callback; returns the traverse result.
 */
1920 int ctdb_dump_db(struct ctdb_db_context *ctdb_db, FILE *f)
1922 return ctdb_traverse(ctdb_db, ctdb_dumpdb_record, f);
1926 get the pid of a ctdb daemon
/*
 * Send CTDB_CONTROL_GET_PID to destnode with an empty payload.  No reply
 * data is requested; the control status is captured in res.
 *
 * NOTE(review): the failure condition and the store into *pid are not
 * visible in this excerpt.
 */
1928 int ctdb_ctrl_getpid(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *pid)
1933 ret = ctdb_control(ctdb, destnode, 0,
1934 CTDB_CONTROL_GET_PID, 0, tdb_null,
1935 NULL, NULL, &res, &timeout, NULL);
1937 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpid failed\n"));
1948 async freeze send control
/*
 * Start an asynchronous CTDB_CONTROL_FREEZE request to destnode.  The
 * priority is passed as the third argument of ctdb_control_send(); the
 * returned state is parented to mem_ctx.
 */
1950 struct ctdb_client_control_state *
1951 ctdb_ctrl_freeze_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t priority)
1953 return ctdb_control_send(ctdb, destnode, priority,
1954 CTDB_CONTROL_FREEZE, 0, tdb_null,
1955 mem_ctx, &timeout, NULL);
1959 async freeze recv control
/*
 * Collect the result of a freeze request: waits on state with
 * ctdb_control_recv() (no reply data requested) and logs an error when
 * ret or res is non-zero.
 */
1961 int ctdb_ctrl_freeze_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state)
1966 ret = ctdb_control_recv(ctdb, state, mem_ctx, NULL, &res, NULL);
1967 if ( (ret != 0) || (res != 0) ){
1968 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_freeze_recv failed\n"));
1976 freeze databases of a certain priority
/*
 * Synchronous freeze of the databases of one priority on destnode.  A
 * temporary talloc context holds the request state for the send/recv
 * pair and is freed once the result has been received.
 */
1978 int ctdb_ctrl_freeze_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t priority)
1980 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1981 struct ctdb_client_control_state *state;
1984 state = ctdb_ctrl_freeze_send(ctdb, tmp_ctx, timeout, destnode, priority);
1985 ret = ctdb_ctrl_freeze_recv(ctdb, tmp_ctx, state);
1986 talloc_free(tmp_ctx);
1991 /* Freeze all databases */
/*
 * Freeze every priority level on destnode by calling
 * ctdb_ctrl_freeze_priority() for 1..NUM_DB_PRIORITIES.  What happens
 * when one level fails is not visible in this excerpt.
 */
1992 int ctdb_ctrl_freeze(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
1996 for (i=1; i<=NUM_DB_PRIORITIES; i++) {
1997 if (ctdb_ctrl_freeze_priority(ctdb, timeout, destnode, i) != 0) {
2005 thaw databases of a certain priority
/*
 * Send CTDB_CONTROL_THAW to destnode, with priority as the third
 * ctdb_control() argument; a non-zero ret or res is logged as a failure.
 */
2007 int ctdb_ctrl_thaw_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t priority)
2012 ret = ctdb_control(ctdb, destnode, priority,
2013 CTDB_CONTROL_THAW, 0, tdb_null,
2014 NULL, NULL, &res, &timeout, NULL);
2015 if (ret != 0 || res != 0) {
2016 DEBUG(DEBUG_ERR,(__location__ " ctdb_control thaw failed\n"));
2023 /* thaw all databases */
/*
 * Thaw wrapper that calls ctdb_ctrl_thaw_priority() with priority 0 and
 * returns its result.
 */
2024 int ctdb_ctrl_thaw(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2026 return ctdb_ctrl_thaw_priority(ctdb, timeout, destnode, 0);
2030 get pnn of a node, or -1
/*
 * Send CTDB_CONTROL_GET_PNN to destnode with an empty payload; the
 * control status is captured in res.  The failure condition and the
 * return statements are not visible in this excerpt.
 */
2032 int ctdb_ctrl_getpnn(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2037 ret = ctdb_control(ctdb, destnode, 0,
2038 CTDB_CONTROL_GET_PNN, 0, tdb_null,
2039 NULL, NULL, &res, &timeout, NULL);
2041 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpnn failed\n"));
2049 get the monitoring mode of a remote node
/*
 * Send CTDB_CONTROL_GET_MONMODE to destnode with an empty payload; the
 * control status is captured in res.  The failure condition and the
 * store into *monmode are not visible in this excerpt.
 */
2051 int ctdb_ctrl_getmonmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *monmode)
2056 ret = ctdb_control(ctdb, destnode, 0,
2057 CTDB_CONTROL_GET_MONMODE, 0, tdb_null,
2058 NULL, NULL, &res, &timeout, NULL);
2060 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getmonmode failed\n"));
2071 set the monitoring mode of a remote node to active
/*
 * Send CTDB_CONTROL_ENABLE_MONITOR to destnode.  NULL is passed for both
 * the reply data and the status argument, so only the ctdb_control()
 * return value is available to detect failure.
 */
2073 int ctdb_ctrl_enable_monmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2078 ret = ctdb_control(ctdb, destnode, 0,
2079 CTDB_CONTROL_ENABLE_MONITOR, 0, tdb_null,
2080 NULL, NULL,NULL, &timeout, NULL);
2082 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for enable_monitor failed\n"));
2092 set the monitoring mode of a remote node to disable
/*
 * Send CTDB_CONTROL_DISABLE_MONITOR to destnode.  NULL is passed for
 * both the reply data and the status argument, so only the
 * ctdb_control() return value is available to detect failure.
 */
2094 int ctdb_ctrl_disable_monmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2099 ret = ctdb_control(ctdb, destnode, 0,
2100 CTDB_CONTROL_DISABLE_MONITOR, 0, tdb_null,
2101 NULL, NULL, NULL, &timeout, NULL);
2103 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for disable_monitor failed\n"));
2115 sent to a node to make it take over an ip address
/*
 * Tell destnode to take over the public address described by ip.
 *
 * For AF_INET addresses the request is repacked into a
 * struct ctdb_public_ipv4 and sent with CTDB_CONTROL_TAKEOVER_IPv4;
 * otherwise *ip is sent unchanged with CTDB_CONTROL_TAKEOVER_IP.  A
 * non-zero ret or res is logged as a failure.
 *
 * NOTE(review): only the sin field of the IPv4 request is filled in on
 * the lines visible here; other assignments may be on lines missing
 * from this excerpt.
 */
2117 int ctdb_ctrl_takeover_ip(struct ctdb_context *ctdb, struct timeval timeout,
2118 uint32_t destnode, struct ctdb_public_ip *ip)
2121 struct ctdb_public_ipv4 ipv4;
2125 if (ip->addr.sa.sa_family == AF_INET) {
2127 ipv4.sin = ip->addr.ip;
2129 data.dsize = sizeof(ipv4);
2130 data.dptr = (uint8_t *)&ipv4;
2132 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_TAKEOVER_IPv4, 0, data, NULL,
2133 NULL, &res, &timeout, NULL);
2135 data.dsize = sizeof(*ip);
2136 data.dptr = (uint8_t *)ip;
2138 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_TAKEOVER_IP, 0, data, NULL,
2139 NULL, &res, &timeout, NULL);
2142 if (ret != 0 || res != 0) {
2143 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for takeover_ip failed\n"));
2152 sent to a node to make it release an ip address
/*
 * Tell destnode to release the public address described by ip.
 *
 * For AF_INET addresses the request is repacked into a
 * struct ctdb_public_ipv4 and sent with CTDB_CONTROL_RELEASE_IPv4;
 * otherwise *ip is sent unchanged with CTDB_CONTROL_RELEASE_IP.  A
 * non-zero ret or res is logged as a failure.
 *
 * NOTE(review): only the sin field of the IPv4 request is filled in on
 * the lines visible here; other assignments may be on lines missing
 * from this excerpt.
 */
2154 int ctdb_ctrl_release_ip(struct ctdb_context *ctdb, struct timeval timeout,
2155 uint32_t destnode, struct ctdb_public_ip *ip)
2158 struct ctdb_public_ipv4 ipv4;
2162 if (ip->addr.sa.sa_family == AF_INET) {
2164 ipv4.sin = ip->addr.ip;
2166 data.dsize = sizeof(ipv4);
2167 data.dptr = (uint8_t *)&ipv4;
2169 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_RELEASE_IPv4, 0, data, NULL,
2170 NULL, &res, &timeout, NULL);
2172 data.dsize = sizeof(*ip);
2173 data.dptr = (uint8_t *)ip;
2175 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_RELEASE_IP, 0, data, NULL,
2176 NULL, &res, &timeout, NULL);
2179 if (ret != 0 || res != 0) {
2180 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for release_ip failed\n"));
/*
 * Read the tunable called name from destnode (CTDB_CONTROL_GET_TUNABLE).
 *
 * The request (length-prefixed name including its NUL) is allocated on
 * ctdb and freed as soon as the control has returned.  The reply must be
 * exactly sizeof(uint32_t) bytes; it is copied into *value.  The reply
 * buffer is freed both on the invalid-size path and after a successful
 * read.
 */
2191 int ctdb_ctrl_get_tunable(struct ctdb_context *ctdb,
2192 struct timeval timeout,
2194 const char *name, uint32_t *value)
2196 struct ctdb_control_get_tunable *t;
2197 TDB_DATA data, outdata;
2201 data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(name) + 1;
2202 data.dptr = talloc_size(ctdb, data.dsize);
2203 CTDB_NO_MEMORY(ctdb, data.dptr);
2205 t = (struct ctdb_control_get_tunable *)data.dptr;
2206 t->length = strlen(name)+1;
2207 memcpy(t->name, name, t->length);
2209 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_TUNABLE, 0, data, ctdb,
2210 &outdata, &res, &timeout, NULL);
2211 talloc_free(data.dptr);
2212 if (ret != 0 || res != 0) {
2213 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get_tunable failed\n"));
2217 if (outdata.dsize != sizeof(uint32_t)) {
2218 DEBUG(DEBUG_ERR,("Invalid return data in get_tunable\n"));
2219 talloc_free(outdata.dptr);
2223 *value = *(uint32_t *)outdata.dptr;
2224 talloc_free(outdata.dptr);
/*
 * Set the tunable called name on destnode (CTDB_CONTROL_SET_TUNABLE).
 *
 * The request is allocated on ctdb, filled with the name (including its
 * NUL) and its length, sent, and freed right after the control returns.
 * A non-zero ret or res is logged as a failure.
 *
 * NOTE(review): the store of value into the request is not visible in
 * this excerpt.
 */
2232 int ctdb_ctrl_set_tunable(struct ctdb_context *ctdb,
2233 struct timeval timeout,
2235 const char *name, uint32_t value)
2237 struct ctdb_control_set_tunable *t;
2242 data.dsize = offsetof(struct ctdb_control_set_tunable, name) + strlen(name) + 1;
2243 data.dptr = talloc_size(ctdb, data.dsize);
2244 CTDB_NO_MEMORY(ctdb, data.dptr);
2246 t = (struct ctdb_control_set_tunable *)data.dptr;
2247 t->length = strlen(name)+1;
2248 memcpy(t->name, name, t->length);
2251 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SET_TUNABLE, 0, data, NULL,
2252 NULL, &res, &timeout, NULL);
2253 talloc_free(data.dptr);
2254 if (ret != 0 || res != 0) {
2255 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set_tunable failed\n"));
/*
 * Fetch the names of all tunables from destnode
 * (CTDB_CONTROL_LIST_TUNABLES).
 *
 * The reply is validated so that the embedded length does not exceed
 * the bytes actually received.  The name data is duplicated with
 * talloc_strndup(), the reply is freed, and the copy is split on ':'
 * with strtok_r().  Each token is duplicated into *list, which is grown
 * one element at a time on mem_ctx.
 *
 * NOTE(review): the CTDB_NO_MEMORY check on p sits before
 * talloc_free(outdata.dptr); if that macro returns on NULL (its
 * definition is not shown here) the reply buffer is not released on
 * that path.  The initialisation of *list and *count, the increment of
 * *count and the release of p are not visible in this excerpt.
 */
2265 int ctdb_ctrl_list_tunables(struct ctdb_context *ctdb,
2266 struct timeval timeout,
2268 TALLOC_CTX *mem_ctx,
2269 const char ***list, uint32_t *count)
2274 struct ctdb_control_list_tunable *t;
2277 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_LIST_TUNABLES, 0, tdb_null,
2278 mem_ctx, &outdata, &res, &timeout, NULL);
2279 if (ret != 0 || res != 0) {
2280 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for list_tunables failed\n"));
2284 t = (struct ctdb_control_list_tunable *)outdata.dptr;
2285 if (outdata.dsize < offsetof(struct ctdb_control_list_tunable, data) ||
2286 t->length > outdata.dsize-offsetof(struct ctdb_control_list_tunable, data)) {
2287 DEBUG(DEBUG_ERR,("Invalid data in list_tunables reply\n"));
2288 talloc_free(outdata.dptr);
2292 p = talloc_strndup(mem_ctx, (char *)t->data, t->length);
2293 CTDB_NO_MEMORY(ctdb, p);
2295 talloc_free(outdata.dptr);
2300 for (s=strtok_r(p, ":", &ptr); s; s=strtok_r(NULL, ":", &ptr)) {
2301 (*list) = talloc_realloc(mem_ctx, *list, const char *, 1+(*count));
2302 CTDB_NO_MEMORY(ctdb, *list);
2303 (*list)[*count] = talloc_strdup(*list, s);
2304 CTDB_NO_MEMORY(ctdb, (*list)[*count]);
/*
 * Fetch the public address list from destnode
 * (CTDB_CONTROL_GET_PUBLIC_IPS), passing flags to the control.
 *
 * When the control itself succeeds but reports res == -1, the request
 * is retried through ctdb_ctrl_get_public_ipsv4().  Any other non-zero
 * ret or res is logged as a failure.  On success the reply is
 * duplicated onto mem_ctx into *ips and the original reply buffer is
 * freed.
 *
 * NOTE(review): no NULL check of the talloc_memdup() result is visible
 * in this excerpt.
 */
2314 int ctdb_ctrl_get_public_ips_flags(struct ctdb_context *ctdb,
2315 struct timeval timeout, uint32_t destnode,
2316 TALLOC_CTX *mem_ctx,
2318 struct ctdb_all_public_ips **ips)
2324 ret = ctdb_control(ctdb, destnode, 0,
2325 CTDB_CONTROL_GET_PUBLIC_IPS, flags, tdb_null,
2326 mem_ctx, &outdata, &res, &timeout, NULL);
2327 if (ret == 0 && res == -1) {
2328 DEBUG(DEBUG_ERR,(__location__ " ctdb_control to get public ips failed, falling back to ipv4-only version\n"));
2329 return ctdb_ctrl_get_public_ipsv4(ctdb, timeout, destnode, mem_ctx, ips);
2331 if (ret != 0 || res != 0) {
2332 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpublicips failed ret:%d res:%d\n", ret, res));
2336 *ips = (struct ctdb_all_public_ips *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
2337 talloc_free(outdata.dptr);
/*
 * Convenience wrapper around ctdb_ctrl_get_public_ips_flags(); the
 * remaining arguments of the call are on lines not visible in this
 * excerpt.
 */
2342 int ctdb_ctrl_get_public_ips(struct ctdb_context *ctdb,
2343 struct timeval timeout, uint32_t destnode,
2344 TALLOC_CTX *mem_ctx,
2345 struct ctdb_all_public_ips **ips)
2347 return ctdb_ctrl_get_public_ips_flags(ctdb, timeout,
/*
 * IPv4-only variant of the public address query
 * (CTDB_CONTROL_GET_PUBLIC_IPSv4).
 *
 * The reply is read as a struct ctdb_all_public_ipsv4 and converted
 * into a zero-initialised struct ctdb_all_public_ips on mem_ctx: num is
 * copied, and each entry's pnn and sin are copied into pnn and addr.ip.
 * The reply buffer is freed afterwards.
 *
 * NOTE(review): ipsv4->num is read and used for sizing and looping
 * without a visible check against outdata.dsize, and the
 * CTDB_NO_MEMORY check precedes the release of the reply buffer --
 * confirm against the complete file.
 */
2352 int ctdb_ctrl_get_public_ipsv4(struct ctdb_context *ctdb,
2353 struct timeval timeout, uint32_t destnode,
2354 TALLOC_CTX *mem_ctx, struct ctdb_all_public_ips **ips)
2359 struct ctdb_all_public_ipsv4 *ipsv4;
2361 ret = ctdb_control(ctdb, destnode, 0,
2362 CTDB_CONTROL_GET_PUBLIC_IPSv4, 0, tdb_null,
2363 mem_ctx, &outdata, &res, &timeout, NULL);
2364 if (ret != 0 || res != 0) {
2365 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpublicips failed\n"));
2369 ipsv4 = (struct ctdb_all_public_ipsv4 *)outdata.dptr;
2370 len = offsetof(struct ctdb_all_public_ips, ips) +
2371 ipsv4->num*sizeof(struct ctdb_public_ip);
2372 *ips = talloc_zero_size(mem_ctx, len);
2373 CTDB_NO_MEMORY(ctdb, *ips);
2374 (*ips)->num = ipsv4->num;
2375 for (i=0; i<ipsv4->num; i++) {
2376 (*ips)->ips[i].pnn = ipsv4->ips[i].pnn;
2377 (*ips)->ips[i].addr.ip = ipsv4->ips[i].sin;
2380 talloc_free(outdata.dptr);
/*
 * Fetch information about one public address from destnode
 * (CTDB_CONTROL_GET_PUBLIC_IP_INFO).
 *
 * The request payload is *addr.  The reply is validated in two steps:
 * it must be at least as large as the fixed part of
 * struct ctdb_control_public_ip_info, and then at least as large as the
 * fixed part plus info->num interface entries.  Each interface name is
 * then terminated at index CTDB_IFACE_SIZE, the reply is duplicated
 * onto mem_ctx into *_info, and the original reply buffer is freed.
 *
 * NOTE(review): the terminator store assumes the name array has more
 * than CTDB_IFACE_SIZE elements -- confirm against the struct
 * definition.  No release of the reply buffer is visible on the two
 * invalid-size paths.
 */
2385 int ctdb_ctrl_get_public_ip_info(struct ctdb_context *ctdb,
2386 struct timeval timeout, uint32_t destnode,
2387 TALLOC_CTX *mem_ctx,
2388 const ctdb_sock_addr *addr,
2389 struct ctdb_control_public_ip_info **_info)
2395 struct ctdb_control_public_ip_info *info;
2399 indata.dptr = discard_const_p(uint8_t, addr);
2400 indata.dsize = sizeof(*addr);
2402 ret = ctdb_control(ctdb, destnode, 0,
2403 CTDB_CONTROL_GET_PUBLIC_IP_INFO, 0, indata,
2404 mem_ctx, &outdata, &res, &timeout, NULL);
2405 if (ret != 0 || res != 0) {
2406 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
2407 "failed ret:%d res:%d\n",
2412 len = offsetof(struct ctdb_control_public_ip_info, ifaces);
2413 if (len > outdata.dsize) {
2414 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
2415 "returned invalid data with size %u > %u\n",
2416 (unsigned int)outdata.dsize,
2417 (unsigned int)len));
2418 dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
2422 info = (struct ctdb_control_public_ip_info *)outdata.dptr;
2423 len += info->num*sizeof(struct ctdb_control_iface_info);
2425 if (len > outdata.dsize) {
2426 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
2427 "returned invalid data with size %u > %u\n",
2428 (unsigned int)outdata.dsize,
2429 (unsigned int)len));
2430 dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
2434 /* make sure we null terminate the returned strings */
2435 for (i=0; i < info->num; i++) {
2436 info->ifaces[i].name[CTDB_IFACE_SIZE] = '\0';
2439 *_info = (struct ctdb_control_public_ip_info *)talloc_memdup(mem_ctx,
2442 talloc_free(outdata.dptr);
2443 if (*_info == NULL) {
2444 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
2445 "talloc_memdup size %u failed\n",
2446 (unsigned int)outdata.dsize));
/*
 * Fetch the interface list from destnode (CTDB_CONTROL_GET_IFACES).
 *
 * The reply is validated in two steps: it must be at least as large as
 * the fixed part of struct ctdb_control_get_ifaces, and then at least as
 * large as the fixed part plus ifaces->num interface entries.  Each
 * interface name is then terminated at index CTDB_IFACE_SIZE, the reply
 * is duplicated onto mem_ctx into *_ifaces, and the original reply
 * buffer is freed.
 *
 * NOTE(review): the terminator store assumes the name array has more
 * than CTDB_IFACE_SIZE elements -- confirm against the struct
 * definition.  No release of the reply buffer is visible on the two
 * invalid-size paths.
 */
2453 int ctdb_ctrl_get_ifaces(struct ctdb_context *ctdb,
2454 struct timeval timeout, uint32_t destnode,
2455 TALLOC_CTX *mem_ctx,
2456 struct ctdb_control_get_ifaces **_ifaces)
2461 struct ctdb_control_get_ifaces *ifaces;
2465 ret = ctdb_control(ctdb, destnode, 0,
2466 CTDB_CONTROL_GET_IFACES, 0, tdb_null,
2467 mem_ctx, &outdata, &res, &timeout, NULL);
2468 if (ret != 0 || res != 0) {
2469 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
2470 "failed ret:%d res:%d\n",
2475 len = offsetof(struct ctdb_control_get_ifaces, ifaces);
2476 if (len > outdata.dsize) {
2477 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
2478 "returned invalid data with size %u > %u\n",
2479 (unsigned int)outdata.dsize,
2480 (unsigned int)len));
2481 dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
2485 ifaces = (struct ctdb_control_get_ifaces *)outdata.dptr;
2486 len += ifaces->num*sizeof(struct ctdb_control_iface_info);
2488 if (len > outdata.dsize) {
2489 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
2490 "returned invalid data with size %u > %u\n",
2491 (unsigned int)outdata.dsize,
2492 (unsigned int)len));
2493 dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
2497 /* make sure we null terminate the returned strings */
2498 for (i=0; i < ifaces->num; i++) {
2499 ifaces->ifaces[i].name[CTDB_IFACE_SIZE] = '\0';
2502 *_ifaces = (struct ctdb_control_get_ifaces *)talloc_memdup(mem_ctx,
2505 talloc_free(outdata.dptr);
2506 if (*_ifaces == NULL) {
2507 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
2508 "talloc_memdup size %u failed\n",
2509 (unsigned int)outdata.dsize));
/*
 * Send CTDB_CONTROL_SET_IFACE_LINK_STATE to destnode with *info as the
 * payload.  No reply data is requested; a non-zero ret or res is logged
 * as a failure.
 */
2516 int ctdb_ctrl_set_iface_link(struct ctdb_context *ctdb,
2517 struct timeval timeout, uint32_t destnode,
2518 TALLOC_CTX *mem_ctx,
2519 const struct ctdb_control_iface_info *info)
2525 indata.dptr = discard_const_p(uint8_t, info);
2526 indata.dsize = sizeof(*info);
2528 ret = ctdb_control(ctdb, destnode, 0,
2529 CTDB_CONTROL_SET_IFACE_LINK_STATE, 0, indata,
2530 mem_ctx, NULL, &res, &timeout, NULL);
2531 if (ret != 0 || res != 0) {
2532 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set iface link "
2533 "failed ret:%d res:%d\n",
2542 set/clear the permanent disabled bit on a remote node
/*
 * Set and/or clear node flag bits for destnode across the cluster.
 *
 * Steps visible here:
 *   - ask the local node (CTDB_CURRENT_NODE) who the recovery master is;
 *   - fetch the node map from that recovery master;
 *   - reject destnode values outside the node map;
 *   - start from the node's current flags and remove the bits in clear;
 *   - send CTDB_CONTROL_MODIFY_FLAGS with the old and new flags to the
 *     nodes returned by list_of_connected_nodes().
 * All temporary allocations hang off tmp_ctx, which is freed on every
 * path shown.
 *
 * The nodemap failure message names the recovery master, because that is
 * the node the nodemap was requested from; both messages print node
 * numbers with %u.
 *
 * NOTE(review): the application of the set bits, the remaining fields of
 * the flag-change request and the return statements are not visible in
 * this excerpt.
 */
2544 int ctdb_ctrl_modflags(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
2545 uint32_t set, uint32_t clear)
2549 struct ctdb_node_map *nodemap=NULL;
2550 struct ctdb_node_flag_change c;
2551 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
2556 /* find the recovery master */
2557 ret = ctdb_ctrl_getrecmaster(ctdb, tmp_ctx, timeout, CTDB_CURRENT_NODE, &recmaster);
2559 DEBUG(DEBUG_ERR, (__location__ " Unable to get recmaster from local node\n"));
2560 talloc_free(tmp_ctx);
2565 /* read the node flags from the recmaster */
2566 ret = ctdb_ctrl_getnodemap(ctdb, timeout, recmaster, tmp_ctx, &nodemap);
2568 DEBUG(DEBUG_ERR, (__location__ " Unable to get nodemap from node %u\n", (unsigned)recmaster));
2569 talloc_free(tmp_ctx);
2572 if (destnode >= nodemap->num) {
2573 DEBUG(DEBUG_ERR,(__location__ " Nodemap from recmaster does not contain node %u\n", (unsigned)destnode));
2574 talloc_free(tmp_ctx);
2579 c.old_flags = nodemap->nodes[destnode].flags;
2580 c.new_flags = c.old_flags;
2582 c.new_flags &= ~clear;
2584 data.dsize = sizeof(c);
2585 data.dptr = (unsigned char *)&c;
2587 /* send the flags update to all connected nodes */
2588 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
2590 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_MODIFY_FLAGS,
2592 timeout, false, data,
2595 DEBUG(DEBUG_ERR, (__location__ " Unable to update nodeflags on remote nodes\n"));
2597 talloc_free(tmp_ctx);
2601 talloc_free(tmp_ctx);
/*
 * Fetch the complete tunables structure from destnode
 * (CTDB_CONTROL_GET_ALL_TUNABLES).
 *
 * The reply is allocated on ctdb and must be exactly sizeof(*tunables)
 * bytes; it is copied by value into *tunables and the reply buffer is
 * then freed.
 *
 * NOTE(review): on the bad-size path only the DEBUG call is visible; no
 * talloc_free(outdata.dptr) appears there although the reply is
 * parented to ctdb -- confirm against the complete file.
 */
2609 int ctdb_ctrl_get_all_tunables(struct ctdb_context *ctdb,
2610 struct timeval timeout,
2612 struct ctdb_tunable *tunables)
2618 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_ALL_TUNABLES, 0, tdb_null, ctdb,
2619 &outdata, &res, &timeout, NULL);
2620 if (ret != 0 || res != 0) {
2621 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get all tunables failed\n"));
2625 if (outdata.dsize != sizeof(*tunables)) {
2626 DEBUG(DEBUG_ERR,(__location__ " bad data size %u in ctdb_ctrl_get_all_tunables should be %u\n",
2627 (unsigned)outdata.dsize, (unsigned)sizeof(*tunables)));
2631 *tunables = *(struct ctdb_tunable *)outdata.dptr;
2632 talloc_free(outdata.dptr);
2637 add a public address to a node
/*
 * Send CTDB_CONTROL_ADD_PUBLIC_IP to destnode.  The payload is *pub
 * truncated to its fixed part plus pub->len bytes of interface name; the
 * caller's buffer is sent directly, without copying.  A non-zero ret or
 * res is logged as a failure.
 */
2639 int ctdb_ctrl_add_public_ip(struct ctdb_context *ctdb,
2640 struct timeval timeout,
2642 struct ctdb_control_ip_iface *pub)
2648 data.dsize = offsetof(struct ctdb_control_ip_iface, iface) + pub->len;
2649 data.dptr = (unsigned char *)pub;
2651 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_ADD_PUBLIC_IP, 0, data, NULL,
2652 NULL, &res, &timeout, NULL);
2653 if (ret != 0 || res != 0) {
2654 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for add_public_ip failed\n"));
2662 delete a public address from a node
/*
 * Send CTDB_CONTROL_DEL_PUBLIC_IP to destnode.  The payload is *pub
 * truncated to its fixed part plus pub->len bytes of interface name; the
 * caller's buffer is sent directly, without copying.  A non-zero ret or
 * res is logged as a failure.
 */
2664 int ctdb_ctrl_del_public_ip(struct ctdb_context *ctdb,
2665 struct timeval timeout,
2667 struct ctdb_control_ip_iface *pub)
2673 data.dsize = offsetof(struct ctdb_control_ip_iface, iface) + pub->len;
2674 data.dptr = (unsigned char *)pub;
2676 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_DEL_PUBLIC_IP, 0, data, NULL,
2677 NULL, &res, &timeout, NULL);
2678 if (ret != 0 || res != 0) {
2679 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for del_public_ip failed\n"));
2687 kill a tcp connection
/*
 * Send CTDB_CONTROL_KILL_TCP to destnode with *killtcp as the payload; a
 * non-zero ret or res is logged as a failure.
 */
2689 int ctdb_ctrl_killtcp(struct ctdb_context *ctdb,
2690 struct timeval timeout,
2692 struct ctdb_control_killtcp *killtcp)
2698 data.dsize = sizeof(struct ctdb_control_killtcp);
2699 data.dptr = (unsigned char *)killtcp;
2701 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_KILL_TCP, 0, data, NULL,
2702 NULL, &res, &timeout, NULL);
2703 if (ret != 0 || res != 0) {
2704 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for killtcp failed\n"));
/*
 * Send CTDB_CONTROL_SEND_GRATIOUS_ARP to destnode for addr on the
 * interface named ifname.
 *
 * The request (address, name length, and the name including its NUL) is
 * built on a temporary talloc context that is freed on both the failure
 * path and the success path shown below.
 *
 * NOTE(review): tmp_ctx is created before the CTDB_NO_MEMORY check; if
 * that macro returns on NULL (its definition is not shown here), tmp_ctx
 * is not released on that path.
 */
2714 int ctdb_ctrl_gratious_arp(struct ctdb_context *ctdb,
2715 struct timeval timeout,
2717 ctdb_sock_addr *addr,
2723 struct ctdb_control_gratious_arp *gratious_arp;
2724 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
2727 len = strlen(ifname)+1;
2728 gratious_arp = talloc_size(tmp_ctx,
2729 offsetof(struct ctdb_control_gratious_arp, iface) + len);
2730 CTDB_NO_MEMORY(ctdb, gratious_arp);
2732 gratious_arp->addr = *addr;
2733 gratious_arp->len = len;
2734 memcpy(&gratious_arp->iface[0], ifname, len);
2737 data.dsize = offsetof(struct ctdb_control_gratious_arp, iface) + len;
2738 data.dptr = (unsigned char *)gratious_arp;
2740 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SEND_GRATIOUS_ARP, 0, data, NULL,
2741 NULL, &res, &timeout, NULL);
2742 if (ret != 0 || res != 0) {
2743 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for gratious_arp failed\n"));
2744 talloc_free(tmp_ctx);
2748 talloc_free(tmp_ctx);
2753 get a list of all tcp tickles that a node knows about for a particular vnn
/*
 * Fetch the TCP tickle list that destnode holds for addr
 * (CTDB_CONTROL_GET_TCP_TICKLE_LIST).
 *
 * The reply buffer, allocated on mem_ctx, is handed back directly
 * through *list without being copied or validated here.
 *
 * NOTE(review): the timeout parameter is not forwarded -- NULL is passed
 * in the position where other callers pass &timeout.  Confirm whether
 * that is intentional.
 */
2755 int ctdb_ctrl_get_tcp_tickles(struct ctdb_context *ctdb,
2756 struct timeval timeout, uint32_t destnode,
2757 TALLOC_CTX *mem_ctx,
2758 ctdb_sock_addr *addr,
2759 struct ctdb_control_tcp_tickle_list **list)
2762 TDB_DATA data, outdata;
2765 data.dptr = (uint8_t*)addr;
2766 data.dsize = sizeof(ctdb_sock_addr);
2768 ret = ctdb_control(ctdb, destnode, 0,
2769 CTDB_CONTROL_GET_TCP_TICKLE_LIST, 0, data,
2770 mem_ctx, &outdata, &status, NULL, NULL);
2771 if (ret != 0 || status != 0) {
2772 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get tcp tickles failed\n"));
2776 *list = (struct ctdb_control_tcp_tickle_list *)outdata.dptr;
2782 register a server id
/*
 * Send CTDB_CONTROL_REGISTER_SERVER_ID to the local daemon
 * (CTDB_CURRENT_NODE) with *id as the payload; a non-zero ret or res is
 * logged as a failure.
 */
2784 int ctdb_ctrl_register_server_id(struct ctdb_context *ctdb,
2785 struct timeval timeout,
2786 struct ctdb_server_id *id)
2792 data.dsize = sizeof(struct ctdb_server_id);
2793 data.dptr = (unsigned char *)id;
2795 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0,
2796 CTDB_CONTROL_REGISTER_SERVER_ID,
2798 NULL, &res, &timeout, NULL);
2799 if (ret != 0 || res != 0) {
2800 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for register server id failed\n"));
2808 unregister a server id
/*
 * Send CTDB_CONTROL_UNREGISTER_SERVER_ID to the local daemon
 * (CTDB_CURRENT_NODE) with *id as the payload; a non-zero ret or res is
 * logged as a failure.
 */
2810 int ctdb_ctrl_unregister_server_id(struct ctdb_context *ctdb,
2811 struct timeval timeout,
2812 struct ctdb_server_id *id)
2818 data.dsize = sizeof(struct ctdb_server_id);
2819 data.dptr = (unsigned char *)id;
2821 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0,
2822 CTDB_CONTROL_UNREGISTER_SERVER_ID,
2824 NULL, &res, &timeout, NULL);
2825 if (ret != 0 || res != 0) {
2826 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for unregister server id failed\n"));
2835 check if a server id exists
2837 if a server id does exist, return *status == 1, otherwise *status == 0
/*
 * Send CTDB_CONTROL_CHECK_SERVER_ID to destnode with *id as the
 * payload; the control status is captured in res.
 *
 * NOTE(review): the failure condition and the translation of res into
 * the caller's status output are not visible in this excerpt.
 */
2839 int ctdb_ctrl_check_server_id(struct ctdb_context *ctdb,
2840 struct timeval timeout,
2842 struct ctdb_server_id *id,
2849 data.dsize = sizeof(struct ctdb_server_id);
2850 data.dptr = (unsigned char *)id;
2852 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_CHECK_SERVER_ID,
2854 NULL, &res, &timeout, NULL);
2856 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for check server id failed\n"));
2870 get the list of server ids that are registered on a node
/*
 * Fetch the list of registered server ids from destnode
 * (CTDB_CONTROL_GET_SERVER_ID_LIST).  The reply buffer is handed to the
 * caller through *svid_list, reparented to mem_ctx with talloc_steal();
 * it is not copied or validated here.
 */
2872 int ctdb_ctrl_get_server_id_list(struct ctdb_context *ctdb,
2873 TALLOC_CTX *mem_ctx,
2874 struct timeval timeout, uint32_t destnode,
2875 struct ctdb_server_id_list **svid_list)
2881 ret = ctdb_control(ctdb, destnode, 0,
2882 CTDB_CONTROL_GET_SERVER_ID_LIST, 0, tdb_null,
2883 mem_ctx, &outdata, &res, &timeout, NULL);
2884 if (ret != 0 || res != 0) {
2885 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get_server_id_list failed\n"));
2889 *svid_list = (struct ctdb_server_id_list *)talloc_steal(mem_ctx, outdata.dptr);
2895 initialise the ctdb daemon for client applications
2897 NOTE: In current code the daemon does not fork. This is for testing purposes only
2898 and to simplify the code.
2900 struct ctdb_context *ctdb_init(struct event_context *ev)
2903 struct ctdb_context *ctdb;
2905 ctdb = talloc_zero(ev, struct ctdb_context);
2907 DEBUG(DEBUG_ERR,(__location__ " talloc_zero failed.\n"));
2911 ctdb->idr = idr_init(ctdb);
2912 /* Wrap early to exercise code. */
2913 ctdb->lastid = INT_MAX-200;
2914 CTDB_NO_MEMORY_NULL(ctdb, ctdb->idr);
2916 ret = ctdb_set_socketname(ctdb, CTDB_PATH);
2918 DEBUG(DEBUG_ERR,(__location__ " ctdb_set_socketname failed.\n"));
2923 ctdb->statistics.statistics_start_time = timeval_current();
2932 void ctdb_set_flags(struct ctdb_context *ctdb, unsigned flags)
2934 ctdb->flags |= flags;
2938 setup the local socket name
2940 int ctdb_set_socketname(struct ctdb_context *ctdb, const char *socketname)
2942 ctdb->daemon.name = talloc_strdup(ctdb, socketname);
2943 CTDB_NO_MEMORY(ctdb, ctdb->daemon.name);
2948 const char *ctdb_get_socketname(struct ctdb_context *ctdb)
2950 return ctdb->daemon.name;
2954 return the pnn of this node
2956 uint32_t ctdb_get_pnn(struct ctdb_context *ctdb)
2963 get the uptime of a remote node
2965 struct ctdb_client_control_state *
2966 ctdb_ctrl_uptime_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
2968 return ctdb_control_send(ctdb, destnode, 0,
2969 CTDB_CONTROL_UPTIME, 0, tdb_null,
2970 mem_ctx, &timeout, NULL);
2973 int ctdb_ctrl_uptime_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, struct ctdb_uptime **uptime)
2979 ret = ctdb_control_recv(ctdb, state, mem_ctx, &outdata, &res, NULL);
2980 if (ret != 0 || res != 0) {
2981 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_uptime_recv failed\n"));
2985 *uptime = (struct ctdb_uptime *)talloc_steal(mem_ctx, outdata.dptr);
2990 int ctdb_ctrl_uptime(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, struct ctdb_uptime **uptime)
2992 struct ctdb_client_control_state *state;
2994 state = ctdb_ctrl_uptime_send(ctdb, mem_ctx, timeout, destnode);
2995 return ctdb_ctrl_uptime_recv(ctdb, mem_ctx, state, uptime);
2999 send a control to execute the "recovered" event script on a node
3001 int ctdb_ctrl_end_recovery(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
3006 ret = ctdb_control(ctdb, destnode, 0,
3007 CTDB_CONTROL_END_RECOVERY, 0, tdb_null,
3008 NULL, NULL, &status, &timeout, NULL);
3009 if (ret != 0 || status != 0) {
3010 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for end_recovery failed\n"));
3018 callback for the async helpers used when sending the same control
3019 to multiple nodes in parallell.
3021 static void async_callback(struct ctdb_client_control_state *state)
3023 struct client_async_data *data = talloc_get_type(state->async.private_data, struct client_async_data);
3024 struct ctdb_context *ctdb = talloc_get_type(state->ctdb, struct ctdb_context);
3028 uint32_t destnode = state->c->hdr.destnode;
3030 /* one more node has responded with recmode data */
3033 /* if we failed to push the db, then return an error and let
3034 the main loop try again.
3036 if (state->state != CTDB_CONTROL_DONE) {
3037 if ( !data->dont_log_errors) {
3038 DEBUG(DEBUG_ERR,("Async operation failed with state %d, opcode:%u\n", state->state, data->opcode));
3041 if (data->fail_callback) {
3042 data->fail_callback(ctdb, destnode, res, outdata,
3043 data->callback_data);
3048 state->async.fn = NULL;
3050 ret = ctdb_control_recv(ctdb, state, data, &outdata, &res, NULL);
3051 if ((ret != 0) || (res != 0)) {
3052 if ( !data->dont_log_errors) {
3053 DEBUG(DEBUG_ERR,("Async operation failed with ret=%d res=%d opcode=%u\n", ret, (int)res, data->opcode));
3056 if (data->fail_callback) {
3057 data->fail_callback(ctdb, destnode, res, outdata,
3058 data->callback_data);
3061 if ((ret == 0) && (data->callback != NULL)) {
3062 data->callback(ctdb, destnode, res, outdata,
3063 data->callback_data);
3068 void ctdb_client_async_add(struct client_async_data *data, struct ctdb_client_control_state *state)
3070 /* set up the callback functions */
3071 state->async.fn = async_callback;
3072 state->async.private_data = data;
3074 /* one more control to wait for to complete */
3079 /* wait for up to the maximum number of seconds allowed
3080 or until all nodes we expect a response from has replied
3082 int ctdb_client_async_wait(struct ctdb_context *ctdb, struct client_async_data *data)
3084 while (data->count > 0) {
3085 event_loop_once(ctdb->ev);
3087 if (data->fail_count != 0) {
3088 if (!data->dont_log_errors) {
3089 DEBUG(DEBUG_ERR,("Async wait failed - fail_count=%u\n",
3099 perform a simple control on the listed nodes
3100 The control cannot return data
3102 int ctdb_client_async_control(struct ctdb_context *ctdb,
3103 enum ctdb_controls opcode,
3106 struct timeval timeout,
3107 bool dont_log_errors,
3109 client_async_callback client_callback,
3110 client_async_callback fail_callback,
3111 void *callback_data)
3113 struct client_async_data *async_data;
3114 struct ctdb_client_control_state *state;
3117 async_data = talloc_zero(ctdb, struct client_async_data);
3118 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
3119 async_data->dont_log_errors = dont_log_errors;
3120 async_data->callback = client_callback;
3121 async_data->fail_callback = fail_callback;
3122 async_data->callback_data = callback_data;
3123 async_data->opcode = opcode;
3125 num_nodes = talloc_get_size(nodes) / sizeof(uint32_t);
3127 /* loop over all nodes and send an async control to each of them */
3128 for (j=0; j<num_nodes; j++) {
3129 uint32_t pnn = nodes[j];
3131 state = ctdb_control_send(ctdb, pnn, srvid, opcode,
3132 0, data, async_data, &timeout, NULL);
3133 if (state == NULL) {
3134 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control %u\n", (unsigned)opcode));
3135 talloc_free(async_data);
3139 ctdb_client_async_add(async_data, state);
3142 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3143 talloc_free(async_data);
3147 talloc_free(async_data);
3151 uint32_t *list_of_vnnmap_nodes(struct ctdb_context *ctdb,
3152 struct ctdb_vnn_map *vnn_map,
3153 TALLOC_CTX *mem_ctx,
3156 int i, j, num_nodes;
3159 for (i=num_nodes=0;i<vnn_map->size;i++) {
3160 if (vnn_map->map[i] == ctdb->pnn && !include_self) {
3166 nodes = talloc_array(mem_ctx, uint32_t, num_nodes);
3167 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
3169 for (i=j=0;i<vnn_map->size;i++) {
3170 if (vnn_map->map[i] == ctdb->pnn && !include_self) {
3173 nodes[j++] = vnn_map->map[i];
3179 uint32_t *list_of_active_nodes(struct ctdb_context *ctdb,
3180 struct ctdb_node_map *node_map,
3181 TALLOC_CTX *mem_ctx,
3184 int i, j, num_nodes;
3187 for (i=num_nodes=0;i<node_map->num;i++) {
3188 if (node_map->nodes[i].flags & NODE_FLAGS_INACTIVE) {
3191 if (node_map->nodes[i].pnn == ctdb->pnn && !include_self) {
3197 nodes = talloc_array(mem_ctx, uint32_t, num_nodes);
3198 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
3200 for (i=j=0;i<node_map->num;i++) {
3201 if (node_map->nodes[i].flags & NODE_FLAGS_INACTIVE) {
3204 if (node_map->nodes[i].pnn == ctdb->pnn && !include_self) {
3207 nodes[j++] = node_map->nodes[i].pnn;
3213 uint32_t *list_of_active_nodes_except_pnn(struct ctdb_context *ctdb,
3214 struct ctdb_node_map *node_map,
3215 TALLOC_CTX *mem_ctx,
3218 int i, j, num_nodes;
3221 for (i=num_nodes=0;i<node_map->num;i++) {
3222 if (node_map->nodes[i].flags & NODE_FLAGS_INACTIVE) {
3225 if (node_map->nodes[i].pnn == pnn) {
3231 nodes = talloc_array(mem_ctx, uint32_t, num_nodes);
3232 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
3234 for (i=j=0;i<node_map->num;i++) {
3235 if (node_map->nodes[i].flags & NODE_FLAGS_INACTIVE) {
3238 if (node_map->nodes[i].pnn == pnn) {
3241 nodes[j++] = node_map->nodes[i].pnn;
3247 uint32_t *list_of_connected_nodes(struct ctdb_context *ctdb,
3248 struct ctdb_node_map *node_map,
3249 TALLOC_CTX *mem_ctx,
3252 int i, j, num_nodes;
3255 for (i=num_nodes=0;i<node_map->num;i++) {
3256 if (node_map->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
3259 if (node_map->nodes[i].pnn == ctdb->pnn && !include_self) {
3265 nodes = talloc_array(mem_ctx, uint32_t, num_nodes);
3266 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
3268 for (i=j=0;i<node_map->num;i++) {
3269 if (node_map->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
3272 if (node_map->nodes[i].pnn == ctdb->pnn && !include_self) {
3275 nodes[j++] = node_map->nodes[i].pnn;
3282 this is used to test if a pnn lock exists and if it exists will return
3283 the number of connections that pnn has reported or -1 if that recovery
3284 daemon is not running.
3287 ctdb_read_pnn_lock(int fd, int32_t pnn)
3292 lock.l_type = F_WRLCK;
3293 lock.l_whence = SEEK_SET;
3298 if (fcntl(fd, F_GETLK, &lock) != 0) {
3299 DEBUG(DEBUG_ERR, (__location__ " F_GETLK failed with %s\n", strerror(errno)));
3303 if (lock.l_type == F_UNLCK) {
3307 if (pread(fd, &c, 1, pnn) == -1) {
3308 DEBUG(DEBUG_CRIT,(__location__ " failed read pnn count - %s\n", strerror(errno)));
3316 get capabilities of a remote node
3318 struct ctdb_client_control_state *
3319 ctdb_ctrl_getcapabilities_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
3321 return ctdb_control_send(ctdb, destnode, 0,
3322 CTDB_CONTROL_GET_CAPABILITIES, 0, tdb_null,
3323 mem_ctx, &timeout, NULL);
3326 int ctdb_ctrl_getcapabilities_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *capabilities)
3332 ret = ctdb_control_recv(ctdb, state, mem_ctx, &outdata, &res, NULL);
3333 if ( (ret != 0) || (res != 0) ) {
3334 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_getcapabilities_recv failed\n"));
3339 *capabilities = *((uint32_t *)outdata.dptr);
3345 int ctdb_ctrl_getcapabilities(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *capabilities)
3347 struct ctdb_client_control_state *state;
3348 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
3351 state = ctdb_ctrl_getcapabilities_send(ctdb, tmp_ctx, timeout, destnode);
3352 ret = ctdb_ctrl_getcapabilities_recv(ctdb, tmp_ctx, state, capabilities);
3353 talloc_free(tmp_ctx);
3358 * check whether a transaction is active on a given db on a given node
3360 int32_t ctdb_ctrl_transaction_active(struct ctdb_context *ctdb,
3368 indata.dptr = (uint8_t *)&db_id;
3369 indata.dsize = sizeof(db_id);
3371 ret = ctdb_control(ctdb, destnode, 0,
3372 CTDB_CONTROL_TRANS2_ACTIVE,
3373 0, indata, NULL, NULL, &status,
3377 DEBUG(DEBUG_ERR, (__location__ " ctdb control for transaction_active failed\n"));
/* State of one client-side transaction on a persistent database. */
struct ctdb_transaction_handle {
	/* database the transaction runs against */
	struct ctdb_db_context *ctdb_db;
	/* true while ctdb_replay_transaction() re-applies the operations;
	   fetch/store then stop appending to m_all */
	bool in_replay;
	/*
	 * Operations performed under the transaction are recorded in two
	 * marshall buffers:
	 * - m_all holds both reads and writes,
	 * - m_write holds just the writes
	 */
	struct ctdb_marshall_buffer *m_all;
	struct ctdb_marshall_buffer *m_write;
};
3397 /* start a transaction on a database */
3398 static int ctdb_transaction_destructor(struct ctdb_transaction_handle *h)
3400 tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
3404 /* start a transaction on a database */
3405 static int ctdb_transaction_fetch_start(struct ctdb_transaction_handle *h)
3407 struct ctdb_record_handle *rh;
3410 struct ctdb_ltdb_header header;
3411 TALLOC_CTX *tmp_ctx;
3412 const char *keyname = CTDB_TRANSACTION_LOCK_KEY;
3414 struct ctdb_db_context *ctdb_db = h->ctdb_db;
3418 key.dptr = discard_const(keyname);
3419 key.dsize = strlen(keyname);
3421 if (!ctdb_db->persistent) {
3422 DEBUG(DEBUG_ERR,(__location__ " Attempted transaction on non-persistent database\n"));
3427 tmp_ctx = talloc_new(h);
3429 rh = ctdb_fetch_lock(ctdb_db, tmp_ctx, key, NULL);
3431 DEBUG(DEBUG_ERR,(__location__ " Failed to fetch_lock database\n"));
3432 talloc_free(tmp_ctx);
3436 status = ctdb_ctrl_transaction_active(ctdb_db->ctdb,
3440 unsigned long int usec = (1000 + random()) % 100000;
3441 DEBUG(DEBUG_DEBUG, (__location__ " transaction is active "
3442 "on db_id[0x%08x]. waiting for %lu "
3444 ctdb_db->db_id, usec));
3445 talloc_free(tmp_ctx);
3451 * store the pid in the database:
3452 * it is not enough that the node is dmaster...
3455 data.dptr = (unsigned char *)&pid;
3456 data.dsize = sizeof(pid_t);
3458 rh->header.dmaster = ctdb_db->ctdb->pnn;
3459 ret = ctdb_ltdb_store(ctdb_db, key, &(rh->header), data);
3461 DEBUG(DEBUG_ERR, (__location__ " Failed to store pid in "
3462 "transaction record\n"));
3463 talloc_free(tmp_ctx);
3469 ret = tdb_transaction_start(ctdb_db->ltdb->tdb);
3471 DEBUG(DEBUG_ERR,(__location__ " Failed to start tdb transaction\n"));
3472 talloc_free(tmp_ctx);
3476 ret = ctdb_ltdb_fetch(ctdb_db, key, &header, tmp_ctx, &data);
3478 DEBUG(DEBUG_ERR,(__location__ " Failed to re-fetch transaction "
3479 "lock record inside transaction\n"));
3480 tdb_transaction_cancel(ctdb_db->ltdb->tdb);
3481 talloc_free(tmp_ctx);
3485 if (header.dmaster != ctdb_db->ctdb->pnn) {
3486 DEBUG(DEBUG_DEBUG,(__location__ " not dmaster any more on "
3487 "transaction lock record\n"));
3488 tdb_transaction_cancel(ctdb_db->ltdb->tdb);
3489 talloc_free(tmp_ctx);
3493 if ((data.dsize != sizeof(pid_t)) || (*(pid_t *)(data.dptr) != pid)) {
3494 DEBUG(DEBUG_DEBUG, (__location__ " my pid is not stored in "
3495 "the transaction lock record\n"));
3496 tdb_transaction_cancel(ctdb_db->ltdb->tdb);
3497 talloc_free(tmp_ctx);
3501 talloc_free(tmp_ctx);
3507 /* start a transaction on a database */
3508 struct ctdb_transaction_handle *ctdb_transaction_start(struct ctdb_db_context *ctdb_db,
3509 TALLOC_CTX *mem_ctx)
3511 struct ctdb_transaction_handle *h;
3514 h = talloc_zero(mem_ctx, struct ctdb_transaction_handle);
3516 DEBUG(DEBUG_ERR,(__location__ " oom for transaction handle\n"));
3520 h->ctdb_db = ctdb_db;
3522 ret = ctdb_transaction_fetch_start(h);
3528 talloc_set_destructor(h, ctdb_transaction_destructor);
3536 fetch a record inside a transaction
3538 int ctdb_transaction_fetch(struct ctdb_transaction_handle *h,
3539 TALLOC_CTX *mem_ctx,
3540 TDB_DATA key, TDB_DATA *data)
3542 struct ctdb_ltdb_header header;
3545 ZERO_STRUCT(header);
3547 ret = ctdb_ltdb_fetch(h->ctdb_db, key, &header, mem_ctx, data);
3548 if (ret == -1 && header.dmaster == (uint32_t)-1) {
3549 /* record doesn't exist yet */
3558 if (!h->in_replay) {
3559 h->m_all = ctdb_marshall_add(h, h->m_all, h->ctdb_db->db_id, 1, key, NULL, *data);
3560 if (h->m_all == NULL) {
3561 DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
3570 stores a record inside a transaction
3572 int ctdb_transaction_store(struct ctdb_transaction_handle *h,
3573 TDB_DATA key, TDB_DATA data)
3575 TALLOC_CTX *tmp_ctx = talloc_new(h);
3576 struct ctdb_ltdb_header header;
3580 ZERO_STRUCT(header);
3582 /* we need the header so we can update the RSN */
3583 ret = ctdb_ltdb_fetch(h->ctdb_db, key, &header, tmp_ctx, &olddata);
3584 if (ret == -1 && header.dmaster == (uint32_t)-1) {
3585 /* the record doesn't exist - create one with us as dmaster.
3586 This is only safe because we are in a transaction and this
3587 is a persistent database */
3588 ZERO_STRUCT(header);
3589 } else if (ret != 0) {
3590 DEBUG(DEBUG_ERR,(__location__ " Failed to fetch record\n"));
3591 talloc_free(tmp_ctx);
3595 if (data.dsize == olddata.dsize &&
3596 memcmp(data.dptr, olddata.dptr, data.dsize) == 0) {
3597 /* save writing the same data */
3598 talloc_free(tmp_ctx);
3602 header.dmaster = h->ctdb_db->ctdb->pnn;
3605 if (!h->in_replay) {
3606 h->m_all = ctdb_marshall_add(h, h->m_all, h->ctdb_db->db_id, 0, key, NULL, data);
3607 if (h->m_all == NULL) {
3608 DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
3609 talloc_free(tmp_ctx);
3614 h->m_write = ctdb_marshall_add(h, h->m_write, h->ctdb_db->db_id, 0, key, &header, data);
3615 if (h->m_write == NULL) {
3616 DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
3617 talloc_free(tmp_ctx);
3621 ret = ctdb_ltdb_store(h->ctdb_db, key, &header, data);
3623 talloc_free(tmp_ctx);
3629 replay a transaction
3631 static int ctdb_replay_transaction(struct ctdb_transaction_handle *h)
3634 struct ctdb_rec_data *rec = NULL;
3636 h->in_replay = true;
3637 talloc_free(h->m_write);
3640 ret = ctdb_transaction_fetch_start(h);
3645 for (i=0;i<h->m_all->count;i++) {
3648 rec = ctdb_marshall_loop_next(h->m_all, rec, NULL, NULL, &key, &data);
3650 DEBUG(DEBUG_ERR, (__location__ " Out of records in ctdb_replay_transaction?\n"));
3654 if (rec->reqid == 0) {
3656 if (ctdb_transaction_store(h, key, data) != 0) {
3661 TALLOC_CTX *tmp_ctx = talloc_new(h);
3663 if (ctdb_transaction_fetch(h, tmp_ctx, key, &data2) != 0) {
3664 talloc_free(tmp_ctx);
3667 if (data2.dsize != data.dsize ||
3668 memcmp(data2.dptr, data.dptr, data.dsize) != 0) {
3669 /* the record has changed on us - we have to give up */
3670 talloc_free(tmp_ctx);
3673 talloc_free(tmp_ctx);
3680 tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
3686 commit a transaction
3688 int ctdb_transaction_commit(struct ctdb_transaction_handle *h)
3692 struct ctdb_context *ctdb = h->ctdb_db->ctdb;
3693 struct timeval timeout;
3694 enum ctdb_controls failure_control = CTDB_CONTROL_TRANS2_ERROR;
3696 talloc_set_destructor(h, NULL);
3698 /* our commit strategy is quite complex.
3700 - we first try to commit the changes to all other nodes
3702 - if that works, then we commit locally and we are done
3704 - if a commit on another node fails, then we need to cancel
3705 the transaction, then restart the transaction (thus
3706 opening a window of time for a pending recovery to
3707 complete), then replay the transaction, checking all the
3708 reads and writes (checking that reads give the same data,
3709 and writes succeed). Then we retry the transaction to the
3714 if (h->m_write == NULL) {
3715 /* no changes were made */
3716 tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
3721 /* tell ctdbd to commit to the other nodes */
3722 timeout = timeval_current_ofs(1, 0);
3723 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
3724 retries==0?CTDB_CONTROL_TRANS2_COMMIT:CTDB_CONTROL_TRANS2_COMMIT_RETRY, 0,
3725 ctdb_marshall_finish(h->m_write), NULL, NULL, &status,
3727 if (ret != 0 || status != 0) {
3728 tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
3729 DEBUG(DEBUG_NOTICE, (__location__ " transaction commit%s failed"
3730 ", retrying after 1 second...\n",
3731 (retries==0)?"":"retry "));
3735 failure_control = CTDB_CONTROL_TRANS2_ERROR;
3737 /* work out what error code we will give if we
3738 have to fail the operation */
3739 switch ((enum ctdb_trans2_commit_error)status) {
3740 case CTDB_TRANS2_COMMIT_SUCCESS:
3741 case CTDB_TRANS2_COMMIT_SOMEFAIL:
3742 case CTDB_TRANS2_COMMIT_TIMEOUT:
3743 failure_control = CTDB_CONTROL_TRANS2_ERROR;
3745 case CTDB_TRANS2_COMMIT_ALLFAIL:
3746 failure_control = CTDB_CONTROL_TRANS2_FINISHED;
3751 if (++retries == 100) {
3752 DEBUG(DEBUG_ERR,(__location__ " Giving up transaction on db 0x%08x after %d retries failure_control=%u\n",
3753 h->ctdb_db->db_id, retries, (unsigned)failure_control));
3754 ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
3755 failure_control, CTDB_CTRL_FLAG_NOREPLY,
3756 tdb_null, NULL, NULL, NULL, NULL, NULL);
3761 if (ctdb_replay_transaction(h) != 0) {
3762 DEBUG(DEBUG_ERR, (__location__ " Failed to replay "
3763 "transaction on db 0x%08x, "
3764 "failure control =%u\n",
3766 (unsigned)failure_control));
3767 ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
3768 failure_control, CTDB_CTRL_FLAG_NOREPLY,
3769 tdb_null, NULL, NULL, NULL, NULL, NULL);
3775 failure_control = CTDB_CONTROL_TRANS2_ERROR;
3778 /* do the real commit locally */
3779 ret = tdb_transaction_commit(h->ctdb_db->ltdb->tdb);
3781 DEBUG(DEBUG_ERR, (__location__ " Failed to commit transaction "
3782 "on db id 0x%08x locally, "
3783 "failure_control=%u\n",
3785 (unsigned)failure_control));
3786 ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
3787 failure_control, CTDB_CTRL_FLAG_NOREPLY,
3788 tdb_null, NULL, NULL, NULL, NULL, NULL);
3793 /* tell ctdbd that we are finished with our local commit */
3794 ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
3795 CTDB_CONTROL_TRANS2_FINISHED, CTDB_CTRL_FLAG_NOREPLY,
3796 tdb_null, NULL, NULL, NULL, NULL, NULL);
3802 recovery daemon ping to main daemon
3804 int ctdb_ctrl_recd_ping(struct ctdb_context *ctdb)
3809 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_RECD_PING, 0, tdb_null,
3810 ctdb, NULL, &res, NULL, NULL);
3811 if (ret != 0 || res != 0) {
3812 DEBUG(DEBUG_ERR,("Failed to send recd ping\n"));
3819 /* when forking the main daemon and the child process needs to connect back
3820 * to the daemon as a client process, this function can be used to change
3821 * the ctdb context from daemon into client mode
3823 int switch_from_server_to_client(struct ctdb_context *ctdb, const char *fmt, ...)
3828 /* Add extra information so we can identify this in the logs */
3830 debug_extra = talloc_strdup_append(talloc_vasprintf(NULL, fmt, ap), ":");
3833 /* shutdown the transport */
3834 if (ctdb->methods) {
3835 ctdb->methods->shutdown(ctdb);
3838 /* get a new event context */
3839 talloc_free(ctdb->ev);
3840 ctdb->ev = event_context_init(ctdb);
3841 tevent_loop_allow_nesting(ctdb->ev);
3843 close(ctdb->daemon.sd);
3844 ctdb->daemon.sd = -1;
3846 /* initialise ctdb */
3847 ret = ctdb_socket_connect(ctdb);
3849 DEBUG(DEBUG_ALERT, (__location__ " Failed to init ctdb client\n"));
3857 get the status of running the monitor eventscripts: NULL means never run.
3859 int ctdb_ctrl_getscriptstatus(struct ctdb_context *ctdb,
3860 struct timeval timeout, uint32_t destnode,
3861 TALLOC_CTX *mem_ctx, enum ctdb_eventscript_call type,
3862 struct ctdb_scripts_wire **script_status)
3865 TDB_DATA outdata, indata;
3867 uint32_t uinttype = type;
3869 indata.dptr = (uint8_t *)&uinttype;
3870 indata.dsize = sizeof(uinttype);
3872 ret = ctdb_control(ctdb, destnode, 0,
3873 CTDB_CONTROL_GET_EVENT_SCRIPT_STATUS, 0, indata,
3874 mem_ctx, &outdata, &res, &timeout, NULL);
3875 if (ret != 0 || res != 0) {
3876 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getscriptstatus failed ret:%d res:%d\n", ret, res));
3880 if (outdata.dsize == 0) {
3881 *script_status = NULL;
3883 *script_status = (struct ctdb_scripts_wire *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
3884 talloc_free(outdata.dptr);
3891 tell the main daemon how long it took to lock the reclock file
3893 int ctdb_ctrl_report_recd_lock_latency(struct ctdb_context *ctdb, struct timeval timeout, double latency)
3899 data.dptr = (uint8_t *)&latency;
3900 data.dsize = sizeof(latency);
3902 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_RECD_RECLOCK_LATENCY, 0, data,
3903 ctdb, NULL, &res, NULL, NULL);
3904 if (ret != 0 || res != 0) {
3905 DEBUG(DEBUG_ERR,("Failed to send recd reclock latency\n"));
3913 get the name of the reclock file
3915 int ctdb_ctrl_getreclock(struct ctdb_context *ctdb, struct timeval timeout,
3916 uint32_t destnode, TALLOC_CTX *mem_ctx,
3923 ret = ctdb_control(ctdb, destnode, 0,
3924 CTDB_CONTROL_GET_RECLOCK_FILE, 0, tdb_null,
3925 mem_ctx, &data, &res, &timeout, NULL);
3926 if (ret != 0 || res != 0) {
3930 if (data.dsize == 0) {
3933 *name = talloc_strdup(mem_ctx, discard_const(data.dptr));
3935 talloc_free(data.dptr);
3941 set the reclock filename for a node
3943 int ctdb_ctrl_setreclock(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, const char *reclock)
3949 if (reclock == NULL) {
3953 data.dsize = strlen(reclock) + 1;
3954 data.dptr = discard_const(reclock);
3957 ret = ctdb_control(ctdb, destnode, 0,
3958 CTDB_CONTROL_SET_RECLOCK_FILE, 0, data,
3959 NULL, NULL, &res, &timeout, NULL);
3960 if (ret != 0 || res != 0) {
3961 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setreclock failed\n"));
3971 int ctdb_ctrl_stop_node(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
3976 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_STOP_NODE, 0, tdb_null,
3977 ctdb, NULL, &res, &timeout, NULL);
3978 if (ret != 0 || res != 0) {
3979 DEBUG(DEBUG_ERR,("Failed to stop node\n"));
3989 int ctdb_ctrl_continue_node(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
3993 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_CONTINUE_NODE, 0, tdb_null,
3994 ctdb, NULL, NULL, &timeout, NULL);
3996 DEBUG(DEBUG_ERR,("Failed to continue node\n"));
4004 set the natgw state for a node
4006 int ctdb_ctrl_setnatgwstate(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t natgwstate)
4012 data.dsize = sizeof(natgwstate);
4013 data.dptr = (uint8_t *)&natgwstate;
4015 ret = ctdb_control(ctdb, destnode, 0,
4016 CTDB_CONTROL_SET_NATGWSTATE, 0, data,
4017 NULL, NULL, &res, &timeout, NULL);
4018 if (ret != 0 || res != 0) {
4019 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setnatgwstate failed\n"));
4027 set the lmaster role for a node
4029 int ctdb_ctrl_setlmasterrole(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t lmasterrole)
4035 data.dsize = sizeof(lmasterrole);
4036 data.dptr = (uint8_t *)&lmasterrole;
4038 ret = ctdb_control(ctdb, destnode, 0,
4039 CTDB_CONTROL_SET_LMASTERROLE, 0, data,
4040 NULL, NULL, &res, &timeout, NULL);
4041 if (ret != 0 || res != 0) {
4042 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setlmasterrole failed\n"));
4050 set the recmaster role for a node
4052 int ctdb_ctrl_setrecmasterrole(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmasterrole)
4058 data.dsize = sizeof(recmasterrole);
4059 data.dptr = (uint8_t *)&recmasterrole;
4061 ret = ctdb_control(ctdb, destnode, 0,
4062 CTDB_CONTROL_SET_RECMASTERROLE, 0, data,
4063 NULL, NULL, &res, &timeout, NULL);
4064 if (ret != 0 || res != 0) {
4065 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setrecmasterrole failed\n"));
4072 /* enable an eventscript
4074 int ctdb_ctrl_enablescript(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, const char *script)
4080 data.dsize = strlen(script) + 1;
4081 data.dptr = discard_const(script);
4083 ret = ctdb_control(ctdb, destnode, 0,
4084 CTDB_CONTROL_ENABLE_SCRIPT, 0, data,
4085 NULL, NULL, &res, &timeout, NULL);
4086 if (ret != 0 || res != 0) {
4087 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for enablescript failed\n"));
4094 /* disable an eventscript
4096 int ctdb_ctrl_disablescript(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, const char *script)
4102 data.dsize = strlen(script) + 1;
4103 data.dptr = discard_const(script);
4105 ret = ctdb_control(ctdb, destnode, 0,
4106 CTDB_CONTROL_DISABLE_SCRIPT, 0, data,
4107 NULL, NULL, &res, &timeout, NULL);
4108 if (ret != 0 || res != 0) {
4109 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for disablescript failed\n"));
4117 int ctdb_ctrl_set_ban(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, struct ctdb_ban_time *bantime)
4123 data.dsize = sizeof(*bantime);
4124 data.dptr = (uint8_t *)bantime;
4126 ret = ctdb_control(ctdb, destnode, 0,
4127 CTDB_CONTROL_SET_BAN_STATE, 0, data,
4128 NULL, NULL, &res, &timeout, NULL);
4129 if (ret != 0 || res != 0) {
4130 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set ban state failed\n"));
4138 int ctdb_ctrl_get_ban(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_ban_time **bantime)
4143 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
4145 ret = ctdb_control(ctdb, destnode, 0,
4146 CTDB_CONTROL_GET_BAN_STATE, 0, tdb_null,
4147 tmp_ctx, &outdata, &res, &timeout, NULL);
4148 if (ret != 0 || res != 0) {
4149 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set ban state failed\n"));
4150 talloc_free(tmp_ctx);
4154 *bantime = (struct ctdb_ban_time *)talloc_steal(mem_ctx, outdata.dptr);
4155 talloc_free(tmp_ctx);
4161 int ctdb_ctrl_set_db_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, struct ctdb_db_priority *db_prio)
4166 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
4168 data.dptr = (uint8_t*)db_prio;
4169 data.dsize = sizeof(*db_prio);
4171 ret = ctdb_control(ctdb, destnode, 0,
4172 CTDB_CONTROL_SET_DB_PRIORITY, 0, data,
4173 tmp_ctx, NULL, &res, &timeout, NULL);
4174 if (ret != 0 || res != 0) {
4175 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set_db_priority failed\n"));
4176 talloc_free(tmp_ctx);
4180 talloc_free(tmp_ctx);
4185 int ctdb_ctrl_get_db_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t db_id, uint32_t *priority)
4190 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
4192 data.dptr = (uint8_t*)&db_id;
4193 data.dsize = sizeof(db_id);
4195 ret = ctdb_control(ctdb, destnode, 0,
4196 CTDB_CONTROL_GET_DB_PRIORITY, 0, data,
4197 tmp_ctx, NULL, &res, &timeout, NULL);
4198 if (ret != 0 || res < 0) {
4199 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set_db_priority failed\n"));
4200 talloc_free(tmp_ctx);
4208 talloc_free(tmp_ctx);
4213 int ctdb_ctrl_getstathistory(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_statistics_wire **stats)
4219 ret = ctdb_control(ctdb, destnode, 0,
4220 CTDB_CONTROL_GET_STAT_HISTORY, 0, tdb_null,
4221 mem_ctx, &outdata, &res, &timeout, NULL);
4222 if (ret != 0 || res != 0 || outdata.dsize == 0) {
4223 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getstathistory failed ret:%d res:%d\n", ret, res));
4227 *stats = (struct ctdb_statistics_wire *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
4228 talloc_free(outdata.dptr);