4 Copyright (C) Andrew Tridgell 2007
5 Copyright (C) Ronnie Sahlberg 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
23 #include "lib/tdb/include/tdb.h"
24 #include "lib/util/dlinklist.h"
25 #include "lib/tevent/tevent.h"
26 #include "system/network.h"
27 #include "system/filesys.h"
28 #include "system/locale.h"
30 #include "../include/ctdb_private.h"
31 #include "lib/util/dlinklist.h"
36 allocate a packet for use in client<->daemon communication
// Build a request header for client<->daemon traffic. As far as the visible
// lines show: length is raised to at least slength, the allocation size is
// rounded up with a CTDB_DS_ALIGNMENT mask (assumes a power of two - TODO
// confirm), the first slength bytes are zeroed, and the header fields are
// filled in from the arguments and from ctdb.
// NOTE(review): the embedded line numbers skip values, so parts of the
// signature and body are not present in this extract.
38 struct ctdb_req_header *_ctdbd_allocate_pkt(struct ctdb_context *ctdb,
40 enum ctdb_operation operation,
41 size_t length, size_t slength,
45 struct ctdb_req_header *hdr;
47 length = MAX(length, slength);
48 size = (length+(CTDB_DS_ALIGNMENT-1)) & ~(CTDB_DS_ALIGNMENT-1);
// the packet is allocated as a talloc child of mem_ctx
50 hdr = (struct ctdb_req_header *)talloc_size(mem_ctx, size);
52 DEBUG(DEBUG_ERR,("Unable to allocate packet for operation %u of length %u\n",
53 operation, (unsigned)length));
// the talloc chunk is named after the caller-supplied type string
56 talloc_set_name_const(hdr, type);
// only slength bytes are cleared, not the full aligned allocation
57 memset(hdr, 0, slength);
59 hdr->operation = operation;
60 hdr->ctdb_magic = CTDB_MAGIC;
61 hdr->ctdb_version = CTDB_VERSION;
62 hdr->srcnode = ctdb->pnn;
// NOTE(review): dereferences ctdb->vnn_map; confirm a NULL guard sits on the
// line that is missing just before this one
64 hdr->generation = ctdb->vnn_map->generation;
71 local version of ctdb_call
// Run the registered call handler whose id equals call->call_id against a
// private copy of the record data, write the record back through
// ctdb_ltdb_store, and pass reply data and status back through call.
// header->laccessor is updated to caller when it differs.
73 int ctdb_call_local(struct ctdb_db_context *ctdb_db, struct ctdb_call *call,
74 struct ctdb_ltdb_header *header, TALLOC_CTX *mem_ctx,
75 TDB_DATA *data, uint32_t caller)
77 struct ctdb_call_info *c;
78 struct ctdb_registered_call *fn;
79 struct ctdb_context *ctdb = ctdb_db->ctdb;
// NOTE(review): c is a talloc child of ctdb, not of mem_ctx; confirm it is
// freed on every exit path (the frees are not visible in this extract)
81 c = talloc(ctdb, struct ctdb_call_info);
82 CTDB_NO_MEMORY(ctdb, c);
85 c->call_data = &call->call_data;
// the handler works on a copy of the record owned by c
86 c->record_data.dptr = talloc_memdup(c, data->dptr, data->dsize);
87 c->record_data.dsize = data->dsize;
88 CTDB_NO_MEMORY(ctdb, c->record_data.dptr);
// linear search of the calls registered on this database
93 for (fn=ctdb_db->calls;fn;fn=fn->next) {
94 if (fn->id == call->call_id) break;
97 ctdb_set_error(ctdb, "Unknown call id %u\n", call->call_id);
102 if (fn->fn(c) != 0) {
103 ctdb_set_error(ctdb, "ctdb_call %u failed\n", call->call_id);
108 if (header->laccessor != caller) {
111 header->laccessor = caller;
114 /* we need to force the record to be written out if this was a remote access,
115 so that the lacount is updated */
116 if (c->new_data == NULL && header->laccessor != ctdb->pnn) {
117 c->new_data = &c->record_data;
121 /* XXX check that we always have the lock here? */
// NOTE(review): *c->new_data is dereferenced here; confirm the omitted lines
// guard against new_data still being NULL for a local accessor
122 if (ctdb_ltdb_store(ctdb_db, call->key, header, *c->new_data) != 0) {
123 ctdb_set_error(ctdb, "ctdb_call tdb_store failed\n");
130 call->reply_data = *c->reply_data;
// the reply buffer is reparented onto call
132 talloc_steal(call, call->reply_data.dptr);
133 talloc_set_name_const(call->reply_data.dptr, __location__);
135 call->reply_data.dptr = NULL;
136 call->reply_data.dsize = 0;
138 call->status = c->status;
147 queue a packet for sending from client to daemon
// Thin wrapper: hands the packet to ctdb_queue_send on ctdb->daemon.queue,
// using hdr->length as the byte count, and returns that call's result.
149 static int ctdb_client_queue_pkt(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
151 return ctdb_queue_send(ctdb->daemon.queue, (uint8_t *)hdr, hdr->length);
156 called when a CTDB_REPLY_CALL packet arrives in the client
158 This packet comes in response to a CTDB_REQ_CALL request packet. It
159 contains any reply data from the call
// Match an incoming reply against the pending call state registered under
// hdr->reqid, point the call's reply_data at the payload inside the packet,
// take ownership of the packet, mark the call done and run any async callback.
161 static void ctdb_client_reply_call(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
163 struct ctdb_reply_call *c = (struct ctdb_reply_call *)hdr;
164 struct ctdb_client_call_state *state;
166 state = ctdb_reqid_find(ctdb, hdr->reqid, struct ctdb_client_call_state);
// logged when the lookup finds nothing (the guarding condition is not visible
// in this extract)
168 DEBUG(DEBUG_ERR,(__location__ " reqid %u not found\n", hdr->reqid));
172 if (hdr->reqid != state->reqid) {
173 /* we found a record but it was the wrong one */
174 DEBUG(DEBUG_ERR, ("Dropped client call reply with reqid:%u\n",hdr->reqid));
// reply_data.dptr aliases memory inside the packet c; it is not a copy
178 state->call->reply_data.dptr = c->data;
179 state->call->reply_data.dsize = c->datalen;
180 state->call->status = c->status;
// reparent the packet under state so the aliased payload lives as long as state
182 talloc_steal(state, c);
184 state->state = CTDB_CALL_DONE;
186 if (state->async.fn) {
187 state->async.fn(state);
// forward declaration: ctdb_client_read_cb below calls this before its
// definition appears later in the file
191 static void ctdb_client_reply_control(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);
194 this is called in the client, when data comes in from the daemon
// Read callback for the daemon connection. Validates the received packet
// (minimum size, header length equal to cnt, magic, version) and dispatches on
// hdr->operation to the reply-call, message or reply-control handler. The
// packet is parked under a temporary talloc context that is freed at the end.
196 static void ctdb_client_read_cb(uint8_t *data, size_t cnt, void *args)
198 struct ctdb_context *ctdb = talloc_get_type(args, struct ctdb_context);
199 struct ctdb_req_header *hdr = (struct ctdb_req_header *)data;
202 /* place the packet as a child of a tmp_ctx. We then use
203 talloc_free() below to free it. If any of the calls want
204 to keep it, then they will steal it somewhere else, and the
205 talloc_free() will be a no-op */
// NOTE(review): the result of talloc_new is not visibly checked
206 tmp_ctx = talloc_new(ctdb);
207 talloc_steal(tmp_ctx, hdr);
// NOTE(review): the condition guarding this message is not visible here;
// presumably an empty read - confirm
210 DEBUG(DEBUG_INFO,("Daemon has exited - shutting down client\n"));
// the header fields are only read after cnt is known to cover a full header
214 if (cnt < sizeof(*hdr)) {
215 DEBUG(DEBUG_CRIT,("Bad packet length %u in client\n", (unsigned)cnt));
218 if (cnt != hdr->length) {
219 ctdb_set_error(ctdb, "Bad header length %u expected %u in client\n",
220 (unsigned)hdr->length, (unsigned)cnt);
224 if (hdr->ctdb_magic != CTDB_MAGIC) {
225 ctdb_set_error(ctdb, "Non CTDB packet rejected in client\n");
229 if (hdr->ctdb_version != CTDB_VERSION) {
230 ctdb_set_error(ctdb, "Bad CTDB version 0x%x rejected in client\n", hdr->ctdb_version);
234 switch (hdr->operation) {
235 case CTDB_REPLY_CALL:
236 ctdb_client_reply_call(ctdb, hdr);
239 case CTDB_REQ_MESSAGE:
240 ctdb_request_message(ctdb, hdr);
243 case CTDB_REPLY_CONTROL:
244 ctdb_client_reply_control(ctdb, hdr);
// any other operation code is only logged
248 DEBUG(DEBUG_CRIT,("bogus operation code:%u\n",hdr->operation));
252 talloc_free(tmp_ctx);
256 connect to a unix domain socket
// Open the unix domain socket to the daemon at ctdb->daemon.name, make it
// non-blocking and close-on-exec, connect it, and attach a packet queue whose
// read callback is ctdb_client_read_cb.
258 int ctdb_socket_connect(struct ctdb_context *ctdb)
260 struct sockaddr_un addr;
262 memset(&addr, 0, sizeof(addr));
263 addr.sun_family = AF_UNIX;
// NOTE(review): copying up to sizeof(sun_path) bytes leaves no room for the
// terminator when daemon.name is that long or longer; the memset above only
// guarantees termination for shorter names
264 strncpy(addr.sun_path, ctdb->daemon.name, sizeof(addr.sun_path));
266 ctdb->daemon.sd = socket(AF_UNIX, SOCK_STREAM, 0);
267 if (ctdb->daemon.sd == -1) {
268 DEBUG(DEBUG_ERR,(__location__ " Failed to open client socket. Errno:%s(%d)\n", strerror(errno), errno));
272 set_nonblocking(ctdb->daemon.sd);
273 set_close_on_exec(ctdb->daemon.sd);
275 if (connect(ctdb->daemon.sd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
// NOTE(review): close() runs before errno is formatted in the log call below
// and may overwrite the value left by connect()
276 close(ctdb->daemon.sd);
277 ctdb->daemon.sd = -1;
278 DEBUG(DEBUG_ERR,(__location__ " Failed to connect client socket to daemon. Errno:%s(%d)\n", strerror(errno), errno));
282 ctdb->daemon.queue = ctdb_queue_setup(ctdb, ctdb, ctdb->daemon.sd,
284 ctdb_client_read_cb, ctdb, "to-ctdbd");
// Handle for a record obtained through ctdb_fetch_lock: remembers the database
// and the record header fetched under the chain lock.
// NOTE(review): other functions in this file use a key member of this struct;
// its declaration is not among the visible lines.
289 struct ctdb_record_handle {
290 struct ctdb_db_context *ctdb_db;
293 struct ctdb_ltdb_header header;
298 make a recv call to the local ctdb daemon - called from client context
300 This is called when the program wants to wait for a ctdb_call to complete and get the
301 results. This call will block unless the call has already completed.
// Wait for a call started by the send side and copy its result into call.
// The event loop is run while state->state is below CTDB_CALL_DONE; reply data,
// when present, is duplicated onto the state->ctdb_db talloc context.
303 int ctdb_call_recv(struct ctdb_client_call_state *state, struct ctdb_call *call)
309 while (state->state < CTDB_CALL_DONE) {
310 event_loop_once(state->ctdb_db->ctdb->ev);
312 if (state->state != CTDB_CALL_DONE) {
313 DEBUG(DEBUG_ERR,(__location__ " ctdb_call_recv failed\n"));
318 if (state->call->reply_data.dsize) {
// NOTE(review): the duplicate is not visibly checked for allocation failure
319 call->reply_data.dptr = talloc_memdup(state->ctdb_db,
320 state->call->reply_data.dptr,
321 state->call->reply_data.dsize);
322 call->reply_data.dsize = state->call->reply_data.dsize;
324 call->reply_data.dptr = NULL;
325 call->reply_data.dsize = 0;
327 call->status = state->call->status;
337 destroy a ctdb_call in client
// talloc destructor for a client call state (installed in ctdb_call_send):
// releases the reqid that was allocated for the state.
339 static int ctdb_client_call_destructor(struct ctdb_client_call_state *state)
341 ctdb_reqid_remove(state->ctdb_db->ctdb, state->reqid);
346 construct an event driven local ctdb_call
348 this is used so that locally processed ctdb_call requests are processed
349 in an event driven manner
// Build an already-completed call state for a record handled on this node:
// the state is allocated on ctdb_db, takes ownership of data->dptr, receives a
// copy of *call, and ctdb_call_local is run immediately with ctdb->pnn as the
// caller.
351 static struct ctdb_client_call_state *ctdb_client_call_local_send(struct ctdb_db_context *ctdb_db,
352 struct ctdb_call *call,
353 struct ctdb_ltdb_header *header,
356 struct ctdb_client_call_state *state;
357 struct ctdb_context *ctdb = ctdb_db->ctdb;
360 state = talloc_zero(ctdb_db, struct ctdb_client_call_state);
361 CTDB_NO_MEMORY_NULL(ctdb, state);
362 state->call = talloc_zero(state, struct ctdb_call);
363 CTDB_NO_MEMORY_NULL(ctdb, state->call);
// the fetched record buffer becomes a child of state
365 talloc_steal(state, data->dptr);
// marked done up front: nothing has to be waited for on the local path
367 state->state = CTDB_CALL_DONE;
368 *(state->call) = *call;
369 state->ctdb_db = ctdb_db;
371 ret = ctdb_call_local(ctdb_db, state->call, header, state, data, ctdb->pnn);
377 make a ctdb call to the local daemon - async send. Called from client context.
379 This constructs a ctdb_call request and queues it for processing.
380 This call never blocks.
// Start a call without blocking. Takes the chain lock and fetches the record;
// when the fetch succeeds and this node is the dmaster the call is run locally,
// otherwise a CTDB_REQ_CALL packet carrying the key and call data is built,
// the state is registered under a new reqid, and the packet is queued to the
// daemon.
382 struct ctdb_client_call_state *ctdb_call_send(struct ctdb_db_context *ctdb_db,
383 struct ctdb_call *call)
385 struct ctdb_client_call_state *state;
386 struct ctdb_context *ctdb = ctdb_db->ctdb;
387 struct ctdb_ltdb_header header;
391 struct ctdb_req_call *c;
393 /* if the domain socket is not yet open, open it */
394 if (ctdb->daemon.sd==-1) {
// NOTE(review): the result of the connect attempt is not used here
395 ctdb_socket_connect(ctdb);
398 ret = ctdb_ltdb_lock(ctdb_db, call->key);
400 DEBUG(DEBUG_ERR,(__location__ " Failed to get chainlock\n"));
404 ret = ctdb_ltdb_fetch(ctdb_db, call->key, &header, ctdb_db, &data);
// local path: record fetched and this node is its dmaster
406 if (ret == 0 && header.dmaster == ctdb->pnn) {
407 state = ctdb_client_call_local_send(ctdb_db, call, &header, &data);
408 talloc_free(data.dptr);
409 ctdb_ltdb_unlock(ctdb_db, call->key);
// remote path: the lock and the fetched data are released before the request
// is built
413 ctdb_ltdb_unlock(ctdb_db, call->key);
414 talloc_free(data.dptr);
416 state = talloc_zero(ctdb_db, struct ctdb_client_call_state);
418 DEBUG(DEBUG_ERR, (__location__ " failed to allocate state\n"));
421 state->call = talloc_zero(state, struct ctdb_call);
422 if (state->call == NULL) {
423 DEBUG(DEBUG_ERR, (__location__ " failed to allocate state->call\n"));
// packet size: fixed part of ctdb_req_call plus key bytes plus call data bytes
427 len = offsetof(struct ctdb_req_call, data) + call->key.dsize + call->call_data.dsize;
428 c = ctdbd_allocate_pkt(ctdb, state, CTDB_REQ_CALL, len, struct ctdb_req_call);
430 DEBUG(DEBUG_ERR, (__location__ " failed to allocate packet\n"));
434 state->reqid = ctdb_reqid_new(ctdb, state);
435 state->ctdb_db = ctdb_db;
436 talloc_set_destructor(state, ctdb_client_call_destructor);
438 c->hdr.reqid = state->reqid;
439 c->flags = call->flags;
440 c->db_id = ctdb_db->db_id;
441 c->callid = call->call_id;
443 c->keylen = call->key.dsize;
444 c->calldatalen = call->call_data.dsize;
// payload layout: key bytes first, call data immediately after
445 memcpy(&c->data[0], call->key.dptr, call->key.dsize);
446 memcpy(&c->data[call->key.dsize],
447 call->call_data.dptr, call->call_data.dsize);
// the saved call is redirected to the copies inside the packet, so it no
// longer points at the caller's buffers
448 *(state->call) = *call;
449 state->call->call_data.dptr = &c->data[call->key.dsize];
450 state->call->key.dptr = &c->data[0];
452 state->state = CTDB_CALL_WAIT;
// NOTE(review): the result of queueing the packet is not used here
455 ctdb_client_queue_pkt(ctdb, &c->hdr);
462 full ctdb_call. Equivalent to a ctdb_call_send() followed by a ctdb_call_recv()
// Blocking call: ctdb_call_send followed by ctdb_call_recv on the returned
// state; the result of the recv step is returned.
// NOTE(review): state is passed straight to ctdb_call_recv; confirm that
// function tolerates a NULL state from a failed send.
464 int ctdb_call(struct ctdb_db_context *ctdb_db, struct ctdb_call *call)
466 struct ctdb_client_call_state *state;
468 state = ctdb_call_send(ctdb_db, call);
469 return ctdb_call_recv(state, call);
474 tell the daemon what messaging srvid we will use, and register the message
475 handler function in the client
// Register srvid with the daemon via CTDB_CONTROL_REGISTER_SRVID on the
// current node, then install handler for that srvid in this process through
// ctdb_register_message_handler, whose result is returned.
477 int ctdb_client_set_message_handler(struct ctdb_context *ctdb, uint64_t srvid,
478 ctdb_msg_fn_t handler,
485 res = ctdb_control(ctdb, CTDB_CURRENT_NODE, srvid, CTDB_CONTROL_REGISTER_SRVID, 0,
486 tdb_null, NULL, NULL, &status, NULL, NULL);
// both the transport result and the daemon's status must be zero
487 if (res != 0 || status != 0) {
488 DEBUG(DEBUG_ERR,("Failed to register srvid %llu\n", (unsigned long long)srvid));
492 /* also need to register the handler with our own ctdb structure */
493 return ctdb_register_message_handler(ctdb, ctdb, srvid, handler, private_data);
497 tell the daemon we no longer want a srvid
// Ask the daemon to drop srvid via CTDB_CONTROL_DEREGISTER_SRVID on the
// current node, then remove the matching handler from this process through
// ctdb_deregister_message_handler.
499 int ctdb_client_remove_message_handler(struct ctdb_context *ctdb, uint64_t srvid, void *private_data)
504 res = ctdb_control(ctdb, CTDB_CURRENT_NODE, srvid, CTDB_CONTROL_DEREGISTER_SRVID, 0,
505 tdb_null, NULL, NULL, &status, NULL, NULL);
// both the transport result and the daemon's status must be zero
506 if (res != 0 || status != 0) {
507 DEBUG(DEBUG_ERR,("Failed to deregister srvid %llu\n", (unsigned long long)srvid));
511 /* also need to deregister the handler from our own ctdb structure */
512 ctdb_deregister_message_handler(ctdb, srvid, private_data);
518 send a message - from client context
// Build a CTDB_REQ_MESSAGE packet holding a copy of data, address it to node
// pnn and queue it to the daemon.
// NOTE(review): the line storing srvid into the packet is not visible here.
520 int ctdb_client_send_message(struct ctdb_context *ctdb, uint32_t pnn,
521 uint64_t srvid, TDB_DATA data)
523 struct ctdb_req_message *r;
526 len = offsetof(struct ctdb_req_message, data) + data.dsize;
// NOTE(review): the packet is a talloc child of ctdb itself; confirm it is
// freed after queueing (no free is visible in this extract)
527 r = ctdbd_allocate_pkt(ctdb, ctdb, CTDB_REQ_MESSAGE,
528 len, struct ctdb_req_message);
529 CTDB_NO_MEMORY(ctdb, r);
531 r->hdr.destnode = pnn;
533 r->datalen = data.dsize;
534 memcpy(&r->data[0], data.dptr, data.dsize);
536 res = ctdb_client_queue_pkt(ctdb, &r->hdr);
547 cancel a ctdb_fetch_lock operation, releasing the lock
// talloc destructor for a record handle (installed in ctdb_fetch_lock):
// releases the chain lock held on h->key in h->ctdb_db.
549 static int fetch_lock_destructor(struct ctdb_record_handle *h)
551 ctdb_ltdb_unlock(h->ctdb_db, h->key);
556 force the migration of a record to this node
// Issue a blocking CTDB_NULL_FUNC call flagged CTDB_IMMEDIATE_MIGRATION and
// return the result of ctdb_call.
// NOTE(review): only call_id and flags are visibly assigned on this
// stack-allocated struct; confirm key and the remaining members are set on the
// omitted lines.
558 static int ctdb_client_force_migration(struct ctdb_db_context *ctdb_db, TDB_DATA key)
560 struct ctdb_call call;
562 call.call_id = CTDB_NULL_FUNC;
564 call.flags = CTDB_IMMEDIATE_MIGRATION;
565 return ctdb_call(ctdb_db, &call);
569 get a lock on a record, and return the record's data. Blocks until it gets the lock
// Obtain a handle on a locked record. The handle is allocated on mem_ctx and
// keeps its own copy of the key. The chain lock is taken and the record is
// fetched; when the fetch fails or this node is not the dmaster, the lock is
// dropped and migration is requested.
// NOTE(review): the retry jump described in step 5 below is not among the
// visible lines.
571 struct ctdb_record_handle *ctdb_fetch_lock(struct ctdb_db_context *ctdb_db, TALLOC_CTX *mem_ctx,
572 TDB_DATA key, TDB_DATA *data)
575 struct ctdb_record_handle *h;
578 procedure is as follows:
580 1) get the chain lock.
581 2) check if we are dmaster
582 3) if we are the dmaster then return handle
583 4) if not dmaster then ask ctdb daemon to make us dmaster, and wait for
585 5) when we get the reply, goto (1)
588 h = talloc_zero(mem_ctx, struct ctdb_record_handle);
593 h->ctdb_db = ctdb_db;
// private copy of the key, owned by the handle
595 h->key.dptr = talloc_memdup(h, key.dptr, key.dsize);
596 if (h->key.dptr == NULL) {
602 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: key=%*.*s\n", (int)key.dsize, (int)key.dsize,
603 (const char *)key.dptr));
606 /* step 1 - get the chain lock */
607 ret = ctdb_ltdb_lock(ctdb_db, key);
609 DEBUG(DEBUG_ERR, (__location__ " failed to lock ltdb record\n"));
614 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: got chain lock\n"));
// from here on, freeing h releases the chain lock through the destructor
616 talloc_set_destructor(h, fetch_lock_destructor);
618 ret = ctdb_ltdb_fetch(ctdb_db, key, &h->header, h, data);
620 /* when torturing, ensure we test the remote path */
621 if ((ctdb_db->ctdb->flags & CTDB_FLAG_TORTURE) &&
623 h->header.dmaster = (uint32_t)-1;
627 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: done local fetch\n"));
// not usable locally: release the lock and ask for the record to be migrated
629 if (ret != 0 || h->header.dmaster != ctdb_db->ctdb->pnn) {
630 ctdb_ltdb_unlock(ctdb_db, key);
631 ret = ctdb_client_force_migration(ctdb_db, key);
633 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: force_migration failed\n"));
640 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: we are dmaster - done\n"));
645 store some data to the record that was locked with ctdb_fetch_lock()
// Write data to the record referenced by handle h through ctdb_ltdb_store,
// using the key and header kept in the handle. Handles on persistent databases
// are rejected with a logged error.
647 int ctdb_record_store(struct ctdb_record_handle *h, TDB_DATA data)
649 if (h->ctdb_db->persistent) {
650 DEBUG(DEBUG_ERR, (__location__ " ctdb_record_store prohibited for persistent dbs\n"));
654 return ctdb_ltdb_store(h->ctdb_db, h->key, &h->header, data);
658 non-locking fetch of a record
// Fetch a record by issuing a blocking CTDB_FETCH_FUNC call with no call data;
// the reply buffer is handed to the caller through data and reparented onto
// mem_ctx.
// NOTE(review): the assignment of key into the call struct is not visible in
// this extract.
660 int ctdb_fetch(struct ctdb_db_context *ctdb_db, TALLOC_CTX *mem_ctx,
661 TDB_DATA key, TDB_DATA *data)
663 struct ctdb_call call;
666 call.call_id = CTDB_FETCH_FUNC;
667 call.call_data.dptr = NULL;
668 call.call_data.dsize = 0;
670 ret = ctdb_call(ctdb_db, &call);
673 *data = call.reply_data;
674 talloc_steal(mem_ctx, data->dptr);
683 called when a control completes or times out, to invoke the callback
684 function the user provided
// Timed-event handler used to finish an async control: the state passed as
// private_data is reparented to a temporary context, ctdb_control_recv is
// called on it, and the temporary context is freed.
686 static void invoke_control_callback(struct event_context *ev, struct timed_event *te,
687 struct timeval t, void *private_data)
689 struct ctdb_client_control_state *state;
// NOTE(review): the result of talloc_new is not visibly checked
690 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
693 state = talloc_get_type(private_data, struct ctdb_client_control_state);
694 talloc_steal(tmp_ctx, state);
696 ret = ctdb_control_recv(state->ctdb, state, state,
701 talloc_free(tmp_ctx);
705 called when a CTDB_REPLY_CONTROL packet arrives in the client
707 This packet comes in response to a CTDB_REQ_CONTROL request packet. It
708 contains any reply data from the control
// Match an incoming control reply against the pending state registered under
// hdr->reqid, record its payload, status and error text on the state, take
// ownership of the packet, mark the control done, and schedule the async
// callback (if any) through a zero-delay timed event.
710 static void ctdb_client_reply_control(struct ctdb_context *ctdb,
711 struct ctdb_req_header *hdr)
713 struct ctdb_reply_control *c = (struct ctdb_reply_control *)hdr;
714 struct ctdb_client_control_state *state;
716 state = ctdb_reqid_find(ctdb, hdr->reqid, struct ctdb_client_control_state);
// logged when the lookup finds nothing (the guarding condition is not visible
// in this extract)
718 DEBUG(DEBUG_ERR,(__location__ " reqid %u not found\n", hdr->reqid));
722 if (hdr->reqid != state->reqid) {
723 /* we found a record but it was the wrong one */
724 DEBUG(DEBUG_ERR, ("Dropped orphaned reply control with reqid:%u\n",hdr->reqid));
// outdata aliases memory inside the packet c; it is not a copy
728 state->outdata.dptr = c->data;
729 state->outdata.dsize = c->datalen;
730 state->status = c->status;
// the error text is read from the bytes that follow the data payload
732 state->errormsg = talloc_strndup(state,
733 (char *)&c->data[c->datalen],
737 /* state->outdata now uses resources from c so we dont want c
738 to just dissappear from under us while state is still alive
740 talloc_steal(state, c);
742 state->state = CTDB_CONTROL_DONE;
744 /* if we had a callback registered for this control, pull the response
745 and call the callback.
747 if (state->async.fn) {
748 event_add_timed(ctdb->ev, state, timeval_zero(), invoke_control_callback, state);
754 destroy a ctdb_control in client
// talloc destructor for a control state (installed in ctdb_control_send):
// releases the reqid that was allocated for the state.
756 static int ctdb_control_destructor(struct ctdb_client_control_state *state)
758 ctdb_reqid_remove(state->ctdb, state->reqid);
763 /* time out handler for ctdb_control */
// Timed-event handler armed by ctdb_control_send: logs the timed-out control,
// sets the state to CTDB_CONTROL_TIMEOUT, and schedules the async callback
// (if any) through a zero-delay timed event.
764 static void control_timeout_func(struct event_context *ev, struct timed_event *te,
765 struct timeval t, void *private_data)
767 struct ctdb_client_control_state *state = talloc_get_type(private_data, struct ctdb_client_control_state);
769 DEBUG(DEBUG_ERR,(__location__ " control timed out. reqid:%u opcode:%u "
770 "dstnode:%u\n", state->reqid, state->c->opcode,
771 state->c->hdr.destnode));
773 state->state = CTDB_CONTROL_TIMEOUT;
775 /* if we had a callback registered for this control, pull the response
776 and call the callback.
778 if (state->async.fn) {
779 event_add_timed(state->ctdb->ev, state, timeval_zero(), invoke_control_callback, state);
783 /* async version of send control request */
// Start a control request without blocking: allocate the state on mem_ctx,
// register it under a new reqid, build a CTDB_REQ_CONTROL packet containing a
// copy of data, arm a timeout timer when a non-zero timeout is supplied, and
// queue the packet to the daemon.
784 struct ctdb_client_control_state *ctdb_control_send(struct ctdb_context *ctdb,
785 uint32_t destnode, uint64_t srvid,
786 uint32_t opcode, uint32_t flags, TDB_DATA data,
788 struct timeval *timeout,
791 struct ctdb_client_control_state *state;
793 struct ctdb_req_control *c;
800 /* if the domain socket is not yet open, open it */
801 if (ctdb->daemon.sd==-1) {
// NOTE(review): the result of the connect attempt is not used here
802 ctdb_socket_connect(ctdb);
805 state = talloc_zero(mem_ctx, struct ctdb_client_control_state);
806 CTDB_NO_MEMORY_NULL(ctdb, state);
809 state->reqid = ctdb_reqid_new(ctdb, state);
810 state->state = CTDB_CONTROL_WAIT;
811 state->errormsg = NULL;
// the destructor gives the reqid back when state is freed
813 talloc_set_destructor(state, ctdb_control_destructor);
// packet size: fixed part of ctdb_req_control plus the payload bytes
815 len = offsetof(struct ctdb_req_control, data) + data.dsize;
// the packet is a talloc child of state
816 c = ctdbd_allocate_pkt(ctdb, state, CTDB_REQ_CONTROL,
817 len, struct ctdb_req_control);
819 CTDB_NO_MEMORY_NULL(ctdb, c);
820 c->hdr.reqid = state->reqid;
821 c->hdr.destnode = destnode;
826 c->datalen = data.dsize;
828 memcpy(&c->data[0], data.dptr, data.dsize);
// a NULL or all-zero timeout means no timer is armed
832 if (timeout && !timeval_is_zero(timeout)) {
833 event_add_timed(ctdb->ev, state, *timeout, control_timeout_func, state);
836 ret = ctdb_client_queue_pkt(ctdb, &(c->hdr));
// NOTE(review): the handling of the no-reply case is on lines not visible here
842 if (flags & CTDB_CTRL_FLAG_NOREPLY) {
851 /* async version of receive control reply */
// Complete a control started by ctdb_control_send. The event loop is run while
// the state is CTDB_CONTROL_WAIT; errormsg, outdata and status are then handed
// back to the caller (errormsg and the outdata copy live on mem_ctx). Any
// async callback on the state is invoked before the state is freed through
// tmp_ctx.
852 int ctdb_control_recv(struct ctdb_context *ctdb,
853 struct ctdb_client_control_state *state,
855 TDB_DATA *outdata, int32_t *status, char **errormsg)
859 if (status != NULL) {
862 if (errormsg != NULL) {
870 /* prevent double free of state */
871 tmp_ctx = talloc_new(ctdb);
872 talloc_steal(tmp_ctx, state);
874 /* loop one event at a time until we either timeout or the control
877 while (state->state == CTDB_CONTROL_WAIT) {
878 event_loop_once(ctdb->ev);
// any final state other than DONE is treated as failure
881 if (state->state != CTDB_CONTROL_DONE) {
882 DEBUG(DEBUG_ERR,(__location__ " ctdb_control_recv failed\n"));
883 if (state->async.fn) {
884 state->async.fn(state);
886 talloc_free(tmp_ctx);
890 if (state->errormsg) {
891 DEBUG(DEBUG_ERR,("ctdb_control error: '%s'\n", state->errormsg));
// ownership of the error string moves to mem_ctx
893 (*errormsg) = talloc_move(mem_ctx, &state->errormsg);
895 if (state->async.fn) {
896 state->async.fn(state);
898 talloc_free(tmp_ctx);
// the payload is duplicated onto mem_ctx because the original lives inside the
// reply packet owned by state
// NOTE(review): the duplicate is not visibly checked for allocation failure
903 *outdata = state->outdata;
904 outdata->dptr = talloc_memdup(mem_ctx, outdata->dptr, outdata->dsize);
908 *status = state->status;
911 if (state->async.fn) {
912 state->async.fn(state);
915 talloc_free(tmp_ctx);
922 send a ctdb control message
923 timeout specifies how long we should wait for a reply.
924 if timeout is NULL we wait indefinitely
// Blocking control: ctdb_control_send followed by ctdb_control_recv on the
// returned state; the result of the recv step is returned.
926 int ctdb_control(struct ctdb_context *ctdb, uint32_t destnode, uint64_t srvid,
927 uint32_t opcode, uint32_t flags, TDB_DATA data,
928 TALLOC_CTX *mem_ctx, TDB_DATA *outdata, int32_t *status,
929 struct timeval *timeout,
932 struct ctdb_client_control_state *state;
934 state = ctdb_control_send(ctdb, destnode, srvid, opcode,
935 flags, data, mem_ctx,
937 return ctdb_control_recv(ctdb, state, mem_ctx, outdata, status,
945 a process exists call. Returns 0 if process exists, -1 otherwise
// Send CTDB_CONTROL_PROCESS_EXISTS to destnode with the raw bytes of pid as
// payload; no timeout and no output buffer are passed.
947 int ctdb_ctrl_process_exists(struct ctdb_context *ctdb, uint32_t destnode, pid_t pid)
// the payload points at the caller's pid on the stack; it is not copied here
953 data.dptr = (uint8_t*)&pid;
954 data.dsize = sizeof(pid);
956 ret = ctdb_control(ctdb, destnode, 0,
957 CTDB_CONTROL_PROCESS_EXISTS, 0, data,
958 NULL, NULL, &status, NULL, NULL);
960 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for process_exists failed\n"));
968 get remote statistics
// Fetch statistics from destnode with CTDB_CONTROL_STATISTICS (no timeout),
// verify the reply is exactly sizeof(struct ctdb_statistics), copy it into
// *status and free the reply buffer.
970 int ctdb_ctrl_statistics(struct ctdb_context *ctdb, uint32_t destnode, struct ctdb_statistics *status)
// the reply buffer is allocated on ctdb
976 ret = ctdb_control(ctdb, destnode, 0,
977 CTDB_CONTROL_STATISTICS, 0, tdb_null,
978 ctdb, &data, &res, NULL, NULL);
979 if (ret != 0 || res != 0) {
980 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for statistics failed\n"));
// NOTE(review): confirm the reply buffer is also released on this size
// mismatch path (no free is visible for it)
984 if (data.dsize != sizeof(struct ctdb_statistics)) {
985 DEBUG(DEBUG_ERR,(__location__ " Wrong statistics size %u - expected %u\n",
986 (unsigned)data.dsize, (unsigned)sizeof(struct ctdb_statistics)));
990 *status = *(struct ctdb_statistics *)data.dptr;
991 talloc_free(data.dptr);
997 shutdown a remote ctdb node
// Send CTDB_CONTROL_SHUTDOWN to destnode using the async send only; no
// matching receive call is visible in this function. The state is allocated
// with a NULL talloc parent.
999 int ctdb_ctrl_shutdown(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
1001 struct ctdb_client_control_state *state;
1003 state = ctdb_control_send(ctdb, destnode, 0,
1004 CTDB_CONTROL_SHUTDOWN, 0, tdb_null,
1005 NULL, &timeout, NULL);
1006 if (state == NULL) {
1007 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for shutdown failed\n"));
1015 get vnn map from a remote node
// Fetch the vnn map from destnode with CTDB_CONTROL_GETVNNMAP, validate the
// wire size, and convert it into a newly allocated struct ctdb_vnn_map on
// mem_ctx (the map array is a child of the struct). The reply buffer is freed
// afterwards.
1017 int ctdb_ctrl_getvnnmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_vnn_map **vnnmap)
1022 struct ctdb_vnn_map_wire *map;
1024 ret = ctdb_control(ctdb, destnode, 0,
1025 CTDB_CONTROL_GETVNNMAP, 0, tdb_null,
1026 mem_ctx, &outdata, &res, &timeout, NULL);
1027 if (ret != 0 || res != 0) {
1028 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getvnnmap failed\n"));
1032 map = (struct ctdb_vnn_map_wire *)outdata.dptr;
// the reply must hold the fixed header, and its total size must equal the
// header plus map->size 32-bit entries; map->size is only read once the first
// test has passed
1033 if (outdata.dsize < offsetof(struct ctdb_vnn_map_wire, map) ||
1034 outdata.dsize != map->size*sizeof(uint32_t) + offsetof(struct ctdb_vnn_map_wire, map)) {
1035 DEBUG(DEBUG_ERR,("Bad vnn map size received in ctdb_ctrl_getvnnmap\n"));
1039 (*vnnmap) = talloc(mem_ctx, struct ctdb_vnn_map);
1040 CTDB_NO_MEMORY(ctdb, *vnnmap);
1041 (*vnnmap)->generation = map->generation;
1042 (*vnnmap)->size = map->size;
1043 (*vnnmap)->map = talloc_array(*vnnmap, uint32_t, map->size);
1045 CTDB_NO_MEMORY(ctdb, (*vnnmap)->map);
1046 memcpy((*vnnmap)->map, map->map, sizeof(uint32_t)*map->size);
1047 talloc_free(outdata.dptr);
1054 get the recovery mode of a remote node
// Async send half of getrecmode: starts CTDB_CONTROL_GET_RECMODE towards
// destnode with the given timeout and returns the control state.
1056 struct ctdb_client_control_state *
1057 ctdb_ctrl_getrecmode_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
1059 return ctdb_control_send(ctdb, destnode, 0,
1060 CTDB_CONTROL_GET_RECMODE, 0, tdb_null,
1061 mem_ctx, &timeout, NULL);
// Async receive half of getrecmode: waits on state through ctdb_control_recv
// and stores the control's status value, cast to uint32_t, in *recmode.
1064 int ctdb_ctrl_getrecmode_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *recmode)
1069 ret = ctdb_control_recv(ctdb, state, mem_ctx, NULL, &res, NULL);
1071 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_getrecmode_recv failed\n"));
// the recovery mode travels in the status field, not in a data payload
1076 *recmode = (uint32_t)res;
// Blocking getrecmode: the send half followed by the receive half, returning
// the receive half's result.
1082 int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t *recmode)
1084 struct ctdb_client_control_state *state;
1086 state = ctdb_ctrl_getrecmode_send(ctdb, mem_ctx, timeout, destnode);
1087 return ctdb_ctrl_getrecmode_recv(ctdb, mem_ctx, state, recmode);
1094 set the recovery mode of a remote node
// Send CTDB_CONTROL_SET_RECMODE to destnode with the 32-bit recmode value as
// payload; failure of either the transport or the remote status is logged.
1096 int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmode)
// the payload points at the by-value parameter; it is not copied here
1102 data.dsize = sizeof(uint32_t);
1103 data.dptr = (unsigned char *)&recmode;
1105 ret = ctdb_control(ctdb, destnode, 0,
1106 CTDB_CONTROL_SET_RECMODE, 0, data,
1107 NULL, NULL, &res, &timeout, NULL);
1108 if (ret != 0 || res != 0) {
1109 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setrecmode failed\n"));
1119 get the recovery master of a remote node
// Async send half of getrecmaster: starts CTDB_CONTROL_GET_RECMASTER towards
// destnode with the given timeout and returns the control state.
1121 struct ctdb_client_control_state *
1122 ctdb_ctrl_getrecmaster_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx,
1123 struct timeval timeout, uint32_t destnode)
1125 return ctdb_control_send(ctdb, destnode, 0,
1126 CTDB_CONTROL_GET_RECMASTER, 0, tdb_null,
1127 mem_ctx, &timeout, NULL);
// Async receive half of getrecmaster: waits on state through ctdb_control_recv
// and stores the control's status value, cast to uint32_t, in *recmaster.
1130 int ctdb_ctrl_getrecmaster_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *recmaster)
1135 ret = ctdb_control_recv(ctdb, state, mem_ctx, NULL, &res, NULL);
1137 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_getrecmaster_recv failed\n"));
// the recovery master travels in the status field, not in a data payload
1142 *recmaster = (uint32_t)res;
// Blocking getrecmaster: the send half followed by the receive half, returning
// the receive half's result.
1148 int ctdb_ctrl_getrecmaster(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t *recmaster)
1150 struct ctdb_client_control_state *state;
1152 state = ctdb_ctrl_getrecmaster_send(ctdb, mem_ctx, timeout, destnode);
1153 return ctdb_ctrl_getrecmaster_recv(ctdb, mem_ctx, state, recmaster);
1158 set the recovery master of a remote node
// Send CTDB_CONTROL_SET_RECMASTER to destnode with the 32-bit recmaster value
// as payload; failure of either the transport or the remote status is logged.
1160 int ctdb_ctrl_setrecmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmaster)
// the payload points at the by-value parameter; it is not copied here
1167 data.dsize = sizeof(uint32_t);
1168 data.dptr = (unsigned char *)&recmaster;
1170 ret = ctdb_control(ctdb, destnode, 0,
1171 CTDB_CONTROL_SET_RECMASTER, 0, data,
1172 NULL, NULL, &res, &timeout, NULL);
1173 if (ret != 0 || res != 0) {
1174 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setrecmaster failed\n"));
1183 get a list of databases off a remote node
// Fetch the database id map from destnode with CTDB_CONTROL_GET_DBMAP and
// return a copy of the raw reply, allocated on mem_ctx, through *dbmap. The
// reply buffer is freed afterwards.
1185 int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
1186 TALLOC_CTX *mem_ctx, struct ctdb_dbid_map **dbmap)
1192 ret = ctdb_control(ctdb, destnode, 0,
1193 CTDB_CONTROL_GET_DBMAP, 0, tdb_null,
1194 mem_ctx, &outdata, &res, &timeout, NULL);
1195 if (ret != 0 || res != 0) {
1196 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getdbmap failed ret:%d res:%d\n", ret, res));
// NOTE(review): the duplicate is not visibly checked for allocation failure,
// and the reply size is not validated against the map layout here
1200 *dbmap = (struct ctdb_dbid_map *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
1201 talloc_free(outdata.dptr);
1207 get a list of nodes (vnn and flags) from a remote node
// Fetch the node map from destnode with CTDB_CONTROL_GET_NODEMAP and return a
// copy of the raw reply, allocated on mem_ctx, through *nodemap. When the
// control is delivered but answers with status -1 and no data, the request is
// retried through ctdb_ctrl_getnodemapv4.
1209 int ctdb_ctrl_getnodemap(struct ctdb_context *ctdb,
1210 struct timeval timeout, uint32_t destnode,
1211 TALLOC_CTX *mem_ctx, struct ctdb_node_map **nodemap)
1217 ret = ctdb_control(ctdb, destnode, 0,
1218 CTDB_CONTROL_GET_NODEMAP, 0, tdb_null,
1219 mem_ctx, &outdata, &res, &timeout, NULL);
// this combination is treated as the remote side not supporting the control
1220 if (ret == 0 && res == -1 && outdata.dsize == 0) {
1221 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getnodes failed, falling back to ipv4-only control\n"));
1222 return ctdb_ctrl_getnodemapv4(ctdb, timeout, destnode, mem_ctx, nodemap);
1224 if (ret != 0 || res != 0 || outdata.dsize == 0) {
1225 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getnodes failed ret:%d res:%d\n", ret, res));
// NOTE(review): the duplicate is not visibly checked for allocation failure
1229 *nodemap = (struct ctdb_node_map *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
1230 talloc_free(outdata.dptr);
1236 old style ipv4-only get a list of nodes (vnn and flags) from a remote node
// Fetch the node map with CTDB_CONTROL_GET_NODEMAPv4 and convert each entry
// into the struct ctdb_node_map layout: pnn and flags are copied, the IPv4
// socket address is stored in addr.ip and the family is set to AF_INET. The
// result is zero-initialised storage on mem_ctx; the reply buffer is freed.
// ctdb_ctrl_getnodemap uses this function as its fallback.
1238 int ctdb_ctrl_getnodemapv4(struct ctdb_context *ctdb,
1239 struct timeval timeout, uint32_t destnode,
1240 TALLOC_CTX *mem_ctx, struct ctdb_node_map **nodemap)
1244 struct ctdb_node_mapv4 *nodemapv4;
1247 ret = ctdb_control(ctdb, destnode, 0,
1248 CTDB_CONTROL_GET_NODEMAPv4, 0, tdb_null,
1249 mem_ctx, &outdata, &res, &timeout, NULL);
1250 if (ret != 0 || res != 0 || outdata.dsize == 0) {
1251 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getnodesv4 failed ret:%d res:%d\n", ret, res));
1255 nodemapv4 = (struct ctdb_node_mapv4 *)outdata.dptr;
// NOTE(review): num comes from the remote reply and is not visibly checked
// against outdata.dsize before it sizes the allocation and bounds the loop
1257 len = offsetof(struct ctdb_node_map, nodes) + nodemapv4->num*sizeof(struct ctdb_node_and_flags);
1258 (*nodemap) = talloc_zero_size(mem_ctx, len);
1259 CTDB_NO_MEMORY(ctdb, (*nodemap));
1261 (*nodemap)->num = nodemapv4->num;
1262 for (i=0; i<nodemapv4->num; i++) {
1263 (*nodemap)->nodes[i].pnn = nodemapv4->nodes[i].pnn;
1264 (*nodemap)->nodes[i].flags = nodemapv4->nodes[i].flags;
1265 (*nodemap)->nodes[i].addr.ip = nodemapv4->nodes[i].sin;
1266 (*nodemap)->nodes[i].addr.sa.sa_family = AF_INET;
1269 talloc_free(outdata.dptr);
1275 drop the transport, reload the nodes file and restart the transport
// Send CTDB_CONTROL_RELOAD_NODES_FILE to destnode with no payload; failure of
// either the transport or the remote status is logged.
1277 int ctdb_ctrl_reload_nodes_file(struct ctdb_context *ctdb,
1278 struct timeval timeout, uint32_t destnode)
1283 ret = ctdb_control(ctdb, destnode, 0,
1284 CTDB_CONTROL_RELOAD_NODES_FILE, 0, tdb_null,
1285 NULL, NULL, &res, &timeout, NULL);
1286 if (ret != 0 || res != 0) {
1287 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for reloadnodesfile failed\n"));
1296 set vnn map on a node
// Serialise vnnmap into the wire layout (generation, size, then the 32-bit
// entries) in a buffer on mem_ctx and send it to destnode with
// CTDB_CONTROL_SETVNNMAP.
// NOTE(review): the line setting data.dsize and any free of the wire buffer
// are not visible in this extract.
1298 int ctdb_ctrl_setvnnmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
1299 TALLOC_CTX *mem_ctx, struct ctdb_vnn_map *vnnmap)
1304 struct ctdb_vnn_map_wire *map;
// wire size: fixed header plus one uint32_t per map entry
1307 len = offsetof(struct ctdb_vnn_map_wire, map) + sizeof(uint32_t)*vnnmap->size;
1308 map = talloc_size(mem_ctx, len);
1309 CTDB_NO_MEMORY(ctdb, map);
1311 map->generation = vnnmap->generation;
1312 map->size = vnnmap->size;
1313 memcpy(map->map, vnnmap->map, sizeof(uint32_t)*map->size);
1316 data.dptr = (uint8_t *)map;
1318 ret = ctdb_control(ctdb, destnode, 0,
1319 CTDB_CONTROL_SETVNNMAP, 0, data,
1320 NULL, NULL, &res, &timeout, NULL);
1321 if (ret != 0 || res != 0) {
1322 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setvnnmap failed\n"));
1333 async send for pull database
// Async send half of pulldb: builds a struct ctdb_control_pulldb request on
// mem_ctx and starts CTDB_CONTROL_PULL_DB towards destnode.
// NOTE(review): the line storing dbid into the request, and any free of the
// request buffer, are not visible in this extract.
1335 struct ctdb_client_control_state *ctdb_ctrl_pulldb_send(
1336 struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid,
1337 uint32_t lmaster, TALLOC_CTX *mem_ctx, struct timeval timeout)
1340 struct ctdb_control_pulldb *pull;
1341 struct ctdb_client_control_state *state;
1343 pull = talloc(mem_ctx, struct ctdb_control_pulldb);
1344 CTDB_NO_MEMORY_NULL(ctdb, pull);
1347 pull->lmaster = lmaster;
1349 indata.dsize = sizeof(struct ctdb_control_pulldb);
1350 indata.dptr = (unsigned char *)pull;
1352 state = ctdb_control_send(ctdb, destnode, 0,
1353 CTDB_CONTROL_PULL_DB, 0, indata,
1354 mem_ctx, &timeout, NULL);
1361 async recv for pull database
// Async receive half of pulldb: waits on state through ctdb_control_recv,
// delivering the reply into outdata on mem_ctx; a non-zero transport result or
// remote status is logged as failure.
1363 int ctdb_ctrl_pulldb_recv(
1364 struct ctdb_context *ctdb,
1365 TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state,
1371 ret = ctdb_control_recv(ctdb, state, mem_ctx, outdata, &res, NULL);
1372 if ( (ret != 0) || (res != 0) ){
1373 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_pulldb_recv failed\n"));
1381 pull all keys and records for a specific database on a node
1383 int ctdb_ctrl_pulldb(struct ctdb_context *ctdb, uint32_t destnode,
1384 uint32_t dbid, uint32_t lmaster,
1385 TALLOC_CTX *mem_ctx, struct timeval timeout,
1388 struct ctdb_client_control_state *state;
1390 state = ctdb_ctrl_pulldb_send(ctdb, destnode, dbid, lmaster, mem_ctx,
1393 return ctdb_ctrl_pulldb_recv(ctdb, mem_ctx, state, outdata);
1398 change dmaster for all keys in the database to the new value
1400 int ctdb_ctrl_setdmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
1401 TALLOC_CTX *mem_ctx, uint32_t dbid, uint32_t dmaster)
1407 indata.dsize = 2*sizeof(uint32_t);
1408 indata.dptr = (unsigned char *)talloc_array(mem_ctx, uint32_t, 2);
1410 ((uint32_t *)(&indata.dptr[0]))[0] = dbid;
1411 ((uint32_t *)(&indata.dptr[0]))[1] = dmaster;
1413 ret = ctdb_control(ctdb, destnode, 0,
1414 CTDB_CONTROL_SET_DMASTER, 0, indata,
1415 NULL, NULL, &res, &timeout, NULL);
1416 if (ret != 0 || res != 0) {
1417 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setdmaster failed\n"));
1425 ping a node, return number of clients connected
1427 int ctdb_ctrl_ping(struct ctdb_context *ctdb, uint32_t destnode)
1432 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_PING, 0,
1433 tdb_null, NULL, NULL, &res, NULL, NULL);
1441 find the real path to a ltdb
1443 int ctdb_ctrl_getdbpath(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx,
1450 data.dptr = (uint8_t *)&dbid;
1451 data.dsize = sizeof(dbid);
1453 ret = ctdb_control(ctdb, destnode, 0,
1454 CTDB_CONTROL_GETDBPATH, 0, data,
1455 mem_ctx, &data, &res, &timeout, NULL);
1456 if (ret != 0 || res != 0) {
1460 (*path) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize);
1461 if ((*path) == NULL) {
1465 talloc_free(data.dptr);
1471 find the name of a db
1473 int ctdb_ctrl_getdbname(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx,
1480 data.dptr = (uint8_t *)&dbid;
1481 data.dsize = sizeof(dbid);
1483 ret = ctdb_control(ctdb, destnode, 0,
1484 CTDB_CONTROL_GET_DBNAME, 0, data,
1485 mem_ctx, &data, &res, &timeout, NULL);
1486 if (ret != 0 || res != 0) {
1490 (*name) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize);
1491 if ((*name) == NULL) {
1495 talloc_free(data.dptr);
1501 get the health status of a db
1503 int ctdb_ctrl_getdbhealth(struct ctdb_context *ctdb,
1504 struct timeval timeout,
1506 uint32_t dbid, TALLOC_CTX *mem_ctx,
1507 const char **reason)
1513 data.dptr = (uint8_t *)&dbid;
1514 data.dsize = sizeof(dbid);
1516 ret = ctdb_control(ctdb, destnode, 0,
1517 CTDB_CONTROL_DB_GET_HEALTH, 0, data,
1518 mem_ctx, &data, &res, &timeout, NULL);
1519 if (ret != 0 || res != 0) {
1523 if (data.dsize == 0) {
1528 (*reason) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize);
1529 if ((*reason) == NULL) {
1533 talloc_free(data.dptr);
1541 int ctdb_ctrl_createdb(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
1542 TALLOC_CTX *mem_ctx, const char *name, bool persistent)
1548 data.dptr = discard_const(name);
1549 data.dsize = strlen(name)+1;
1551 ret = ctdb_control(ctdb, destnode, 0,
1552 persistent?CTDB_CONTROL_DB_ATTACH_PERSISTENT:CTDB_CONTROL_DB_ATTACH,
1554 mem_ctx, &data, &res, &timeout, NULL);
1556 if (ret != 0 || res != 0) {
1564 get debug level on a node
1566 int ctdb_ctrl_get_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, int32_t *level)
1572 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_DEBUG, 0, tdb_null,
1573 ctdb, &data, &res, NULL, NULL);
1574 if (ret != 0 || res != 0) {
1577 if (data.dsize != sizeof(int32_t)) {
1578 DEBUG(DEBUG_ERR,("Bad control reply size in ctdb_get_debuglevel (got %u)\n",
1579 (unsigned)data.dsize));
1582 *level = *(int32_t *)data.dptr;
1583 talloc_free(data.dptr);
1588 set debug level on a node
1590 int ctdb_ctrl_set_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, int32_t level)
1596 data.dptr = (uint8_t *)&level;
1597 data.dsize = sizeof(level);
1599 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SET_DEBUG, 0, data,
1600 NULL, NULL, &res, NULL, NULL);
1601 if (ret != 0 || res != 0) {
1609 get a list of connected nodes
1611 uint32_t *ctdb_get_connected_nodes(struct ctdb_context *ctdb,
1612 struct timeval timeout,
1613 TALLOC_CTX *mem_ctx,
1614 uint32_t *num_nodes)
1616 struct ctdb_node_map *map=NULL;
1622 ret = ctdb_ctrl_getnodemap(ctdb, timeout, CTDB_CURRENT_NODE, mem_ctx, &map);
1627 nodes = talloc_array(mem_ctx, uint32_t, map->num);
1628 if (nodes == NULL) {
1632 for (i=0;i<map->num;i++) {
1633 if (!(map->nodes[i].flags & NODE_FLAGS_DISCONNECTED)) {
1634 nodes[*num_nodes] = map->nodes[i].pnn;
1646 int ctdb_statistics_reset(struct ctdb_context *ctdb, uint32_t destnode)
1651 ret = ctdb_control(ctdb, destnode, 0,
1652 CTDB_CONTROL_STATISTICS_RESET, 0, tdb_null,
1653 NULL, NULL, &res, NULL, NULL);
1654 if (ret != 0 || res != 0) {
1655 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for reset statistics failed\n"));
/*
  this is the dummy null procedure that all databases support

  It ignores the call and always reports success.
 */
static int ctdb_null_func(struct ctdb_call_info *call)
{
	(void)call;	/* intentionally unused */

	return 0;
}
1670 this is a plain fetch procedure that all databases support
1672 static int ctdb_fetch_func(struct ctdb_call_info *call)
1674 call->reply_data = &call->record_data;
1679 attach to a specific database - client call
1681 struct ctdb_db_context *ctdb_attach(struct ctdb_context *ctdb, const char *name, bool persistent, uint32_t tdb_flags)
1683 struct ctdb_db_context *ctdb_db;
1688 ctdb_db = ctdb_db_handle(ctdb, name);
1693 ctdb_db = talloc_zero(ctdb, struct ctdb_db_context);
1694 CTDB_NO_MEMORY_NULL(ctdb, ctdb_db);
1696 ctdb_db->ctdb = ctdb;
1697 ctdb_db->db_name = talloc_strdup(ctdb_db, name);
1698 CTDB_NO_MEMORY_NULL(ctdb, ctdb_db->db_name);
1700 data.dptr = discard_const(name);
1701 data.dsize = strlen(name)+1;
1703 /* tell ctdb daemon to attach */
1704 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, tdb_flags,
1705 persistent?CTDB_CONTROL_DB_ATTACH_PERSISTENT:CTDB_CONTROL_DB_ATTACH,
1706 0, data, ctdb_db, &data, &res, NULL, NULL);
1707 if (ret != 0 || res != 0 || data.dsize != sizeof(uint32_t)) {
1708 DEBUG(DEBUG_ERR,("Failed to attach to database '%s'\n", name));
1709 talloc_free(ctdb_db);
1713 ctdb_db->db_id = *(uint32_t *)data.dptr;
1714 talloc_free(data.dptr);
1716 ret = ctdb_ctrl_getdbpath(ctdb, timeval_current_ofs(2, 0), CTDB_CURRENT_NODE, ctdb_db->db_id, ctdb_db, &ctdb_db->db_path);
1718 DEBUG(DEBUG_ERR,("Failed to get dbpath for database '%s'\n", name));
1719 talloc_free(ctdb_db);
1723 tdb_flags = persistent?TDB_DEFAULT:TDB_NOSYNC;
1724 if (ctdb->valgrinding) {
1725 tdb_flags |= TDB_NOMMAP;
1727 tdb_flags |= TDB_DISALLOW_NESTING;
1729 ctdb_db->ltdb = tdb_wrap_open(ctdb, ctdb_db->db_path, 0, tdb_flags, O_RDWR, 0);
1730 if (ctdb_db->ltdb == NULL) {
1731 ctdb_set_error(ctdb, "Failed to open tdb '%s'\n", ctdb_db->db_path);
1732 talloc_free(ctdb_db);
1736 ctdb_db->persistent = persistent;
1738 DLIST_ADD(ctdb->db_list, ctdb_db);
1740 /* add well known functions */
1741 ctdb_set_call(ctdb_db, ctdb_null_func, CTDB_NULL_FUNC);
1742 ctdb_set_call(ctdb_db, ctdb_fetch_func, CTDB_FETCH_FUNC);
1749 setup a call for a database
1751 int ctdb_set_call(struct ctdb_db_context *ctdb_db, ctdb_fn_t fn, uint32_t id)
1753 struct ctdb_registered_call *call;
1758 struct ctdb_control_set_call c;
1761 /* this is no longer valid with the separate daemon architecture */
1762 c.db_id = ctdb_db->db_id;
1766 data.dptr = (uint8_t *)&c;
1767 data.dsize = sizeof(c);
1769 ret = ctdb_control(ctdb_db->ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_SET_CALL, 0,
1770 data, NULL, NULL, &status, NULL, NULL);
1771 if (ret != 0 || status != 0) {
1772 DEBUG(DEBUG_ERR,("ctdb_set_call failed for call %u\n", id));
1777 /* also register locally */
1778 call = talloc(ctdb_db, struct ctdb_registered_call);
1782 DLIST_ADD(ctdb_db->calls, call);
1787 struct traverse_state {
1790 ctdb_traverse_func fn;
1795 called on each key during a ctdb_traverse
1797 static void traverse_handler(struct ctdb_context *ctdb, uint64_t srvid, TDB_DATA data, void *p)
1799 struct traverse_state *state = (struct traverse_state *)p;
1800 struct ctdb_rec_data *d = (struct ctdb_rec_data *)data.dptr;
1803 if (data.dsize < sizeof(uint32_t) ||
1804 d->length != data.dsize) {
1805 DEBUG(DEBUG_ERR,("Bad data size %u in traverse_handler\n", (unsigned)data.dsize));
1810 key.dsize = d->keylen;
1811 key.dptr = &d->data[0];
1812 data.dsize = d->datalen;
1813 data.dptr = &d->data[d->keylen];
1815 if (key.dsize == 0 && data.dsize == 0) {
1816 /* end of traverse */
1821 if (data.dsize == sizeof(struct ctdb_ltdb_header)) {
1822 /* empty records are deleted records in ctdb */
1826 if (state->fn(ctdb, key, data, state->private_data) != 0) {
1835 start a cluster wide traverse, calling the supplied fn on each record
1836 return the number of records traversed, or -1 on error
1838 int ctdb_traverse(struct ctdb_db_context *ctdb_db, ctdb_traverse_func fn, void *private_data)
1841 struct ctdb_traverse_start t;
1844 uint64_t srvid = (getpid() | 0xFLL<<60);
1845 struct traverse_state state;
1849 state.private_data = private_data;
1852 ret = ctdb_client_set_message_handler(ctdb_db->ctdb, srvid, traverse_handler, &state);
1854 DEBUG(DEBUG_ERR,("Failed to setup traverse handler\n"));
1858 t.db_id = ctdb_db->db_id;
1862 data.dptr = (uint8_t *)&t;
1863 data.dsize = sizeof(t);
1865 ret = ctdb_control(ctdb_db->ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_TRAVERSE_START, 0,
1866 data, NULL, NULL, &status, NULL, NULL);
1867 if (ret != 0 || status != 0) {
1868 DEBUG(DEBUG_ERR,("ctdb_traverse_all failed\n"));
1869 ctdb_client_remove_message_handler(ctdb_db->ctdb, srvid, &state);
1873 while (!state.done) {
1874 event_loop_once(ctdb_db->ctdb->ev);
1877 ret = ctdb_client_remove_message_handler(ctdb_db->ctdb, srvid, &state);
1879 DEBUG(DEBUG_ERR,("Failed to remove ctdb_traverse handler\n"));
1886 #define ISASCII(x) ((x>31)&&(x<128))
1888 called on each key during a catdb
1890 int ctdb_dumpdb_record(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA data, void *p)
1893 FILE *f = (FILE *)p;
1894 struct ctdb_ltdb_header *h = (struct ctdb_ltdb_header *)data.dptr;
1896 fprintf(f, "key(%u) = \"", (unsigned)key.dsize);
1897 for (i=0;i<key.dsize;i++) {
1898 if (ISASCII(key.dptr[i])) {
1899 fprintf(f, "%c", key.dptr[i]);
1901 fprintf(f, "\\%02X", key.dptr[i]);
1906 fprintf(f, "dmaster: %u\n", h->dmaster);
1907 fprintf(f, "rsn: %llu\n", (unsigned long long)h->rsn);
1909 fprintf(f, "data(%u) = \"", (unsigned)(data.dsize - sizeof(*h)));
1910 for (i=sizeof(*h);i<data.dsize;i++) {
1911 if (ISASCII(data.dptr[i])) {
1912 fprintf(f, "%c", data.dptr[i]);
1914 fprintf(f, "\\%02X", data.dptr[i]);
1925 convenience function to list all keys to stdout
1927 int ctdb_dump_db(struct ctdb_db_context *ctdb_db, FILE *f)
1929 return ctdb_traverse(ctdb_db, ctdb_dumpdb_record, f);
1933 get the pid of a ctdb daemon
1935 int ctdb_ctrl_getpid(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *pid)
1940 ret = ctdb_control(ctdb, destnode, 0,
1941 CTDB_CONTROL_GET_PID, 0, tdb_null,
1942 NULL, NULL, &res, &timeout, NULL);
1944 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpid failed\n"));
1955 async freeze send control
1957 struct ctdb_client_control_state *
1958 ctdb_ctrl_freeze_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t priority)
1960 return ctdb_control_send(ctdb, destnode, priority,
1961 CTDB_CONTROL_FREEZE, 0, tdb_null,
1962 mem_ctx, &timeout, NULL);
1966 async freeze recv control
1968 int ctdb_ctrl_freeze_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state)
1973 ret = ctdb_control_recv(ctdb, state, mem_ctx, NULL, &res, NULL);
1974 if ( (ret != 0) || (res != 0) ){
1975 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_freeze_recv failed\n"));
1983 freeze databases of a certain priority
1985 int ctdb_ctrl_freeze_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t priority)
1987 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1988 struct ctdb_client_control_state *state;
1991 state = ctdb_ctrl_freeze_send(ctdb, tmp_ctx, timeout, destnode, priority);
1992 ret = ctdb_ctrl_freeze_recv(ctdb, tmp_ctx, state);
1993 talloc_free(tmp_ctx);
1998 /* Freeze all databases */
1999 int ctdb_ctrl_freeze(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2003 for (i=1; i<=NUM_DB_PRIORITIES; i++) {
2004 if (ctdb_ctrl_freeze_priority(ctdb, timeout, destnode, i) != 0) {
2012 thaw databases of a certain priority
2014 int ctdb_ctrl_thaw_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t priority)
2019 ret = ctdb_control(ctdb, destnode, priority,
2020 CTDB_CONTROL_THAW, 0, tdb_null,
2021 NULL, NULL, &res, &timeout, NULL);
2022 if (ret != 0 || res != 0) {
2023 DEBUG(DEBUG_ERR,(__location__ " ctdb_control thaw failed\n"));
/*
  thaw all databases

  Implemented as a thaw request with priority 0.
 */
int ctdb_ctrl_thaw(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
{
	const uint32_t all_priorities = 0;

	return ctdb_ctrl_thaw_priority(ctdb, timeout, destnode, all_priorities);
}
2037 get pnn of a node, or -1
2039 int ctdb_ctrl_getpnn(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2044 ret = ctdb_control(ctdb, destnode, 0,
2045 CTDB_CONTROL_GET_PNN, 0, tdb_null,
2046 NULL, NULL, &res, &timeout, NULL);
2048 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpnn failed\n"));
2056 get the monitoring mode of a remote node
2058 int ctdb_ctrl_getmonmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *monmode)
2063 ret = ctdb_control(ctdb, destnode, 0,
2064 CTDB_CONTROL_GET_MONMODE, 0, tdb_null,
2065 NULL, NULL, &res, &timeout, NULL);
2067 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getmonmode failed\n"));
2078 set the monitoring mode of a remote node to active
2080 int ctdb_ctrl_enable_monmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2085 ret = ctdb_control(ctdb, destnode, 0,
2086 CTDB_CONTROL_ENABLE_MONITOR, 0, tdb_null,
2087 NULL, NULL,NULL, &timeout, NULL);
2089 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for enable_monitor failed\n"));
2099 set the monitoring mode of a remote node to disable
2101 int ctdb_ctrl_disable_monmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2106 ret = ctdb_control(ctdb, destnode, 0,
2107 CTDB_CONTROL_DISABLE_MONITOR, 0, tdb_null,
2108 NULL, NULL, NULL, &timeout, NULL);
2110 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for disable_monitor failed\n"));
2122 sent to a node to make it take over an ip address
2124 int ctdb_ctrl_takeover_ip(struct ctdb_context *ctdb, struct timeval timeout,
2125 uint32_t destnode, struct ctdb_public_ip *ip)
2128 struct ctdb_public_ipv4 ipv4;
2132 if (ip->addr.sa.sa_family == AF_INET) {
2134 ipv4.sin = ip->addr.ip;
2136 data.dsize = sizeof(ipv4);
2137 data.dptr = (uint8_t *)&ipv4;
2139 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_TAKEOVER_IPv4, 0, data, NULL,
2140 NULL, &res, &timeout, NULL);
2142 data.dsize = sizeof(*ip);
2143 data.dptr = (uint8_t *)ip;
2145 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_TAKEOVER_IP, 0, data, NULL,
2146 NULL, &res, &timeout, NULL);
2149 if (ret != 0 || res != 0) {
2150 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for takeover_ip failed\n"));
2159 sent to a node to make it release an ip address
2161 int ctdb_ctrl_release_ip(struct ctdb_context *ctdb, struct timeval timeout,
2162 uint32_t destnode, struct ctdb_public_ip *ip)
2165 struct ctdb_public_ipv4 ipv4;
2169 if (ip->addr.sa.sa_family == AF_INET) {
2171 ipv4.sin = ip->addr.ip;
2173 data.dsize = sizeof(ipv4);
2174 data.dptr = (uint8_t *)&ipv4;
2176 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_RELEASE_IPv4, 0, data, NULL,
2177 NULL, &res, &timeout, NULL);
2179 data.dsize = sizeof(*ip);
2180 data.dptr = (uint8_t *)ip;
2182 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_RELEASE_IP, 0, data, NULL,
2183 NULL, &res, &timeout, NULL);
2186 if (ret != 0 || res != 0) {
2187 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for release_ip failed\n"));
2198 int ctdb_ctrl_get_tunable(struct ctdb_context *ctdb,
2199 struct timeval timeout,
2201 const char *name, uint32_t *value)
2203 struct ctdb_control_get_tunable *t;
2204 TDB_DATA data, outdata;
2208 data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(name) + 1;
2209 data.dptr = talloc_size(ctdb, data.dsize);
2210 CTDB_NO_MEMORY(ctdb, data.dptr);
2212 t = (struct ctdb_control_get_tunable *)data.dptr;
2213 t->length = strlen(name)+1;
2214 memcpy(t->name, name, t->length);
2216 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_TUNABLE, 0, data, ctdb,
2217 &outdata, &res, &timeout, NULL);
2218 talloc_free(data.dptr);
2219 if (ret != 0 || res != 0) {
2220 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get_tunable failed\n"));
2224 if (outdata.dsize != sizeof(uint32_t)) {
2225 DEBUG(DEBUG_ERR,("Invalid return data in get_tunable\n"));
2226 talloc_free(outdata.dptr);
2230 *value = *(uint32_t *)outdata.dptr;
2231 talloc_free(outdata.dptr);
2239 int ctdb_ctrl_set_tunable(struct ctdb_context *ctdb,
2240 struct timeval timeout,
2242 const char *name, uint32_t value)
2244 struct ctdb_control_set_tunable *t;
2249 data.dsize = offsetof(struct ctdb_control_set_tunable, name) + strlen(name) + 1;
2250 data.dptr = talloc_size(ctdb, data.dsize);
2251 CTDB_NO_MEMORY(ctdb, data.dptr);
2253 t = (struct ctdb_control_set_tunable *)data.dptr;
2254 t->length = strlen(name)+1;
2255 memcpy(t->name, name, t->length);
2258 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SET_TUNABLE, 0, data, NULL,
2259 NULL, &res, &timeout, NULL);
2260 talloc_free(data.dptr);
2261 if (ret != 0 || res != 0) {
2262 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set_tunable failed\n"));
2272 int ctdb_ctrl_list_tunables(struct ctdb_context *ctdb,
2273 struct timeval timeout,
2275 TALLOC_CTX *mem_ctx,
2276 const char ***list, uint32_t *count)
2281 struct ctdb_control_list_tunable *t;
2284 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_LIST_TUNABLES, 0, tdb_null,
2285 mem_ctx, &outdata, &res, &timeout, NULL);
2286 if (ret != 0 || res != 0) {
2287 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for list_tunables failed\n"));
2291 t = (struct ctdb_control_list_tunable *)outdata.dptr;
2292 if (outdata.dsize < offsetof(struct ctdb_control_list_tunable, data) ||
2293 t->length > outdata.dsize-offsetof(struct ctdb_control_list_tunable, data)) {
2294 DEBUG(DEBUG_ERR,("Invalid data in list_tunables reply\n"));
2295 talloc_free(outdata.dptr);
2299 p = talloc_strndup(mem_ctx, (char *)t->data, t->length);
2300 CTDB_NO_MEMORY(ctdb, p);
2302 talloc_free(outdata.dptr);
2307 for (s=strtok_r(p, ":", &ptr); s; s=strtok_r(NULL, ":", &ptr)) {
2308 (*list) = talloc_realloc(mem_ctx, *list, const char *, 1+(*count));
2309 CTDB_NO_MEMORY(ctdb, *list);
2310 (*list)[*count] = talloc_strdup(*list, s);
2311 CTDB_NO_MEMORY(ctdb, (*list)[*count]);
2321 int ctdb_ctrl_get_public_ips_flags(struct ctdb_context *ctdb,
2322 struct timeval timeout, uint32_t destnode,
2323 TALLOC_CTX *mem_ctx,
2325 struct ctdb_all_public_ips **ips)
2331 ret = ctdb_control(ctdb, destnode, 0,
2332 CTDB_CONTROL_GET_PUBLIC_IPS, flags, tdb_null,
2333 mem_ctx, &outdata, &res, &timeout, NULL);
2334 if (ret == 0 && res == -1) {
2335 DEBUG(DEBUG_ERR,(__location__ " ctdb_control to get public ips failed, falling back to ipv4-only version\n"));
2336 return ctdb_ctrl_get_public_ipsv4(ctdb, timeout, destnode, mem_ctx, ips);
2338 if (ret != 0 || res != 0) {
2339 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpublicips failed ret:%d res:%d\n", ret, res));
2343 *ips = (struct ctdb_all_public_ips *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
2344 talloc_free(outdata.dptr);
2349 int ctdb_ctrl_get_public_ips(struct ctdb_context *ctdb,
2350 struct timeval timeout, uint32_t destnode,
2351 TALLOC_CTX *mem_ctx,
2352 struct ctdb_all_public_ips **ips)
2354 return ctdb_ctrl_get_public_ips_flags(ctdb, timeout,
2359 int ctdb_ctrl_get_public_ipsv4(struct ctdb_context *ctdb,
2360 struct timeval timeout, uint32_t destnode,
2361 TALLOC_CTX *mem_ctx, struct ctdb_all_public_ips **ips)
2366 struct ctdb_all_public_ipsv4 *ipsv4;
2368 ret = ctdb_control(ctdb, destnode, 0,
2369 CTDB_CONTROL_GET_PUBLIC_IPSv4, 0, tdb_null,
2370 mem_ctx, &outdata, &res, &timeout, NULL);
2371 if (ret != 0 || res != 0) {
2372 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpublicips failed\n"));
2376 ipsv4 = (struct ctdb_all_public_ipsv4 *)outdata.dptr;
2377 len = offsetof(struct ctdb_all_public_ips, ips) +
2378 ipsv4->num*sizeof(struct ctdb_public_ip);
2379 *ips = talloc_zero_size(mem_ctx, len);
2380 CTDB_NO_MEMORY(ctdb, *ips);
2381 (*ips)->num = ipsv4->num;
2382 for (i=0; i<ipsv4->num; i++) {
2383 (*ips)->ips[i].pnn = ipsv4->ips[i].pnn;
2384 (*ips)->ips[i].addr.ip = ipsv4->ips[i].sin;
2387 talloc_free(outdata.dptr);
2392 int ctdb_ctrl_get_public_ip_info(struct ctdb_context *ctdb,
2393 struct timeval timeout, uint32_t destnode,
2394 TALLOC_CTX *mem_ctx,
2395 const ctdb_sock_addr *addr,
2396 struct ctdb_control_public_ip_info **_info)
2402 struct ctdb_control_public_ip_info *info;
2406 indata.dptr = discard_const_p(uint8_t, addr);
2407 indata.dsize = sizeof(*addr);
2409 ret = ctdb_control(ctdb, destnode, 0,
2410 CTDB_CONTROL_GET_PUBLIC_IP_INFO, 0, indata,
2411 mem_ctx, &outdata, &res, &timeout, NULL);
2412 if (ret != 0 || res != 0) {
2413 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
2414 "failed ret:%d res:%d\n",
2419 len = offsetof(struct ctdb_control_public_ip_info, ifaces);
2420 if (len > outdata.dsize) {
2421 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
2422 "returned invalid data with size %u > %u\n",
2423 (unsigned int)outdata.dsize,
2424 (unsigned int)len));
2425 dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
2429 info = (struct ctdb_control_public_ip_info *)outdata.dptr;
2430 len += info->num*sizeof(struct ctdb_control_iface_info);
2432 if (len > outdata.dsize) {
2433 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
2434 "returned invalid data with size %u > %u\n",
2435 (unsigned int)outdata.dsize,
2436 (unsigned int)len));
2437 dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
2441 /* make sure we null terminate the returned strings */
2442 for (i=0; i < info->num; i++) {
2443 info->ifaces[i].name[CTDB_IFACE_SIZE] = '\0';
2446 *_info = (struct ctdb_control_public_ip_info *)talloc_memdup(mem_ctx,
2449 talloc_free(outdata.dptr);
2450 if (*_info == NULL) {
2451 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
2452 "talloc_memdup size %u failed\n",
2453 (unsigned int)outdata.dsize));
2460 int ctdb_ctrl_get_ifaces(struct ctdb_context *ctdb,
2461 struct timeval timeout, uint32_t destnode,
2462 TALLOC_CTX *mem_ctx,
2463 struct ctdb_control_get_ifaces **_ifaces)
2468 struct ctdb_control_get_ifaces *ifaces;
2472 ret = ctdb_control(ctdb, destnode, 0,
2473 CTDB_CONTROL_GET_IFACES, 0, tdb_null,
2474 mem_ctx, &outdata, &res, &timeout, NULL);
2475 if (ret != 0 || res != 0) {
2476 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
2477 "failed ret:%d res:%d\n",
2482 len = offsetof(struct ctdb_control_get_ifaces, ifaces);
2483 if (len > outdata.dsize) {
2484 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
2485 "returned invalid data with size %u > %u\n",
2486 (unsigned int)outdata.dsize,
2487 (unsigned int)len));
2488 dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
2492 ifaces = (struct ctdb_control_get_ifaces *)outdata.dptr;
2493 len += ifaces->num*sizeof(struct ctdb_control_iface_info);
2495 if (len > outdata.dsize) {
2496 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
2497 "returned invalid data with size %u > %u\n",
2498 (unsigned int)outdata.dsize,
2499 (unsigned int)len));
2500 dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
2504 /* make sure we null terminate the returned strings */
2505 for (i=0; i < ifaces->num; i++) {
2506 ifaces->ifaces[i].name[CTDB_IFACE_SIZE] = '\0';
2509 *_ifaces = (struct ctdb_control_get_ifaces *)talloc_memdup(mem_ctx,
2512 talloc_free(outdata.dptr);
2513 if (*_ifaces == NULL) {
2514 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
2515 "talloc_memdup size %u failed\n",
2516 (unsigned int)outdata.dsize));
2523 int ctdb_ctrl_set_iface_link(struct ctdb_context *ctdb,
2524 struct timeval timeout, uint32_t destnode,
2525 TALLOC_CTX *mem_ctx,
2526 const struct ctdb_control_iface_info *info)
2532 indata.dptr = discard_const_p(uint8_t, info);
2533 indata.dsize = sizeof(*info);
2535 ret = ctdb_control(ctdb, destnode, 0,
2536 CTDB_CONTROL_SET_IFACE_LINK_STATE, 0, indata,
2537 mem_ctx, NULL, &res, &timeout, NULL);
2538 if (ret != 0 || res != 0) {
2539 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set iface link "
2540 "failed ret:%d res:%d\n",
2549 set/clear the permanent disabled bit on a remote node
2551 int ctdb_ctrl_modflags(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
2552 uint32_t set, uint32_t clear)
2556 struct ctdb_node_map *nodemap=NULL;
2557 struct ctdb_node_flag_change c;
2558 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
2563 /* find the recovery master */
2564 ret = ctdb_ctrl_getrecmaster(ctdb, tmp_ctx, timeout, CTDB_CURRENT_NODE, &recmaster);
2566 DEBUG(DEBUG_ERR, (__location__ " Unable to get recmaster from local node\n"));
2567 talloc_free(tmp_ctx);
2572 /* read the node flags from the recmaster */
2573 ret = ctdb_ctrl_getnodemap(ctdb, timeout, recmaster, tmp_ctx, &nodemap);
2575 DEBUG(DEBUG_ERR, (__location__ " Unable to get nodemap from node %u\n", destnode));
2576 talloc_free(tmp_ctx);
2579 if (destnode >= nodemap->num) {
2580 DEBUG(DEBUG_ERR,(__location__ " Nodemap from recmaster does not contain node %d\n", destnode));
2581 talloc_free(tmp_ctx);
2586 c.old_flags = nodemap->nodes[destnode].flags;
2587 c.new_flags = c.old_flags;
2589 c.new_flags &= ~clear;
2591 data.dsize = sizeof(c);
2592 data.dptr = (unsigned char *)&c;
2594 /* send the flags update to all connected nodes */
2595 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
2597 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_MODIFY_FLAGS,
2599 timeout, false, data,
2602 DEBUG(DEBUG_ERR, (__location__ " Unable to update nodeflags on remote nodes\n"));
2604 talloc_free(tmp_ctx);
2608 talloc_free(tmp_ctx);
2616 int ctdb_ctrl_get_all_tunables(struct ctdb_context *ctdb,
2617 struct timeval timeout,
2619 struct ctdb_tunable *tunables)
2625 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_ALL_TUNABLES, 0, tdb_null, ctdb,
2626 &outdata, &res, &timeout, NULL);
2627 if (ret != 0 || res != 0) {
2628 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get all tunables failed\n"));
2632 if (outdata.dsize != sizeof(*tunables)) {
2633 DEBUG(DEBUG_ERR,(__location__ " bad data size %u in ctdb_ctrl_get_all_tunables should be %u\n",
2634 (unsigned)outdata.dsize, (unsigned)sizeof(*tunables)));
2638 *tunables = *(struct ctdb_tunable *)outdata.dptr;
2639 talloc_free(outdata.dptr);
2644 add a public address to a node
2646 int ctdb_ctrl_add_public_ip(struct ctdb_context *ctdb,
2647 struct timeval timeout,
2649 struct ctdb_control_ip_iface *pub)
2655 data.dsize = offsetof(struct ctdb_control_ip_iface, iface) + pub->len;
2656 data.dptr = (unsigned char *)pub;
2658 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_ADD_PUBLIC_IP, 0, data, NULL,
2659 NULL, &res, &timeout, NULL);
2660 if (ret != 0 || res != 0) {
2661 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for add_public_ip failed\n"));
2669 delete a public address from a node
2671 int ctdb_ctrl_del_public_ip(struct ctdb_context *ctdb,
2672 struct timeval timeout,
2674 struct ctdb_control_ip_iface *pub)
2680 data.dsize = offsetof(struct ctdb_control_ip_iface, iface) + pub->len;
2681 data.dptr = (unsigned char *)pub;
2683 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_DEL_PUBLIC_IP, 0, data, NULL,
2684 NULL, &res, &timeout, NULL);
2685 if (ret != 0 || res != 0) {
2686 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for del_public_ip failed\n"));
2694 kill a tcp connection
2696 int ctdb_ctrl_killtcp(struct ctdb_context *ctdb,
2697 struct timeval timeout,
2699 struct ctdb_control_killtcp *killtcp)
2705 data.dsize = sizeof(struct ctdb_control_killtcp);
2706 data.dptr = (unsigned char *)killtcp;
2708 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_KILL_TCP, 0, data, NULL,
2709 NULL, &res, &timeout, NULL);
2710 if (ret != 0 || res != 0) {
2711 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for killtcp failed\n"));
2721 int ctdb_ctrl_gratious_arp(struct ctdb_context *ctdb,
2722 struct timeval timeout,
2724 ctdb_sock_addr *addr,
2730 struct ctdb_control_gratious_arp *gratious_arp;
2731 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
2734 len = strlen(ifname)+1;
2735 gratious_arp = talloc_size(tmp_ctx,
2736 offsetof(struct ctdb_control_gratious_arp, iface) + len);
2737 CTDB_NO_MEMORY(ctdb, gratious_arp);
2739 gratious_arp->addr = *addr;
2740 gratious_arp->len = len;
2741 memcpy(&gratious_arp->iface[0], ifname, len);
2744 data.dsize = offsetof(struct ctdb_control_gratious_arp, iface) + len;
2745 data.dptr = (unsigned char *)gratious_arp;
2747 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SEND_GRATIOUS_ARP, 0, data, NULL,
2748 NULL, &res, &timeout, NULL);
2749 if (ret != 0 || res != 0) {
2750 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for gratious_arp failed\n"));
2751 talloc_free(tmp_ctx);
2755 talloc_free(tmp_ctx);
2760 get a list of all tcp tickles that a node knows about for a particular vnn
2762 int ctdb_ctrl_get_tcp_tickles(struct ctdb_context *ctdb,
2763 struct timeval timeout, uint32_t destnode,
2764 TALLOC_CTX *mem_ctx,
2765 ctdb_sock_addr *addr,
2766 struct ctdb_control_tcp_tickle_list **list)
2769 TDB_DATA data, outdata;
2772 data.dptr = (uint8_t*)addr;
2773 data.dsize = sizeof(ctdb_sock_addr);
2775 ret = ctdb_control(ctdb, destnode, 0,
2776 CTDB_CONTROL_GET_TCP_TICKLE_LIST, 0, data,
2777 mem_ctx, &outdata, &status, NULL, NULL);
2778 if (ret != 0 || status != 0) {
2779 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get tcp tickles failed\n"));
2783 *list = (struct ctdb_control_tcp_tickle_list *)outdata.dptr;
2789 register a server id
2791 int ctdb_ctrl_register_server_id(struct ctdb_context *ctdb,
2792 struct timeval timeout,
2793 struct ctdb_server_id *id)
2799 data.dsize = sizeof(struct ctdb_server_id);
2800 data.dptr = (unsigned char *)id;
2802 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0,
2803 CTDB_CONTROL_REGISTER_SERVER_ID,
2805 NULL, &res, &timeout, NULL);
2806 if (ret != 0 || res != 0) {
2807 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for register server id failed\n"));
2815 unregister a server id
2817 int ctdb_ctrl_unregister_server_id(struct ctdb_context *ctdb,
2818 struct timeval timeout,
2819 struct ctdb_server_id *id)
2825 data.dsize = sizeof(struct ctdb_server_id);
2826 data.dptr = (unsigned char *)id;
2828 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0,
2829 CTDB_CONTROL_UNREGISTER_SERVER_ID,
2831 NULL, &res, &timeout, NULL);
2832 if (ret != 0 || res != 0) {
2833 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for unregister server id failed\n"));
2842 check if a server id exists
2844 if a server id does exist, return *status == 1, otherwise *status == 0
2846 int ctdb_ctrl_check_server_id(struct ctdb_context *ctdb,
2847 struct timeval timeout,
2849 struct ctdb_server_id *id,
2856 data.dsize = sizeof(struct ctdb_server_id);
2857 data.dptr = (unsigned char *)id;
2859 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_CHECK_SERVER_ID,
2861 NULL, &res, &timeout, NULL);
2863 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for check server id failed\n"));
2877 get the list of server ids that are registered on a node
2879 int ctdb_ctrl_get_server_id_list(struct ctdb_context *ctdb,
2880 TALLOC_CTX *mem_ctx,
2881 struct timeval timeout, uint32_t destnode,
2882 struct ctdb_server_id_list **svid_list)
2888 ret = ctdb_control(ctdb, destnode, 0,
2889 CTDB_CONTROL_GET_SERVER_ID_LIST, 0, tdb_null,
2890 mem_ctx, &outdata, &res, &timeout, NULL);
2891 if (ret != 0 || res != 0) {
2892 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get_server_id_list failed\n"));
2896 *svid_list = (struct ctdb_server_id_list *)talloc_steal(mem_ctx, outdata.dptr);
2902 initialise the ctdb daemon for client applications
2904 NOTE: In current code the daemon does not fork. This is for testing purposes only
2905 and to simplify the code.
2907 struct ctdb_context *ctdb_init(struct event_context *ev)
2910 struct ctdb_context *ctdb;
2912 ctdb = talloc_zero(ev, struct ctdb_context);
2914 DEBUG(DEBUG_ERR,(__location__ " talloc_zero failed.\n"));
2918 ctdb->idr = idr_init(ctdb);
2919 /* Wrap early to exercise code. */
2920 ctdb->lastid = INT_MAX-200;
2921 CTDB_NO_MEMORY_NULL(ctdb, ctdb->idr);
2923 ret = ctdb_set_socketname(ctdb, CTDB_PATH);
2925 DEBUG(DEBUG_ERR,(__location__ " ctdb_set_socketname failed.\n"));
2930 ctdb->statistics.statistics_start_time = timeval_current();
2939 void ctdb_set_flags(struct ctdb_context *ctdb, unsigned flags)
2941 ctdb->flags |= flags;
2945 setup the local socket name
2947 int ctdb_set_socketname(struct ctdb_context *ctdb, const char *socketname)
2949 ctdb->daemon.name = talloc_strdup(ctdb, socketname);
2950 CTDB_NO_MEMORY(ctdb, ctdb->daemon.name);
2955 const char *ctdb_get_socketname(struct ctdb_context *ctdb)
2957 return ctdb->daemon.name;
2961 return the pnn of this node
2963 uint32_t ctdb_get_pnn(struct ctdb_context *ctdb)
2970 get the uptime of a remote node
2972 struct ctdb_client_control_state *
2973 ctdb_ctrl_uptime_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
2975 return ctdb_control_send(ctdb, destnode, 0,
2976 CTDB_CONTROL_UPTIME, 0, tdb_null,
2977 mem_ctx, &timeout, NULL);
2980 int ctdb_ctrl_uptime_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, struct ctdb_uptime **uptime)
2986 ret = ctdb_control_recv(ctdb, state, mem_ctx, &outdata, &res, NULL);
2987 if (ret != 0 || res != 0) {
2988 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_uptime_recv failed\n"));
2992 *uptime = (struct ctdb_uptime *)talloc_steal(mem_ctx, outdata.dptr);
2997 int ctdb_ctrl_uptime(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, struct ctdb_uptime **uptime)
2999 struct ctdb_client_control_state *state;
3001 state = ctdb_ctrl_uptime_send(ctdb, mem_ctx, timeout, destnode);
3002 return ctdb_ctrl_uptime_recv(ctdb, mem_ctx, state, uptime);
3006 send a control to execute the "recovered" event script on a node
3008 int ctdb_ctrl_end_recovery(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
3013 ret = ctdb_control(ctdb, destnode, 0,
3014 CTDB_CONTROL_END_RECOVERY, 0, tdb_null,
3015 NULL, NULL, &status, &timeout, NULL);
3016 if (ret != 0 || status != 0) {
3017 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for end_recovery failed\n"));
3025 callback for the async helpers used when sending the same control
3026 to multiple nodes in parallell.
3028 static void async_callback(struct ctdb_client_control_state *state)
3030 struct client_async_data *data = talloc_get_type(state->async.private_data, struct client_async_data);
3031 struct ctdb_context *ctdb = talloc_get_type(state->ctdb, struct ctdb_context);
3035 uint32_t destnode = state->c->hdr.destnode;
3037 /* one more node has responded with recmode data */
3040 /* if we failed to push the db, then return an error and let
3041 the main loop try again.
3043 if (state->state != CTDB_CONTROL_DONE) {
3044 if ( !data->dont_log_errors) {
3045 DEBUG(DEBUG_ERR,("Async operation failed with state %d, opcode:%u\n", state->state, data->opcode));
3048 if (data->fail_callback) {
3049 data->fail_callback(ctdb, destnode, res, outdata,
3050 data->callback_data);
3055 state->async.fn = NULL;
3057 ret = ctdb_control_recv(ctdb, state, data, &outdata, &res, NULL);
3058 if ((ret != 0) || (res != 0)) {
3059 if ( !data->dont_log_errors) {
3060 DEBUG(DEBUG_ERR,("Async operation failed with ret=%d res=%d opcode=%u\n", ret, (int)res, data->opcode));
3063 if (data->fail_callback) {
3064 data->fail_callback(ctdb, destnode, res, outdata,
3065 data->callback_data);
3068 if ((ret == 0) && (data->callback != NULL)) {
3069 data->callback(ctdb, destnode, res, outdata,
3070 data->callback_data);
3075 void ctdb_client_async_add(struct client_async_data *data, struct ctdb_client_control_state *state)
3077 /* set up the callback functions */
3078 state->async.fn = async_callback;
3079 state->async.private_data = data;
3081 /* one more control to wait for to complete */
3086 /* wait for up to the maximum number of seconds allowed
3087 or until all nodes we expect a response from has replied
3089 int ctdb_client_async_wait(struct ctdb_context *ctdb, struct client_async_data *data)
3091 while (data->count > 0) {
3092 event_loop_once(ctdb->ev);
3094 if (data->fail_count != 0) {
3095 if (!data->dont_log_errors) {
3096 DEBUG(DEBUG_ERR,("Async wait failed - fail_count=%u\n",
3106 perform a simple control on the listed nodes
3107 The control cannot return data
3109 int ctdb_client_async_control(struct ctdb_context *ctdb,
3110 enum ctdb_controls opcode,
3113 struct timeval timeout,
3114 bool dont_log_errors,
3116 client_async_callback client_callback,
3117 client_async_callback fail_callback,
3118 void *callback_data)
3120 struct client_async_data *async_data;
3121 struct ctdb_client_control_state *state;
3124 async_data = talloc_zero(ctdb, struct client_async_data);
3125 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
3126 async_data->dont_log_errors = dont_log_errors;
3127 async_data->callback = client_callback;
3128 async_data->fail_callback = fail_callback;
3129 async_data->callback_data = callback_data;
3130 async_data->opcode = opcode;
3132 num_nodes = talloc_get_size(nodes) / sizeof(uint32_t);
3134 /* loop over all nodes and send an async control to each of them */
3135 for (j=0; j<num_nodes; j++) {
3136 uint32_t pnn = nodes[j];
3138 state = ctdb_control_send(ctdb, pnn, srvid, opcode,
3139 0, data, async_data, &timeout, NULL);
3140 if (state == NULL) {
3141 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control %u\n", (unsigned)opcode));
3142 talloc_free(async_data);
3146 ctdb_client_async_add(async_data, state);
3149 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3150 talloc_free(async_data);
3154 talloc_free(async_data);
3158 uint32_t *list_of_vnnmap_nodes(struct ctdb_context *ctdb,
3159 struct ctdb_vnn_map *vnn_map,
3160 TALLOC_CTX *mem_ctx,
3163 int i, j, num_nodes;
3166 for (i=num_nodes=0;i<vnn_map->size;i++) {
3167 if (vnn_map->map[i] == ctdb->pnn && !include_self) {
3173 nodes = talloc_array(mem_ctx, uint32_t, num_nodes);
3174 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
3176 for (i=j=0;i<vnn_map->size;i++) {
3177 if (vnn_map->map[i] == ctdb->pnn && !include_self) {
3180 nodes[j++] = vnn_map->map[i];
3186 uint32_t *list_of_active_nodes(struct ctdb_context *ctdb,
3187 struct ctdb_node_map *node_map,
3188 TALLOC_CTX *mem_ctx,
3191 int i, j, num_nodes;
3194 for (i=num_nodes=0;i<node_map->num;i++) {
3195 if (node_map->nodes[i].flags & NODE_FLAGS_INACTIVE) {
3198 if (node_map->nodes[i].pnn == ctdb->pnn && !include_self) {
3204 nodes = talloc_array(mem_ctx, uint32_t, num_nodes);
3205 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
3207 for (i=j=0;i<node_map->num;i++) {
3208 if (node_map->nodes[i].flags & NODE_FLAGS_INACTIVE) {
3211 if (node_map->nodes[i].pnn == ctdb->pnn && !include_self) {
3214 nodes[j++] = node_map->nodes[i].pnn;
3220 uint32_t *list_of_active_nodes_except_pnn(struct ctdb_context *ctdb,
3221 struct ctdb_node_map *node_map,
3222 TALLOC_CTX *mem_ctx,
3225 int i, j, num_nodes;
3228 for (i=num_nodes=0;i<node_map->num;i++) {
3229 if (node_map->nodes[i].flags & NODE_FLAGS_INACTIVE) {
3232 if (node_map->nodes[i].pnn == pnn) {
3238 nodes = talloc_array(mem_ctx, uint32_t, num_nodes);
3239 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
3241 for (i=j=0;i<node_map->num;i++) {
3242 if (node_map->nodes[i].flags & NODE_FLAGS_INACTIVE) {
3245 if (node_map->nodes[i].pnn == pnn) {
3248 nodes[j++] = node_map->nodes[i].pnn;
3254 uint32_t *list_of_connected_nodes(struct ctdb_context *ctdb,
3255 struct ctdb_node_map *node_map,
3256 TALLOC_CTX *mem_ctx,
3259 int i, j, num_nodes;
3262 for (i=num_nodes=0;i<node_map->num;i++) {
3263 if (node_map->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
3266 if (node_map->nodes[i].pnn == ctdb->pnn && !include_self) {
3272 nodes = talloc_array(mem_ctx, uint32_t, num_nodes);
3273 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
3275 for (i=j=0;i<node_map->num;i++) {
3276 if (node_map->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
3279 if (node_map->nodes[i].pnn == ctdb->pnn && !include_self) {
3282 nodes[j++] = node_map->nodes[i].pnn;
3289 this is used to test if a pnn lock exists and if it exists will return
3290 the number of connections that pnn has reported or -1 if that recovery
3291 daemon is not running.
3294 ctdb_read_pnn_lock(int fd, int32_t pnn)
3299 lock.l_type = F_WRLCK;
3300 lock.l_whence = SEEK_SET;
3305 if (fcntl(fd, F_GETLK, &lock) != 0) {
3306 DEBUG(DEBUG_ERR, (__location__ " F_GETLK failed with %s\n", strerror(errno)));
3310 if (lock.l_type == F_UNLCK) {
3314 if (pread(fd, &c, 1, pnn) == -1) {
3315 DEBUG(DEBUG_CRIT,(__location__ " failed read pnn count - %s\n", strerror(errno)));
3323 get capabilities of a remote node
3325 struct ctdb_client_control_state *
3326 ctdb_ctrl_getcapabilities_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
3328 return ctdb_control_send(ctdb, destnode, 0,
3329 CTDB_CONTROL_GET_CAPABILITIES, 0, tdb_null,
3330 mem_ctx, &timeout, NULL);
3333 int ctdb_ctrl_getcapabilities_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *capabilities)
3339 ret = ctdb_control_recv(ctdb, state, mem_ctx, &outdata, &res, NULL);
3340 if ( (ret != 0) || (res != 0) ) {
3341 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_getcapabilities_recv failed\n"));
3346 *capabilities = *((uint32_t *)outdata.dptr);
3352 int ctdb_ctrl_getcapabilities(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *capabilities)
3354 struct ctdb_client_control_state *state;
3355 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
3358 state = ctdb_ctrl_getcapabilities_send(ctdb, tmp_ctx, timeout, destnode);
3359 ret = ctdb_ctrl_getcapabilities_recv(ctdb, tmp_ctx, state, capabilities);
3360 talloc_free(tmp_ctx);
3365 * check whether a transaction is active on a given db on a given node
3367 int32_t ctdb_ctrl_transaction_active(struct ctdb_context *ctdb,
3375 indata.dptr = (uint8_t *)&db_id;
3376 indata.dsize = sizeof(db_id);
3378 ret = ctdb_control(ctdb, destnode, 0,
3379 CTDB_CONTROL_TRANS2_ACTIVE,
3380 0, indata, NULL, NULL, &status,
3384 DEBUG(DEBUG_ERR, (__location__ " ctdb control for transaction_active failed\n"));
/*
  client side state of one transaction on a persistent database
 */
struct ctdb_transaction_handle {
	struct ctdb_db_context *ctdb_db;
	/* set while the transaction is being replayed; fetch/store then
	   skip recording into m_all */
	bool in_replay;
	/*
	 * we store the reads and writes done under a transaction:
	 * - one list stores both reads and writes (m_all),
	 * - the other just writes (m_write)
	 */
	struct ctdb_marshall_buffer *m_all;
	struct ctdb_marshall_buffer *m_write;
};
3404 /* start a transaction on a database */
3405 static int ctdb_transaction_destructor(struct ctdb_transaction_handle *h)
3407 tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
3411 /* start a transaction on a database */
3412 static int ctdb_transaction_fetch_start(struct ctdb_transaction_handle *h)
3414 struct ctdb_record_handle *rh;
3417 struct ctdb_ltdb_header header;
3418 TALLOC_CTX *tmp_ctx;
3419 const char *keyname = CTDB_TRANSACTION_LOCK_KEY;
3421 struct ctdb_db_context *ctdb_db = h->ctdb_db;
3425 key.dptr = discard_const(keyname);
3426 key.dsize = strlen(keyname);
3428 if (!ctdb_db->persistent) {
3429 DEBUG(DEBUG_ERR,(__location__ " Attempted transaction on non-persistent database\n"));
3434 tmp_ctx = talloc_new(h);
3436 rh = ctdb_fetch_lock(ctdb_db, tmp_ctx, key, NULL);
3438 DEBUG(DEBUG_ERR,(__location__ " Failed to fetch_lock database\n"));
3439 talloc_free(tmp_ctx);
3443 status = ctdb_ctrl_transaction_active(ctdb_db->ctdb,
3447 unsigned long int usec = (1000 + random()) % 100000;
3448 DEBUG(DEBUG_DEBUG, (__location__ " transaction is active "
3449 "on db_id[0x%08x]. waiting for %lu "
3451 ctdb_db->db_id, usec));
3452 talloc_free(tmp_ctx);
3458 * store the pid in the database:
3459 * it is not enough that the node is dmaster...
3462 data.dptr = (unsigned char *)&pid;
3463 data.dsize = sizeof(pid_t);
3465 rh->header.dmaster = ctdb_db->ctdb->pnn;
3466 ret = ctdb_ltdb_store(ctdb_db, key, &(rh->header), data);
3468 DEBUG(DEBUG_ERR, (__location__ " Failed to store pid in "
3469 "transaction record\n"));
3470 talloc_free(tmp_ctx);
3476 ret = tdb_transaction_start(ctdb_db->ltdb->tdb);
3478 DEBUG(DEBUG_ERR,(__location__ " Failed to start tdb transaction\n"));
3479 talloc_free(tmp_ctx);
3483 ret = ctdb_ltdb_fetch(ctdb_db, key, &header, tmp_ctx, &data);
3485 DEBUG(DEBUG_ERR,(__location__ " Failed to re-fetch transaction "
3486 "lock record inside transaction\n"));
3487 tdb_transaction_cancel(ctdb_db->ltdb->tdb);
3488 talloc_free(tmp_ctx);
3492 if (header.dmaster != ctdb_db->ctdb->pnn) {
3493 DEBUG(DEBUG_DEBUG,(__location__ " not dmaster any more on "
3494 "transaction lock record\n"));
3495 tdb_transaction_cancel(ctdb_db->ltdb->tdb);
3496 talloc_free(tmp_ctx);
3500 if ((data.dsize != sizeof(pid_t)) || (*(pid_t *)(data.dptr) != pid)) {
3501 DEBUG(DEBUG_DEBUG, (__location__ " my pid is not stored in "
3502 "the transaction lock record\n"));
3503 tdb_transaction_cancel(ctdb_db->ltdb->tdb);
3504 talloc_free(tmp_ctx);
3508 talloc_free(tmp_ctx);
3514 /* start a transaction on a database */
3515 struct ctdb_transaction_handle *ctdb_transaction_start(struct ctdb_db_context *ctdb_db,
3516 TALLOC_CTX *mem_ctx)
3518 struct ctdb_transaction_handle *h;
3521 h = talloc_zero(mem_ctx, struct ctdb_transaction_handle);
3523 DEBUG(DEBUG_ERR,(__location__ " oom for transaction handle\n"));
3527 h->ctdb_db = ctdb_db;
3529 ret = ctdb_transaction_fetch_start(h);
3535 talloc_set_destructor(h, ctdb_transaction_destructor);
3543 fetch a record inside a transaction
3545 int ctdb_transaction_fetch(struct ctdb_transaction_handle *h,
3546 TALLOC_CTX *mem_ctx,
3547 TDB_DATA key, TDB_DATA *data)
3549 struct ctdb_ltdb_header header;
3552 ZERO_STRUCT(header);
3554 ret = ctdb_ltdb_fetch(h->ctdb_db, key, &header, mem_ctx, data);
3555 if (ret == -1 && header.dmaster == (uint32_t)-1) {
3556 /* record doesn't exist yet */
3565 if (!h->in_replay) {
3566 h->m_all = ctdb_marshall_add(h, h->m_all, h->ctdb_db->db_id, 1, key, NULL, *data);
3567 if (h->m_all == NULL) {
3568 DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
3577 stores a record inside a transaction
3579 int ctdb_transaction_store(struct ctdb_transaction_handle *h,
3580 TDB_DATA key, TDB_DATA data)
3582 TALLOC_CTX *tmp_ctx = talloc_new(h);
3583 struct ctdb_ltdb_header header;
3587 ZERO_STRUCT(header);
3589 /* we need the header so we can update the RSN */
3590 ret = ctdb_ltdb_fetch(h->ctdb_db, key, &header, tmp_ctx, &olddata);
3591 if (ret == -1 && header.dmaster == (uint32_t)-1) {
3592 /* the record doesn't exist - create one with us as dmaster.
3593 This is only safe because we are in a transaction and this
3594 is a persistent database */
3595 ZERO_STRUCT(header);
3596 } else if (ret != 0) {
3597 DEBUG(DEBUG_ERR,(__location__ " Failed to fetch record\n"));
3598 talloc_free(tmp_ctx);
3602 if (data.dsize == olddata.dsize &&
3603 memcmp(data.dptr, olddata.dptr, data.dsize) == 0) {
3604 /* save writing the same data */
3605 talloc_free(tmp_ctx);
3609 header.dmaster = h->ctdb_db->ctdb->pnn;
3612 if (!h->in_replay) {
3613 h->m_all = ctdb_marshall_add(h, h->m_all, h->ctdb_db->db_id, 0, key, NULL, data);
3614 if (h->m_all == NULL) {
3615 DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
3616 talloc_free(tmp_ctx);
3621 h->m_write = ctdb_marshall_add(h, h->m_write, h->ctdb_db->db_id, 0, key, &header, data);
3622 if (h->m_write == NULL) {
3623 DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
3624 talloc_free(tmp_ctx);
3628 ret = ctdb_ltdb_store(h->ctdb_db, key, &header, data);
3630 talloc_free(tmp_ctx);
3636 replay a transaction
3638 static int ctdb_replay_transaction(struct ctdb_transaction_handle *h)
3641 struct ctdb_rec_data *rec = NULL;
3643 h->in_replay = true;
3644 talloc_free(h->m_write);
3647 ret = ctdb_transaction_fetch_start(h);
3652 for (i=0;i<h->m_all->count;i++) {
3655 rec = ctdb_marshall_loop_next(h->m_all, rec, NULL, NULL, &key, &data);
3657 DEBUG(DEBUG_ERR, (__location__ " Out of records in ctdb_replay_transaction?\n"));
3661 if (rec->reqid == 0) {
3663 if (ctdb_transaction_store(h, key, data) != 0) {
3668 TALLOC_CTX *tmp_ctx = talloc_new(h);
3670 if (ctdb_transaction_fetch(h, tmp_ctx, key, &data2) != 0) {
3671 talloc_free(tmp_ctx);
3674 if (data2.dsize != data.dsize ||
3675 memcmp(data2.dptr, data.dptr, data.dsize) != 0) {
3676 /* the record has changed on us - we have to give up */
3677 talloc_free(tmp_ctx);
3680 talloc_free(tmp_ctx);
3687 tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
3693 commit a transaction
3695 int ctdb_transaction_commit(struct ctdb_transaction_handle *h)
3699 struct ctdb_context *ctdb = h->ctdb_db->ctdb;
3700 struct timeval timeout;
3701 enum ctdb_controls failure_control = CTDB_CONTROL_TRANS2_ERROR;
3703 talloc_set_destructor(h, NULL);
3705 /* our commit strategy is quite complex.
3707 - we first try to commit the changes to all other nodes
3709 - if that works, then we commit locally and we are done
3711 - if a commit on another node fails, then we need to cancel
3712 the transaction, then restart the transaction (thus
3713 opening a window of time for a pending recovery to
3714 complete), then replay the transaction, checking all the
3715 reads and writes (checking that reads give the same data,
3716 and writes succeed). Then we retry the transaction to the
3721 if (h->m_write == NULL) {
3722 /* no changes were made */
3723 tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
3728 /* tell ctdbd to commit to the other nodes */
3729 timeout = timeval_current_ofs(1, 0);
3730 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
3731 retries==0?CTDB_CONTROL_TRANS2_COMMIT:CTDB_CONTROL_TRANS2_COMMIT_RETRY, 0,
3732 ctdb_marshall_finish(h->m_write), NULL, NULL, &status,
3734 if (ret != 0 || status != 0) {
3735 tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
3736 DEBUG(DEBUG_NOTICE, (__location__ " transaction commit%s failed"
3737 ", retrying after 1 second...\n",
3738 (retries==0)?"":"retry "));
3742 failure_control = CTDB_CONTROL_TRANS2_ERROR;
3744 /* work out what error code we will give if we
3745 have to fail the operation */
3746 switch ((enum ctdb_trans2_commit_error)status) {
3747 case CTDB_TRANS2_COMMIT_SUCCESS:
3748 case CTDB_TRANS2_COMMIT_SOMEFAIL:
3749 case CTDB_TRANS2_COMMIT_TIMEOUT:
3750 failure_control = CTDB_CONTROL_TRANS2_ERROR;
3752 case CTDB_TRANS2_COMMIT_ALLFAIL:
3753 failure_control = CTDB_CONTROL_TRANS2_FINISHED;
3758 if (++retries == 100) {
3759 DEBUG(DEBUG_ERR,(__location__ " Giving up transaction on db 0x%08x after %d retries failure_control=%u\n",
3760 h->ctdb_db->db_id, retries, (unsigned)failure_control));
3761 ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
3762 failure_control, CTDB_CTRL_FLAG_NOREPLY,
3763 tdb_null, NULL, NULL, NULL, NULL, NULL);
3768 if (ctdb_replay_transaction(h) != 0) {
3769 DEBUG(DEBUG_ERR, (__location__ " Failed to replay "
3770 "transaction on db 0x%08x, "
3771 "failure control =%u\n",
3773 (unsigned)failure_control));
3774 ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
3775 failure_control, CTDB_CTRL_FLAG_NOREPLY,
3776 tdb_null, NULL, NULL, NULL, NULL, NULL);
3782 failure_control = CTDB_CONTROL_TRANS2_ERROR;
3785 /* do the real commit locally */
3786 ret = tdb_transaction_commit(h->ctdb_db->ltdb->tdb);
3788 DEBUG(DEBUG_ERR, (__location__ " Failed to commit transaction "
3789 "on db id 0x%08x locally, "
3790 "failure_control=%u\n",
3792 (unsigned)failure_control));
3793 ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
3794 failure_control, CTDB_CTRL_FLAG_NOREPLY,
3795 tdb_null, NULL, NULL, NULL, NULL, NULL);
3800 /* tell ctdbd that we are finished with our local commit */
3801 ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
3802 CTDB_CONTROL_TRANS2_FINISHED, CTDB_CTRL_FLAG_NOREPLY,
3803 tdb_null, NULL, NULL, NULL, NULL, NULL);
3809 recovery daemon ping to main daemon
3811 int ctdb_ctrl_recd_ping(struct ctdb_context *ctdb)
3816 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_RECD_PING, 0, tdb_null,
3817 ctdb, NULL, &res, NULL, NULL);
3818 if (ret != 0 || res != 0) {
3819 DEBUG(DEBUG_ERR,("Failed to send recd ping\n"));
3826 /* when forking the main daemon and the child process needs to connect back
3827 * to the daemon as a client process, this function can be used to change
3828 * the ctdb context from daemon into client mode
3830 int switch_from_server_to_client(struct ctdb_context *ctdb)
3834 /* shutdown the transport */
3835 if (ctdb->methods) {
3836 ctdb->methods->shutdown(ctdb);
3839 /* get a new event context */
3840 talloc_free(ctdb->ev);
3841 ctdb->ev = event_context_init(ctdb);
3843 close(ctdb->daemon.sd);
3844 ctdb->daemon.sd = -1;
3846 /* initialise ctdb */
3847 ret = ctdb_socket_connect(ctdb);
3849 DEBUG(DEBUG_ALERT, (__location__ " Failed to init ctdb client\n"));
3857 get the status of running the monitor eventscripts: NULL means never run.
3859 int ctdb_ctrl_getscriptstatus(struct ctdb_context *ctdb,
3860 struct timeval timeout, uint32_t destnode,
3861 TALLOC_CTX *mem_ctx, enum ctdb_eventscript_call type,
3862 struct ctdb_scripts_wire **script_status)
3865 TDB_DATA outdata, indata;
3867 uint32_t uinttype = type;
3869 indata.dptr = (uint8_t *)&uinttype;
3870 indata.dsize = sizeof(uinttype);
3872 ret = ctdb_control(ctdb, destnode, 0,
3873 CTDB_CONTROL_GET_EVENT_SCRIPT_STATUS, 0, indata,
3874 mem_ctx, &outdata, &res, &timeout, NULL);
3875 if (ret != 0 || res != 0) {
3876 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getscriptstatus failed ret:%d res:%d\n", ret, res));
3880 if (outdata.dsize == 0) {
3881 *script_status = NULL;
3883 *script_status = (struct ctdb_scripts_wire *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
3884 talloc_free(outdata.dptr);
3891 tell the main daemon how long it took to lock the reclock file
3893 int ctdb_ctrl_report_recd_lock_latency(struct ctdb_context *ctdb, struct timeval timeout, double latency)
3899 data.dptr = (uint8_t *)&latency;
3900 data.dsize = sizeof(latency);
3902 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_RECD_RECLOCK_LATENCY, 0, data,
3903 ctdb, NULL, &res, NULL, NULL);
3904 if (ret != 0 || res != 0) {
3905 DEBUG(DEBUG_ERR,("Failed to send recd reclock latency\n"));
3913 get the name of the reclock file
3915 int ctdb_ctrl_getreclock(struct ctdb_context *ctdb, struct timeval timeout,
3916 uint32_t destnode, TALLOC_CTX *mem_ctx,
3923 ret = ctdb_control(ctdb, destnode, 0,
3924 CTDB_CONTROL_GET_RECLOCK_FILE, 0, tdb_null,
3925 mem_ctx, &data, &res, &timeout, NULL);
3926 if (ret != 0 || res != 0) {
3930 if (data.dsize == 0) {
3933 *name = talloc_strdup(mem_ctx, discard_const(data.dptr));
3935 talloc_free(data.dptr);
3941 set the reclock filename for a node
3943 int ctdb_ctrl_setreclock(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, const char *reclock)
3949 if (reclock == NULL) {
3953 data.dsize = strlen(reclock) + 1;
3954 data.dptr = discard_const(reclock);
3957 ret = ctdb_control(ctdb, destnode, 0,
3958 CTDB_CONTROL_SET_RECLOCK_FILE, 0, data,
3959 NULL, NULL, &res, &timeout, NULL);
3960 if (ret != 0 || res != 0) {
3961 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setreclock failed\n"));
3971 int ctdb_ctrl_stop_node(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
3976 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_STOP_NODE, 0, tdb_null,
3977 ctdb, NULL, &res, &timeout, NULL);
3978 if (ret != 0 || res != 0) {
3979 DEBUG(DEBUG_ERR,("Failed to stop node\n"));
3989 int ctdb_ctrl_continue_node(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
3993 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_CONTINUE_NODE, 0, tdb_null,
3994 ctdb, NULL, NULL, &timeout, NULL);
3996 DEBUG(DEBUG_ERR,("Failed to continue node\n"));
4004 set the natgw state for a node
4006 int ctdb_ctrl_setnatgwstate(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t natgwstate)
4012 data.dsize = sizeof(natgwstate);
4013 data.dptr = (uint8_t *)&natgwstate;
4015 ret = ctdb_control(ctdb, destnode, 0,
4016 CTDB_CONTROL_SET_NATGWSTATE, 0, data,
4017 NULL, NULL, &res, &timeout, NULL);
4018 if (ret != 0 || res != 0) {
4019 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setnatgwstate failed\n"));
4027 set the lmaster role for a node
4029 int ctdb_ctrl_setlmasterrole(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t lmasterrole)
4035 data.dsize = sizeof(lmasterrole);
4036 data.dptr = (uint8_t *)&lmasterrole;
4038 ret = ctdb_control(ctdb, destnode, 0,
4039 CTDB_CONTROL_SET_LMASTERROLE, 0, data,
4040 NULL, NULL, &res, &timeout, NULL);
4041 if (ret != 0 || res != 0) {
4042 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setlmasterrole failed\n"));
4050 set the recmaster role for a node
4052 int ctdb_ctrl_setrecmasterrole(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmasterrole)
4058 data.dsize = sizeof(recmasterrole);
4059 data.dptr = (uint8_t *)&recmasterrole;
4061 ret = ctdb_control(ctdb, destnode, 0,
4062 CTDB_CONTROL_SET_RECMASTERROLE, 0, data,
4063 NULL, NULL, &res, &timeout, NULL);
4064 if (ret != 0 || res != 0) {
4065 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setrecmasterrole failed\n"));
4072 /* enable an eventscript
4074 int ctdb_ctrl_enablescript(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, const char *script)
4080 data.dsize = strlen(script) + 1;
4081 data.dptr = discard_const(script);
4083 ret = ctdb_control(ctdb, destnode, 0,
4084 CTDB_CONTROL_ENABLE_SCRIPT, 0, data,
4085 NULL, NULL, &res, &timeout, NULL);
4086 if (ret != 0 || res != 0) {
4087 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for enablescript failed\n"));
4094 /* disable an eventscript
4096 int ctdb_ctrl_disablescript(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, const char *script)
4102 data.dsize = strlen(script) + 1;
4103 data.dptr = discard_const(script);
4105 ret = ctdb_control(ctdb, destnode, 0,
4106 CTDB_CONTROL_DISABLE_SCRIPT, 0, data,
4107 NULL, NULL, &res, &timeout, NULL);
4108 if (ret != 0 || res != 0) {
4109 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for disablescript failed\n"));
4117 int ctdb_ctrl_set_ban(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, struct ctdb_ban_time *bantime)
4123 data.dsize = sizeof(*bantime);
4124 data.dptr = (uint8_t *)bantime;
4126 ret = ctdb_control(ctdb, destnode, 0,
4127 CTDB_CONTROL_SET_BAN_STATE, 0, data,
4128 NULL, NULL, &res, &timeout, NULL);
4129 if (ret != 0 || res != 0) {
4130 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set ban state failed\n"));
4138 int ctdb_ctrl_get_ban(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_ban_time **bantime)
4143 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
4145 ret = ctdb_control(ctdb, destnode, 0,
4146 CTDB_CONTROL_GET_BAN_STATE, 0, tdb_null,
4147 tmp_ctx, &outdata, &res, &timeout, NULL);
4148 if (ret != 0 || res != 0) {
4149 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set ban state failed\n"));
4150 talloc_free(tmp_ctx);
4154 *bantime = (struct ctdb_ban_time *)talloc_steal(mem_ctx, outdata.dptr);
4155 talloc_free(tmp_ctx);
4161 int ctdb_ctrl_set_db_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, struct ctdb_db_priority *db_prio)
4166 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
4168 data.dptr = (uint8_t*)db_prio;
4169 data.dsize = sizeof(*db_prio);
4171 ret = ctdb_control(ctdb, destnode, 0,
4172 CTDB_CONTROL_SET_DB_PRIORITY, 0, data,
4173 tmp_ctx, NULL, &res, &timeout, NULL);
4174 if (ret != 0 || res != 0) {
4175 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set_db_priority failed\n"));
4176 talloc_free(tmp_ctx);
4180 talloc_free(tmp_ctx);
4185 int ctdb_ctrl_get_db_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t db_id, uint32_t *priority)
4190 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
4192 data.dptr = (uint8_t*)&db_id;
4193 data.dsize = sizeof(db_id);
4195 ret = ctdb_control(ctdb, destnode, 0,
4196 CTDB_CONTROL_GET_DB_PRIORITY, 0, data,
4197 tmp_ctx, NULL, &res, &timeout, NULL);
4198 if (ret != 0 || res < 0) {
4199 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set_db_priority failed\n"));
4200 talloc_free(tmp_ctx);
4208 talloc_free(tmp_ctx);