#include "db_wrap.h"
#include "lib/tdb/include/tdb.h"
#include "lib/util/dlinklist.h"
-#include "lib/events/events.h"
+#include "lib/tevent/tevent.h"
#include "system/network.h"
#include "system/filesys.h"
+#include "system/locale.h"
+#include <stdlib.h>
#include "../include/ctdb_private.h"
#include "lib/util/dlinklist.h"
+pid_t ctdbd_pid;
+
/*
allocate a packet for use in client<->daemon communication
*/
*/
int ctdb_call_local(struct ctdb_db_context *ctdb_db, struct ctdb_call *call,
struct ctdb_ltdb_header *header, TALLOC_CTX *mem_ctx,
- TDB_DATA *data, uint32_t caller)
+ TDB_DATA *data, bool updatetdb)
{
struct ctdb_call_info *c;
struct ctdb_registered_call *fn;
c->new_data = NULL;
c->reply_data = NULL;
c->status = 0;
+ c->header = header;
for (fn=ctdb_db->calls;fn;fn=fn->next) {
if (fn->id == call->call_id) break;
return -1;
}
- if (header->laccessor != caller) {
- header->lacount = 0;
- }
- header->laccessor = caller;
- header->lacount++;
-
- /* we need to force the record to be written out if this was a remote access,
- so that the lacount is updated */
- if (c->new_data == NULL && header->laccessor != ctdb->pnn) {
+ /* we need to force the record to be written out if this was a remote access */
+ if (c->new_data == NULL) {
c->new_data = &c->record_data;
}
- if (c->new_data) {
+ if (c->new_data && updatetdb) {
/* XXX check that we always have the lock here? */
if (ctdb_ltdb_store(ctdb_db, call->key, header, *c->new_data) != 0) {
ctdb_set_error(ctdb, "ctdb_call tdb_store failed\n");
/*
this is called in the client, when data comes in from the daemon
*/
-static void ctdb_client_read_cb(uint8_t *data, size_t cnt, void *args)
+void ctdb_client_read_cb(uint8_t *data, size_t cnt, void *args)
{
struct ctdb_context *ctdb = talloc_get_type(args, struct ctdb_context);
struct ctdb_req_header *hdr = (struct ctdb_req_header *)data;
}
/*
- connect to a unix domain socket
+ connect with exponential backoff, thanks Stevens
+
+ Calls connect() on ctdb->daemon.sd against the unix socket path held in
+ ctdb->daemon.name. The wait starts at 1 second and doubles after each
+ failed attempt; the loop ends once the value exceeds CONNECT_MAXSLEEP.
+ Only a failure with errno == EAGAIN is retried: success, or any other
+ errno, leaves the loop immediately. Nothing is slept after the last
+ attempt (the sleep is skipped once secs is above CONNECT_MAXSLEEP / 2).
+
+ Returns the result of the last connect() call.
*/
-int ctdb_socket_connect(struct ctdb_context *ctdb)
+#define CONNECT_MAXSLEEP 64
+static int ctdb_connect_retry(struct ctdb_context *ctdb)
{
struct sockaddr_un addr;
+ int secs;
+ int ret = 0;
memset(&addr, 0, sizeof(addr));
addr.sun_family = AF_UNIX;
strncpy(addr.sun_path, ctdb->daemon.name, sizeof(addr.sun_path));
+ for (secs = 1; secs <= CONNECT_MAXSLEEP; secs *= 2) {
+ ret = connect(ctdb->daemon.sd, (struct sockaddr *)&addr,
+ sizeof(addr));
+ if ((ret == 0) || (errno != EAGAIN)) {
+ break;
+ }
+
+ if (secs <= (CONNECT_MAXSLEEP / 2)) {
+ DEBUG(DEBUG_ERR,("connect failed: %s, retry in %d second(s)\n",
+ strerror(errno), secs));
+ sleep(secs);
+ }
+ }
+
+ return ret;
+}
+
+/*
+ connect to a unix domain socket
+*/
+int ctdb_socket_connect(struct ctdb_context *ctdb)
+{
ctdb->daemon.sd = socket(AF_UNIX, SOCK_STREAM, 0);
if (ctdb->daemon.sd == -1) {
+ DEBUG(DEBUG_ERR,(__location__ " Failed to open client socket. Errno:%s(%d)\n", strerror(errno), errno));
return -1;
}
set_nonblocking(ctdb->daemon.sd);
set_close_on_exec(ctdb->daemon.sd);
-
- if (connect(ctdb->daemon.sd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
+
+ if (ctdb_connect_retry(ctdb) == -1) {
+ DEBUG(DEBUG_ERR,(__location__ " Failed to connect client socket to daemon. Errno:%s(%d)\n", strerror(errno), errno));
close(ctdb->daemon.sd);
ctdb->daemon.sd = -1;
return -1;
ctdb->daemon.queue = ctdb_queue_setup(ctdb, ctdb, ctdb->daemon.sd,
CTDB_DS_ALIGNMENT,
- ctdb_client_read_cb, ctdb);
+ ctdb_client_read_cb, ctdb, "to-ctdbd");
return 0;
}
call->status = state->call->status;
talloc_free(state);
- return 0;
+ return call->status;
}
*(state->call) = *call;
state->ctdb_db = ctdb_db;
- ret = ctdb_call_local(ctdb_db, state->call, header, state, data, ctdb->pnn);
+ ret = ctdb_call_local(ctdb_db, state->call, header, state, data, true);
return state;
}
ret = ctdb_ltdb_fetch(ctdb_db, call->key, &header, ctdb_db, &data);
+ if ((call->flags & CTDB_IMMEDIATE_MIGRATION) && (header.flags & CTDB_REC_RO_HAVE_DELEGATIONS)) {
+ ret = -1;
+ }
+
if (ret == 0 && header.dmaster == ctdb->pnn) {
state = ctdb_client_call_local_send(ctdb_db, call, &header, &data);
talloc_free(data.dptr);
tell the daemon what messaging srvid we will use, and register the message
handler function in the client
*/
-int ctdb_set_message_handler(struct ctdb_context *ctdb, uint64_t srvid,
- ctdb_message_fn_t handler,
+int ctdb_client_set_message_handler(struct ctdb_context *ctdb, uint64_t srvid,
+ ctdb_msg_fn_t handler,
void *private_data)
{
/*
tell the daemon we no longer want a srvid
*/
-int ctdb_remove_message_handler(struct ctdb_context *ctdb, uint64_t srvid, void *private_data)
+int ctdb_client_remove_message_handler(struct ctdb_context *ctdb, uint64_t srvid, void *private_data)
{
int res;
int32_t status;
/*
send a message - from client context
*/
-int ctdb_send_message(struct ctdb_context *ctdb, uint32_t pnn,
+int ctdb_client_send_message(struct ctdb_context *ctdb, uint32_t pnn,
uint64_t srvid, TDB_DATA data)
{
struct ctdb_req_message *r;
return ctdb_call(ctdb_db, &call);
}
+/*
+  try to fetch a readonly copy of a record
+
+  Issues a CTDB_FETCH_WITH_HEADER_FUNC call for key with the
+  CTDB_WANT_READONLY flag set, then splits the reply into the leading
+  struct ctdb_ltdb_header and the record payload that follows it.
+
+  Returns 0 on success; *hdr and data->dptr are then talloc copies
+  parented to mem_ctx. Returns -1 if the call fails, if the reply is
+  shorter than a header, or if a copy cannot be allocated; on those
+  paths nothing allocated here is left behind.
+ */
+static int
+ctdb_client_fetch_readonly(struct ctdb_db_context *ctdb_db, TDB_DATA key, TALLOC_CTX *mem_ctx, struct ctdb_ltdb_header **hdr, TDB_DATA *data)
+{
+	int ret;
+
+	struct ctdb_call call;
+	ZERO_STRUCT(call);
+
+	call.call_id = CTDB_FETCH_WITH_HEADER_FUNC;
+	call.call_data.dptr = NULL;
+	call.call_data.dsize = 0;
+	call.key = key;
+	call.flags = CTDB_WANT_READONLY;
+	ret = ctdb_call(ctdb_db, &call);
+
+	if (ret != 0) {
+		return -1;
+	}
+	if (call.reply_data.dsize < sizeof(struct ctdb_ltdb_header)) {
+		/* too short to even hold a header: drop the reply */
+		talloc_free(call.reply_data.dptr);
+		return -1;
+	}
+
+	*hdr = talloc_memdup(mem_ctx, &call.reply_data.dptr[0], sizeof(struct ctdb_ltdb_header));
+	if (*hdr == NULL) {
+		talloc_free(call.reply_data.dptr);
+		return -1;
+	}
+
+	data->dsize = call.reply_data.dsize - sizeof(struct ctdb_ltdb_header);
+	data->dptr  = talloc_memdup(mem_ctx, &call.reply_data.dptr[sizeof(struct ctdb_ltdb_header)], data->dsize);
+	if (data->dptr == NULL) {
+		talloc_free(call.reply_data.dptr);
+		/* free the header copy itself, not the caller's pointer variable */
+		talloc_free(*hdr);
+		*hdr = NULL;
+		return -1;
+	}
+
+	/* both parts have been copied, the reply buffer is no longer needed */
+	talloc_free(call.reply_data.dptr);
+
+	return 0;
+}
+
+
/*
get a lock on a record, and return the records data. Blocks until it gets the lock
*/
}
/*
- store some data to the record that was locked with ctdb_fetch_lock()
-*/
-int ctdb_record_store(struct ctdb_record_handle *h, TDB_DATA data)
+ get a readonly lock on a record, and return the records data. Blocks until it gets the lock
+ */
+struct ctdb_record_handle *
+ctdb_fetch_readonly_lock(
+ struct ctdb_db_context *ctdb_db, TALLOC_CTX *mem_ctx,
+ TDB_DATA key, TDB_DATA *data,
+ int read_only)
{
int ret;
- int32_t status;
- struct ctdb_rec_data *rec;
- TDB_DATA recdata;
+ struct ctdb_record_handle *h;
+ struct ctdb_ltdb_header *roheader = NULL;
- if (h->ctdb_db->persistent) {
- h->header.rsn++;
+ h = talloc_zero(mem_ctx, struct ctdb_record_handle);
+ if (h == NULL) {
+ return NULL;
+ }
+
+ h->ctdb_db = ctdb_db;
+ h->key = key;
+ h->key.dptr = talloc_memdup(h, key.dptr, key.dsize);
+ if (h->key.dptr == NULL) {
+ talloc_free(h);
+ return NULL;
}
+ h->data = data;
+
+ data->dptr = NULL;
+ data->dsize = 0;
- ret = ctdb_ltdb_store(h->ctdb_db, h->key, &h->header, data);
+
+again:
+ talloc_free(roheader);
+ roheader = NULL;
+
+ talloc_free(data->dptr);
+ data->dptr = NULL;
+ data->dsize = 0;
+
+ /* Lock the record/chain */
+ ret = ctdb_ltdb_lock(ctdb_db, key);
if (ret != 0) {
- return ret;
+ DEBUG(DEBUG_ERR, (__location__ " failed to lock ltdb record\n"));
+ talloc_free(h);
+ return NULL;
}
- /* don't need the persistent_store control for non-persistent databases */
- if (!h->ctdb_db->persistent) {
- return 0;
+ talloc_set_destructor(h, fetch_lock_destructor);
+
+ /* Check if record exists yet in the TDB */
+ ret = ctdb_ltdb_fetch_readonly(ctdb_db, key, &h->header, h, data);
+ if (ret != 0) {
+ ctdb_ltdb_unlock(ctdb_db, key);
+ ret = ctdb_client_force_migration(ctdb_db, key);
+ if (ret != 0) {
+ DEBUG(DEBUG_DEBUG,("ctdb_fetch_readonly_lock: force_migration failed\n"));
+ talloc_free(h);
+ return NULL;
+ }
+ goto again;
}
- rec = ctdb_marshall_record(h, h->ctdb_db->db_id, h->key, &h->header, data);
- if (rec == NULL) {
- DEBUG(DEBUG_ERR,("Unable to marshall record in ctdb_record_store\n"));
- return -1;
+ /* if this is a request for read/write and we have delegations
+ we have to revoke all delegations first
+ */
+ if ((read_only == 0)
+ && (h->header.dmaster == ctdb_db->ctdb->pnn)
+ && (h->header.flags & CTDB_REC_RO_HAVE_DELEGATIONS)) {
+ ctdb_ltdb_unlock(ctdb_db, key);
+ ret = ctdb_client_force_migration(ctdb_db, key);
+ if (ret != 0) {
+ DEBUG(DEBUG_DEBUG,("ctdb_fetch_readonly_lock: force_migration failed\n"));
+ talloc_free(h);
+ return NULL;
+ }
+ goto again;
+ }
+
+ /* if we are dmaster, just return the handle */
+ if (h->header.dmaster == ctdb_db->ctdb->pnn) {
+ return h;
}
- recdata.dptr = (uint8_t *)rec;
- recdata.dsize = rec->length;
+ if (read_only != 0) {
+ TDB_DATA rodata = {NULL, 0};
- ret = ctdb_control(h->ctdb_db->ctdb, CTDB_CURRENT_NODE, 0,
- CTDB_CONTROL_PERSISTENT_STORE, 0,
- recdata, NULL, NULL, &status, NULL, NULL);
+ if ((h->header.flags & CTDB_REC_RO_HAVE_READONLY)
+ || (h->header.flags & CTDB_REC_RO_HAVE_DELEGATIONS)) {
+ return h;
+ }
+
+ ctdb_ltdb_unlock(ctdb_db, key);
+ ret = ctdb_client_fetch_readonly(ctdb_db, key, h, &roheader, &rodata);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,("ctdb_fetch_readonly_lock: failed. force migration and try again\n"));
+ ret = ctdb_client_force_migration(ctdb_db, key);
+ if (ret != 0) {
+ DEBUG(DEBUG_DEBUG,("ctdb_fetch_readonly_lock: force_migration failed\n"));
+ talloc_free(h);
+ return NULL;
+ }
+
+ goto again;
+ }
- talloc_free(rec);
+ if (!(roheader->flags&CTDB_REC_RO_HAVE_READONLY)) {
+ ret = ctdb_client_force_migration(ctdb_db, key);
+ if (ret != 0) {
+ DEBUG(DEBUG_DEBUG,("ctdb_fetch_readonly_lock: force_migration failed\n"));
+ talloc_free(h);
+ return NULL;
+ }
- if (ret != 0 || status != 0) {
- DEBUG(DEBUG_ERR,("Failed persistent store in ctdb_record_store\n"));
+ goto again;
+ }
+
+ ret = ctdb_ltdb_lock(ctdb_db, key);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, (__location__ " failed to lock ltdb record\n"));
+ talloc_free(h);
+ return NULL;
+ }
+
+ ret = ctdb_ltdb_fetch_readonly(ctdb_db, key, &h->header, h, data);
+ if (ret != 0) {
+ ctdb_ltdb_unlock(ctdb_db, key);
+
+ ret = ctdb_client_force_migration(ctdb_db, key);
+ if (ret != 0) {
+ DEBUG(DEBUG_DEBUG,("ctdb_fetch_readonly_lock: force_migration failed\n"));
+ talloc_free(h);
+ return NULL;
+ }
+
+ goto again;
+ }
+
+ if (h->header.rsn >= roheader->rsn) {
+ DEBUG(DEBUG_ERR,("READONLY RECORD: Too small RSN, migrate and try again\n"));
+ ctdb_ltdb_unlock(ctdb_db, key);
+
+ ret = ctdb_client_force_migration(ctdb_db, key);
+ if (ret != 0) {
+ DEBUG(DEBUG_DEBUG,("ctdb_fetch_readonly_lock: force_migration failed\n"));
+ talloc_free(h);
+ return NULL;
+ }
+
+ goto again;
+ }
+
+ if (ctdb_ltdb_store(ctdb_db, key, roheader, rodata) != 0) {
+ ctdb_ltdb_unlock(ctdb_db, key);
+
+ ret = ctdb_client_force_migration(ctdb_db, key);
+ if (ret != 0) {
+ DEBUG(DEBUG_DEBUG,("ctdb_fetch_readonly_lock: force_migration failed\n"));
+ talloc_free(h);
+ return NULL;
+ }
+
+ goto again;
+ }
+ return h;
+ }
+
+ /* we are not dmaster and this was not a request for a readonly lock
+ * so unlock the record, migrate it and try again
+ */
+ ctdb_ltdb_unlock(ctdb_db, key);
+ ret = ctdb_client_force_migration(ctdb_db, key);
+ if (ret != 0) {
+ DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: force_migration failed\n"));
+ talloc_free(h);
+ return NULL;
+ }
+ goto again;
+}
+
+/*
+ store some data to the record that was locked with ctdb_fetch_lock()
+*/
+int ctdb_record_store(struct ctdb_record_handle *h, TDB_DATA data)
+{
+ if (h->ctdb_db->persistent) {
+ DEBUG(DEBUG_ERR, (__location__ " ctdb_record_store prohibited for persistent dbs\n"));
return -1;
}
- return 0;
+ return ctdb_ltdb_store(h->ctdb_db, h->key, &h->header, data);
}
/*
call.call_id = CTDB_FETCH_FUNC;
call.call_data.dptr = NULL;
call.call_data.dsize = 0;
+ call.key = key;
ret = ctdb_call(ctdb_db, &call);
{
struct ctdb_client_control_state *state = talloc_get_type(private_data, struct ctdb_client_control_state);
- DEBUG(DEBUG_ERR,("control timed out. reqid:%d opcode:%d dstnode:%d\n", state->reqid, state->c->opcode, state->c->hdr.destnode));
+ DEBUG(DEBUG_ERR,(__location__ " control timed out. reqid:%u opcode:%u "
+ "dstnode:%u\n", state->reqid, state->c->opcode,
+ state->c->hdr.destnode));
state->state = CTDB_CONTROL_TIMEOUT;
CTDB_NO_MEMORY_NULL(ctdb, c);
c->hdr.reqid = state->reqid;
c->hdr.destnode = destnode;
- c->hdr.reqid = state->reqid;
c->opcode = opcode;
c->client_id = 0;
c->flags = flags;
{
TALLOC_CTX *tmp_ctx;
+ if (status != NULL) {
+ *status = -1;
+ }
+ if (errormsg != NULL) {
+ *errormsg = NULL;
+ }
+
if (state == NULL) {
return -1;
}
CTDB_CONTROL_GET_DBMAP, 0, tdb_null,
mem_ctx, &outdata, &res, &timeout, NULL);
if (ret != 0 || res != 0) {
- DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getdbmap failed\n"));
+ DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getdbmap failed ret:%d res:%d\n", ret, res));
return -1;
}
}
/*
- get the reclock filename
+ get a list of nodes (vnn and flags ) from a remote node
+
+ Sends CTDB_CONTROL_GET_NODEMAP to destnode. If the control itself went
+ through but the node answered -1 with no data, the request is repeated
+ with the ipv4-only control via ctdb_ctrl_getnodemapv4().
+
+ Returns 0 with *nodemap set to a copy parented to mem_ctx, or -1 if the
+ control fails, returns no data, or the copy cannot be allocated.
*/
-int ctdb_ctrl_getreclock(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
- TALLOC_CTX *mem_ctx, const char **reclock)
+int ctdb_ctrl_getnodemap(struct ctdb_context *ctdb,
+ struct timeval timeout, uint32_t destnode,
+ TALLOC_CTX *mem_ctx, struct ctdb_node_map **nodemap)
{
int ret;
TDB_DATA outdata;
int32_t res;
ret = ctdb_control(ctdb, destnode, 0,
- CTDB_CONTROL_GET_RECLOCK_FILE, 0, tdb_null,
+ CTDB_CONTROL_GET_NODEMAP, 0, tdb_null,
mem_ctx, &outdata, &res, &timeout, NULL);
- if (ret != 0 || res != 0) {
- DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getreclock failed\n"));
+ if (ret == 0 && res == -1 && outdata.dsize == 0) {
+ DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getnodes failed, falling back to ipv4-only control\n"));
+ return ctdb_ctrl_getnodemapv4(ctdb, timeout, destnode, mem_ctx, nodemap);
+ }
+ if (ret != 0 || res != 0 || outdata.dsize == 0) {
+ DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getnodes failed ret:%d res:%d\n", ret, res));
return -1;
}
- *reclock = (const char *)talloc_steal(mem_ctx, outdata.dptr);
-
+ *nodemap = (struct ctdb_node_map *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
+ talloc_free(outdata.dptr);
+ CTDB_NO_MEMORY(ctdb, *nodemap);
+
return 0;
}
/*
- get a list of nodes (vnn and flags ) from a remote node
+ old style ipv4-only get a list of nodes (vnn and flags ) from a remote node
*/
-int ctdb_ctrl_getnodemap(struct ctdb_context *ctdb,
+int ctdb_ctrl_getnodemapv4(struct ctdb_context *ctdb,
struct timeval timeout, uint32_t destnode,
TALLOC_CTX *mem_ctx, struct ctdb_node_map **nodemap)
{
- int ret;
+ int ret, i, len;
TDB_DATA outdata;
+ struct ctdb_node_mapv4 *nodemapv4;
int32_t res;
ret = ctdb_control(ctdb, destnode, 0,
- CTDB_CONTROL_GET_NODEMAP, 0, tdb_null,
+ CTDB_CONTROL_GET_NODEMAPv4, 0, tdb_null,
mem_ctx, &outdata, &res, &timeout, NULL);
if (ret != 0 || res != 0 || outdata.dsize == 0) {
- DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getnodes failed\n"));
+ DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getnodesv4 failed ret:%d res:%d\n", ret, res));
return -1;
}
- *nodemap = (struct ctdb_node_map *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
+ nodemapv4 = (struct ctdb_node_mapv4 *)outdata.dptr;
+
+ len = offsetof(struct ctdb_node_map, nodes) + nodemapv4->num*sizeof(struct ctdb_node_and_flags);
+ (*nodemap) = talloc_zero_size(mem_ctx, len);
+ CTDB_NO_MEMORY(ctdb, (*nodemap));
+
+ (*nodemap)->num = nodemapv4->num;
+ for (i=0; i<nodemapv4->num; i++) {
+ (*nodemap)->nodes[i].pnn = nodemapv4->nodes[i].pnn;
+ (*nodemap)->nodes[i].flags = nodemapv4->nodes[i].flags;
+ (*nodemap)->nodes[i].addr.ip = nodemapv4->nodes[i].sin;
+ (*nodemap)->nodes[i].addr.sa.sa_family = AF_INET;
+ }
+
talloc_free(outdata.dptr);
return 0;
len = offsetof(struct ctdb_vnn_map_wire, map) + sizeof(uint32_t)*vnnmap->size;
map = talloc_size(mem_ctx, len);
- CTDB_NO_MEMORY_VOID(ctdb, map);
+ CTDB_NO_MEMORY(ctdb, map);
map->generation = vnnmap->generation;
map->size = vnnmap->size;
return 0;
}
+/*
+  get the health status of a db
+
+  Sends CTDB_CONTROL_DB_GET_HEALTH for dbid to destnode.
+
+  Returns 0 on success. *reason is NULL when the reply carries no data,
+  otherwise a string copy of the reply parented to mem_ctx.
+  Returns -1 if the control fails or the copy cannot be allocated.
+ */
+int ctdb_ctrl_getdbhealth(struct ctdb_context *ctdb,
+			  struct timeval timeout,
+			  uint32_t destnode,
+			  uint32_t dbid, TALLOC_CTX *mem_ctx,
+			  const char **reason)
+{
+	int ret;
+	int32_t res;
+	TDB_DATA data;
+
+	data.dptr = (uint8_t *)&dbid;
+	data.dsize = sizeof(dbid);
+
+	/* data is passed by value as the request and reused for the reply */
+	ret = ctdb_control(ctdb, destnode, 0,
+			   CTDB_CONTROL_DB_GET_HEALTH, 0, data,
+			   mem_ctx, &data, &res, &timeout, NULL);
+	if (ret != 0 || res != 0) {
+		return -1;
+	}
+
+	if (data.dsize == 0) {
+		(*reason) = NULL;
+		return 0;
+	}
+
+	(*reason) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize);
+
+	/* the reply buffer is done with whether or not the copy succeeded */
+	talloc_free(data.dptr);
+
+	if ((*reason) == NULL) {
+		return -1;
+	}
+
+	return 0;
+}
+
+
/*
create a database
*/
return 0;
}
+/*
+  plain fetch call that every database supports
+
+  The reply is the full record: the ltdb header taken from call->header
+  immediately followed by the bytes of call->record_data. The reply is
+  allocated as a talloc child of call. Returns 0 on success and -1 if an
+  allocation fails.
+*/
+static int ctdb_fetch_with_header_func(struct ctdb_call_info *call)
+{
+	const size_t hdr_len = sizeof(struct ctdb_ltdb_header);
+	TDB_DATA *reply;
+
+	reply = talloc(call, TDB_DATA);
+	call->reply_data = reply;
+	if (reply == NULL) {
+		return -1;
+	}
+
+	reply->dsize = hdr_len + call->record_data.dsize;
+	reply->dptr = talloc_size(reply, reply->dsize);
+	if (reply->dptr == NULL) {
+		return -1;
+	}
+
+	/* header first, record payload right behind it */
+	memcpy(reply->dptr, call->header, hdr_len);
+	memcpy(&reply->dptr[hdr_len], call->record_data.dptr, call->record_data.dsize);
+
+	return 0;
+}
+
+
/*
attach to a specific database - client call
*/
-struct ctdb_db_context *ctdb_attach(struct ctdb_context *ctdb, const char *name, bool persistent)
+struct ctdb_db_context *ctdb_attach(struct ctdb_context *ctdb,
+ struct timeval timeout,
+ const char *name,
+ bool persistent,
+ uint32_t tdb_flags)
{
struct ctdb_db_context *ctdb_db;
TDB_DATA data;
data.dsize = strlen(name)+1;
/* tell ctdb daemon to attach */
- ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0,
+ ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, tdb_flags,
persistent?CTDB_CONTROL_DB_ATTACH_PERSISTENT:CTDB_CONTROL_DB_ATTACH,
0, data, ctdb_db, &data, &res, NULL, NULL);
if (ret != 0 || res != 0 || data.dsize != sizeof(uint32_t)) {
ctdb_db->db_id = *(uint32_t *)data.dptr;
talloc_free(data.dptr);
- ret = ctdb_ctrl_getdbpath(ctdb, timeval_current_ofs(2, 0), CTDB_CURRENT_NODE, ctdb_db->db_id, ctdb_db, &ctdb_db->db_path);
+ ret = ctdb_ctrl_getdbpath(ctdb, timeout, CTDB_CURRENT_NODE, ctdb_db->db_id, ctdb_db, &ctdb_db->db_path);
if (ret != 0) {
DEBUG(DEBUG_ERR,("Failed to get dbpath for database '%s'\n", name));
talloc_free(ctdb_db);
return NULL;
}
- ctdb_db->ltdb = tdb_wrap_open(ctdb, ctdb_db->db_path, 0, persistent?TDB_DEFAULT:TDB_NOSYNC, O_RDWR, 0);
+ tdb_flags = persistent?TDB_DEFAULT:TDB_NOSYNC;
+ if (ctdb->valgrinding) {
+ tdb_flags |= TDB_NOMMAP;
+ }
+ tdb_flags |= TDB_DISALLOW_NESTING;
+
+ ctdb_db->ltdb = tdb_wrap_open(ctdb, ctdb_db->db_path, 0, tdb_flags, O_RDWR, 0);
if (ctdb_db->ltdb == NULL) {
ctdb_set_error(ctdb, "Failed to open tdb '%s'\n", ctdb_db->db_path);
talloc_free(ctdb_db);
/* add well known functions */
ctdb_set_call(ctdb_db, ctdb_null_func, CTDB_NULL_FUNC);
ctdb_set_call(ctdb_db, ctdb_fetch_func, CTDB_FETCH_FUNC);
+ ctdb_set_call(ctdb_db, ctdb_fetch_with_header_func, CTDB_FETCH_WITH_HEADER_FUNC);
return ctdb_db;
}
return;
}
+ if (data.dsize == sizeof(struct ctdb_ltdb_header)) {
+ /* empty records are deleted records in ctdb */
+ return;
+ }
+
if (state->fn(ctdb, key, data, state->private_data) != 0) {
state->done = True;
}
state.private_data = private_data;
state.fn = fn;
- ret = ctdb_set_message_handler(ctdb_db->ctdb, srvid, traverse_handler, &state);
+ ret = ctdb_client_set_message_handler(ctdb_db->ctdb, srvid, traverse_handler, &state);
if (ret != 0) {
DEBUG(DEBUG_ERR,("Failed to setup traverse handler\n"));
return -1;
data, NULL, NULL, &status, NULL, NULL);
if (ret != 0 || status != 0) {
DEBUG(DEBUG_ERR,("ctdb_traverse_all failed\n"));
- ctdb_remove_message_handler(ctdb_db->ctdb, srvid, &state);
+ ctdb_client_remove_message_handler(ctdb_db->ctdb, srvid, &state);
return -1;
}
event_loop_once(ctdb_db->ctdb->ev);
}
- ret = ctdb_remove_message_handler(ctdb_db->ctdb, srvid, &state);
+ ret = ctdb_client_remove_message_handler(ctdb_db->ctdb, srvid, &state);
if (ret != 0) {
DEBUG(DEBUG_ERR,("Failed to remove ctdb_traverse handler\n"));
return -1;
return state.count;
}
+#define ISASCII(x) (isprint(x) && !strchr("\"\\", (x)))
/*
called on each key during a catdb
+
+ Writes the key, the ltdb header fields (dmaster, rsn, flags) and the
+ record payload that follows the header to the FILE passed in p. Bytes
+ that are not printable, plus the quote and backslash characters, are
+ written as \XX hex escapes. Every part of the record, including the
+ decoded flag names, goes to that same FILE. Returns 0.
*/
-static int dumpdb_fn(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA data, void *p)
+int ctdb_dumpdb_record(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA data, void *p)
{
int i;
FILE *f = (FILE *)p;
struct ctdb_ltdb_header *h = (struct ctdb_ltdb_header *)data.dptr;
- fprintf(f, "dmaster: %u\n", h->dmaster);
- fprintf(f, "rsn: %llu\n", (unsigned long long)h->rsn);
-
- fprintf(f, "key(%d) = \"", key.dsize);
+ fprintf(f, "key(%u) = \"", (unsigned)key.dsize);
for (i=0;i<key.dsize;i++) {
- if (isascii(key.dptr[i])) {
+ if (ISASCII(key.dptr[i])) {
fprintf(f, "%c", key.dptr[i]);
} else {
fprintf(f, "\\%02X", key.dptr[i]);
}
fprintf(f, "\"\n");
- fprintf(f, "data(%d) = \"", data.dsize);
+ fprintf(f, "dmaster: %u\n", h->dmaster);
+ fprintf(f, "rsn: %llu\n", (unsigned long long)h->rsn);
+ fprintf(f, "flags: 0x%08x", h->flags);
+ if (h->flags & CTDB_REC_FLAG_MIGRATED_WITH_DATA) fprintf(f, " MIGRATED_WITH_DATA");
+ if (h->flags & CTDB_REC_FLAG_VACUUM_MIGRATED) fprintf(f, " VACUUM_MIGRATED");
+ if (h->flags & CTDB_REC_FLAG_AUTOMATIC) fprintf(f, " AUTOMATIC");
+ if (h->flags & CTDB_REC_RO_HAVE_DELEGATIONS) fprintf(f, " RO_HAVE_DELEGATIONS");
+ if (h->flags & CTDB_REC_RO_HAVE_READONLY) fprintf(f, " RO_HAVE_READONLY");
+ if (h->flags & CTDB_REC_RO_REVOKING_READONLY) fprintf(f, " RO_REVOKING_READONLY");
+ if (h->flags & CTDB_REC_RO_REVOKE_COMPLETE) fprintf(f, " RO_REVOKE_COMPLETE");
+ fprintf(f, "\n");
+
+ fprintf(f, "data(%u) = \"", (unsigned)(data.dsize - sizeof(*h)));
for (i=sizeof(*h);i<data.dsize;i++) {
- if (isascii(data.dptr[i])) {
+ if (ISASCII(data.dptr[i])) {
fprintf(f, "%c", data.dptr[i]);
} else {
fprintf(f, "\\%02X", data.dptr[i]);
}
fprintf(f, "\"\n");
+ fprintf(f, "\n");
+
return 0;
}
*/
int ctdb_dump_db(struct ctdb_db_context *ctdb_db, FILE *f)
{
- return ctdb_traverse(ctdb_db, dumpdb_fn, f);
+ return ctdb_traverse(ctdb_db, ctdb_dumpdb_record, f);
}
/*
async freeze send control
*/
struct ctdb_client_control_state *
-ctdb_ctrl_freeze_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
+ctdb_ctrl_freeze_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t priority)
{
- return ctdb_control_send(ctdb, destnode, 0,
+ return ctdb_control_send(ctdb, destnode, priority,
CTDB_CONTROL_FREEZE, 0, tdb_null,
mem_ctx, &timeout, NULL);
}
}
/*
- freeze a node
+ freeze databases of a certain priority
*/
-int ctdb_ctrl_freeze(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
+int ctdb_ctrl_freeze_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t priority)
{
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
struct ctdb_client_control_state *state;
int ret;
- state = ctdb_ctrl_freeze_send(ctdb, tmp_ctx, timeout, destnode);
+ state = ctdb_ctrl_freeze_send(ctdb, tmp_ctx, timeout, destnode, priority);
ret = ctdb_ctrl_freeze_recv(ctdb, tmp_ctx, state);
talloc_free(tmp_ctx);
return ret;
}
+/*
+  Freeze all databases
+
+  Walks the priorities 1..NUM_DB_PRIORITIES in ascending order and
+  freezes each one through ctdb_ctrl_freeze_priority(). Stops at the
+  first failure and returns -1; returns 0 once every priority is frozen.
+ */
+int ctdb_ctrl_freeze(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
+{
+	int prio = 1;
+
+	while (prio <= NUM_DB_PRIORITIES) {
+		int ret = ctdb_ctrl_freeze_priority(ctdb, timeout, destnode, prio);
+
+		if (ret != 0) {
+			return -1;
+		}
+		prio++;
+	}
+
+	return 0;
+}
+
+
/*
- thaw a node
+ thaw databases of a certain priority
*/
-int ctdb_ctrl_thaw(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
+int ctdb_ctrl_thaw_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t priority)
{
int ret;
int32_t res;
- ret = ctdb_control(ctdb, destnode, 0,
+ ret = ctdb_control(ctdb, destnode, priority,
CTDB_CONTROL_THAW, 0, tdb_null,
NULL, NULL, &res, &timeout, NULL);
if (ret != 0 || res != 0) {
return 0;
}
+/*
+  thaw all databases
+
+  Hands the request to ctdb_ctrl_thaw_priority() with priority 0 and
+  returns its result unchanged.
+ */
+int ctdb_ctrl_thaw(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
+{
+	const uint32_t all_priorities = 0;
+
+	return ctdb_ctrl_thaw_priority(ctdb, timeout, destnode, all_priorities);
+}
+
/*
get pnn of a node, or -1
*/
uint32_t destnode, struct ctdb_public_ip *ip)
{
TDB_DATA data;
+ struct ctdb_public_ipv4 ipv4;
int ret;
int32_t res;
- data.dsize = sizeof(*ip);
- data.dptr = (uint8_t *)ip;
+ if (ip->addr.sa.sa_family == AF_INET) {
+ ipv4.pnn = ip->pnn;
+ ipv4.sin = ip->addr.ip;
+
+ data.dsize = sizeof(ipv4);
+ data.dptr = (uint8_t *)&ipv4;
+
+ ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_TAKEOVER_IPv4, 0, data, NULL,
+ NULL, &res, &timeout, NULL);
+ } else {
+ data.dsize = sizeof(*ip);
+ data.dptr = (uint8_t *)ip;
- ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_TAKEOVER_IP, 0, data, NULL,
+ ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_TAKEOVER_IP, 0, data, NULL,
NULL, &res, &timeout, NULL);
+ }
if (ret != 0 || res != 0) {
DEBUG(DEBUG_ERR,(__location__ " ctdb_control for takeover_ip failed\n"));
uint32_t destnode, struct ctdb_public_ip *ip)
{
TDB_DATA data;
+ struct ctdb_public_ipv4 ipv4;
int ret;
int32_t res;
- data.dsize = sizeof(*ip);
- data.dptr = (uint8_t *)ip;
+ if (ip->addr.sa.sa_family == AF_INET) {
+ ipv4.pnn = ip->pnn;
+ ipv4.sin = ip->addr.ip;
- ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_RELEASE_IP, 0, data, NULL,
- NULL, &res, &timeout, NULL);
+ data.dsize = sizeof(ipv4);
+ data.dptr = (uint8_t *)&ipv4;
+
+ ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_RELEASE_IPv4, 0, data, NULL,
+ NULL, &res, &timeout, NULL);
+ } else {
+ data.dsize = sizeof(*ip);
+ data.dptr = (uint8_t *)ip;
+
+ ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_RELEASE_IP, 0, data, NULL,
+ NULL, &res, &timeout, NULL);
+ }
if (ret != 0 || res != 0) {
DEBUG(DEBUG_ERR,(__location__ " ctdb_control for release_ip failed\n"));
}
-int ctdb_ctrl_get_public_ips(struct ctdb_context *ctdb,
- struct timeval timeout, uint32_t destnode,
- TALLOC_CTX *mem_ctx, struct ctdb_all_public_ips **ips)
+int ctdb_ctrl_get_public_ips_flags(struct ctdb_context *ctdb,
+ struct timeval timeout, uint32_t destnode,
+ TALLOC_CTX *mem_ctx,
+ uint32_t flags,
+ struct ctdb_all_public_ips **ips)
{
int ret;
TDB_DATA outdata;
int32_t res;
ret = ctdb_control(ctdb, destnode, 0,
- CTDB_CONTROL_GET_PUBLIC_IPS, 0, tdb_null,
+ CTDB_CONTROL_GET_PUBLIC_IPS, flags, tdb_null,
mem_ctx, &outdata, &res, &timeout, NULL);
+ if (ret == 0 && res == -1) {
+ DEBUG(DEBUG_ERR,(__location__ " ctdb_control to get public ips failed, falling back to ipv4-only version\n"));
+ return ctdb_ctrl_get_public_ipsv4(ctdb, timeout, destnode, mem_ctx, ips);
+ }
if (ret != 0 || res != 0) {
- DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpublicips failed\n"));
+ DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpublicips failed ret:%d res:%d\n", ret, res));
return -1;
}
return 0;
}
+/*
+  get the public ip list of a node
+
+  Same as ctdb_ctrl_get_public_ips_flags() with the flags argument set
+  to 0; the result of that call is returned unchanged.
+ */
+int ctdb_ctrl_get_public_ips(struct ctdb_context *ctdb,
+			     struct timeval timeout, uint32_t destnode,
+			     TALLOC_CTX *mem_ctx,
+			     struct ctdb_all_public_ips **ips)
+{
+	const uint32_t no_flags = 0;
+
+	return ctdb_ctrl_get_public_ips_flags(ctdb, timeout, destnode,
+					      mem_ctx, no_flags, ips);
+}
+
+/*
+  old style ipv4-only fetch of the public ip list of a node
+
+  Sends CTDB_CONTROL_GET_PUBLIC_IPSv4 to destnode and converts the
+  reply (struct ctdb_all_public_ipsv4) into a zero-initialised
+  struct ctdb_all_public_ips parented to mem_ctx.
+
+  Returns 0 with *ips set, or -1 if the control fails, the reply is too
+  small for the number of entries it claims, or memory runs out.
+ */
+int ctdb_ctrl_get_public_ipsv4(struct ctdb_context *ctdb,
+			struct timeval timeout, uint32_t destnode,
+			TALLOC_CTX *mem_ctx, struct ctdb_all_public_ips **ips)
+{
+	int ret;
+	uint32_t i;
+	size_t len;
+	const size_t hdrlen = offsetof(struct ctdb_all_public_ipsv4, ips);
+	TDB_DATA outdata;
+	int32_t res;
+	struct ctdb_all_public_ipsv4 *ipsv4;
+
+	ret = ctdb_control(ctdb, destnode, 0,
+			   CTDB_CONTROL_GET_PUBLIC_IPSv4, 0, tdb_null,
+			   mem_ctx, &outdata, &res, &timeout, NULL);
+	if (ret != 0 || res != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpublicips failed\n"));
+		return -1;
+	}
+
+	/* the reply comes from another node: make sure the fixed part is
+	   there before reading num from it */
+	if (outdata.dsize < hdrlen) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpublicipsv4 "
+				 "returned invalid data with size %u\n",
+				 (unsigned int)outdata.dsize));
+		talloc_free(outdata.dptr);
+		return -1;
+	}
+
+	ipsv4 = (struct ctdb_all_public_ipsv4 *)outdata.dptr;
+
+	/* ... and that all num entries really fit in the reply; dividing
+	   instead of multiplying keeps a huge num from wrapping */
+	if (ipsv4->num > (outdata.dsize - hdrlen) / sizeof(struct ctdb_public_ipv4)) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpublicipsv4 "
+				 "returned %u entries in only %u bytes\n",
+				 (unsigned int)ipsv4->num,
+				 (unsigned int)outdata.dsize));
+		talloc_free(outdata.dptr);
+		return -1;
+	}
+
+	len = offsetof(struct ctdb_all_public_ips, ips) +
+		ipsv4->num*sizeof(struct ctdb_public_ip);
+	*ips = talloc_zero_size(mem_ctx, len);
+	if (*ips == NULL) {
+		/* do not leave the reply behind when bailing out below */
+		talloc_free(outdata.dptr);
+	}
+	CTDB_NO_MEMORY(ctdb, *ips);
+
+	(*ips)->num = ipsv4->num;
+	for (i=0; i<ipsv4->num; i++) {
+		(*ips)->ips[i].pnn     = ipsv4->ips[i].pnn;
+		(*ips)->ips[i].addr.ip = ipsv4->ips[i].sin;
+	}
+
+	talloc_free(outdata.dptr);
+
+	return 0;
+}
+
+
+/*
+  get the interface information for one public ip of a node
+
+  Sends CTDB_CONTROL_GET_PUBLIC_IP_INFO for addr to destnode, checks
+  that the reply is large enough for its header and for the num
+  interface entries it announces, forces the interface names to be NUL
+  terminated and hands back a copy parented to mem_ctx in *_info.
+
+  Returns 0 on success, -1 if the control fails, the reply is
+  malformed, or the copy cannot be allocated.
+ */
+int ctdb_ctrl_get_public_ip_info(struct ctdb_context *ctdb,
+				 struct timeval timeout, uint32_t destnode,
+				 TALLOC_CTX *mem_ctx,
+				 const ctdb_sock_addr *addr,
+				 struct ctdb_control_public_ip_info **_info)
+{
+	int ret;
+	TDB_DATA indata;
+	TDB_DATA outdata;
+	int32_t res;
+	struct ctdb_control_public_ip_info *info;
+	uint64_t len;
+	uint32_t i;
+
+	indata.dptr = discard_const_p(uint8_t, addr);
+	indata.dsize = sizeof(*addr);
+
+	ret = ctdb_control(ctdb, destnode, 0,
+			   CTDB_CONTROL_GET_PUBLIC_IP_INFO, 0, indata,
+			   mem_ctx, &outdata, &res, &timeout, NULL);
+	if (ret != 0 || res != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
+				"failed ret:%d res:%d\n",
+				ret, res));
+		return -1;
+	}
+
+	len = offsetof(struct ctdb_control_public_ip_info, ifaces);
+	if (len > outdata.dsize) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
+				"returned invalid data with size %u > %u\n",
+				(unsigned int)outdata.dsize,
+				(unsigned int)len));
+		dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
+		talloc_free(outdata.dptr);
+		return -1;
+	}
+
+	info = (struct ctdb_control_public_ip_info *)outdata.dptr;
+	/* 64-bit arithmetic: a huge num from the peer must not wrap the
+	   required length and slip past the size check below */
+	len += (uint64_t)info->num*sizeof(struct ctdb_control_iface_info);
+
+	if (len > outdata.dsize) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
+				"returned invalid data with size %u > %u\n",
+				(unsigned int)outdata.dsize,
+				(unsigned int)len));
+		dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
+		talloc_free(outdata.dptr);
+		return -1;
+	}
+
+	/* make sure we null terminate the returned strings */
+	for (i=0; i < info->num; i++) {
+		info->ifaces[i].name[CTDB_IFACE_SIZE] = '\0';
+	}
+
+	*_info = (struct ctdb_control_public_ip_info *)talloc_memdup(mem_ctx,
+								outdata.dptr,
+								outdata.dsize);
+	talloc_free(outdata.dptr);
+	if (*_info == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
+				"talloc_memdup size %u failed\n",
+				(unsigned int)outdata.dsize));
+		return -1;
+	}
+
+	return 0;
+}
+
+
+/*
+  get the list of interfaces of a node
+
+  Sends CTDB_CONTROL_GET_IFACES to destnode, checks that the reply is
+  large enough for its header and for the num interface entries it
+  announces, forces the interface names to be NUL terminated and hands
+  back a copy parented to mem_ctx in *_ifaces.
+
+  Returns 0 on success, -1 if the control fails, the reply is
+  malformed, or the copy cannot be allocated.
+ */
+int ctdb_ctrl_get_ifaces(struct ctdb_context *ctdb,
+			 struct timeval timeout, uint32_t destnode,
+			 TALLOC_CTX *mem_ctx,
+			 struct ctdb_control_get_ifaces **_ifaces)
+{
+	int ret;
+	TDB_DATA outdata;
+	int32_t res;
+	struct ctdb_control_get_ifaces *ifaces;
+	uint64_t len;
+	uint32_t i;
+
+	ret = ctdb_control(ctdb, destnode, 0,
+			   CTDB_CONTROL_GET_IFACES, 0, tdb_null,
+			   mem_ctx, &outdata, &res, &timeout, NULL);
+	if (ret != 0 || res != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
+				"failed ret:%d res:%d\n",
+				ret, res));
+		return -1;
+	}
+
+	len = offsetof(struct ctdb_control_get_ifaces, ifaces);
+	if (len > outdata.dsize) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
+				"returned invalid data with size %u > %u\n",
+				(unsigned int)outdata.dsize,
+				(unsigned int)len));
+		dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
+		talloc_free(outdata.dptr);
+		return -1;
+	}
+
+	ifaces = (struct ctdb_control_get_ifaces *)outdata.dptr;
+	/* 64-bit arithmetic: a huge num from the peer must not wrap the
+	   required length and slip past the size check below */
+	len += (uint64_t)ifaces->num*sizeof(struct ctdb_control_iface_info);
+
+	if (len > outdata.dsize) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
+				"returned invalid data with size %u > %u\n",
+				(unsigned int)outdata.dsize,
+				(unsigned int)len));
+		dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
+		talloc_free(outdata.dptr);
+		return -1;
+	}
+
+	/* make sure we null terminate the returned strings */
+	for (i=0; i < ifaces->num; i++) {
+		ifaces->ifaces[i].name[CTDB_IFACE_SIZE] = '\0';
+	}
+
+	*_ifaces = (struct ctdb_control_get_ifaces *)talloc_memdup(mem_ctx,
+								  outdata.dptr,
+								  outdata.dsize);
+	talloc_free(outdata.dptr);
+	if (*_ifaces == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
+				"talloc_memdup size %u failed\n",
+				(unsigned int)outdata.dsize));
+		return -1;
+	}
+
+	return 0;
+}
+
+
+/*
+  set the link state of an interface on a node
+
+  Sends the given ctdb_control_iface_info to destnode with
+  CTDB_CONTROL_SET_IFACE_LINK_STATE; no reply data is requested.
+  Returns 0 when both the control and the remote status succeed,
+  otherwise logs the two codes and returns -1.
+ */
+int ctdb_ctrl_set_iface_link(struct ctdb_context *ctdb,
+			     struct timeval timeout, uint32_t destnode,
+			     TALLOC_CTX *mem_ctx,
+			     const struct ctdb_control_iface_info *info)
+{
+	TDB_DATA request;
+	int32_t res;
+	int ret;
+
+	request.dsize = sizeof(*info);
+	request.dptr = discard_const_p(uint8_t, info);
+
+	ret = ctdb_control(ctdb, destnode, 0,
+			   CTDB_CONTROL_SET_IFACE_LINK_STATE, 0, request,
+			   mem_ctx, NULL, &res, &timeout, NULL);
+	if (ret == 0 && res == 0) {
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set iface link "
+			"failed ret:%d res:%d\n",
+			ret, res));
+	return -1;
+}
+
+
/*
set/clear the permanent disabled bit on a remote node
*/
{
int ret;
TDB_DATA data;
- struct ctdb_node_modflags m;
- int32_t res;
+ struct ctdb_node_map *nodemap=NULL;
+ struct ctdb_node_flag_change c;
+ TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
+ uint32_t recmaster;
+ uint32_t *nodes;
+
+
+ /* find the recovery master */
+ ret = ctdb_ctrl_getrecmaster(ctdb, tmp_ctx, timeout, CTDB_CURRENT_NODE, &recmaster);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, (__location__ " Unable to get recmaster from local node\n"));
+ talloc_free(tmp_ctx);
+ return ret;
+ }
+
+
+ /* read the node flags from the recmaster */
+ ret = ctdb_ctrl_getnodemap(ctdb, timeout, recmaster, tmp_ctx, &nodemap);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, (__location__ " Unable to get nodemap from node %u\n", destnode));
+ talloc_free(tmp_ctx);
+ return -1;
+ }
+ if (destnode >= nodemap->num) {
+ DEBUG(DEBUG_ERR,(__location__ " Nodemap from recmaster does not contain node %d\n", destnode));
+ talloc_free(tmp_ctx);
+ return -1;
+ }
- m.set = set;
- m.clear = clear;
+ c.pnn = destnode;
+ c.old_flags = nodemap->nodes[destnode].flags;
+ c.new_flags = c.old_flags;
+ c.new_flags |= set;
+ c.new_flags &= ~clear;
- data.dsize = sizeof(m);
- data.dptr = (unsigned char *)&m;
+ data.dsize = sizeof(c);
+ data.dptr = (unsigned char *)&c;
- ret = ctdb_control(ctdb, destnode, 0,
- CTDB_CONTROL_MODIFY_FLAGS, 0, data,
- NULL, NULL, &res, &timeout, NULL);
- if (ret != 0 || res != 0) {
- DEBUG(DEBUG_ERR,(__location__ " ctdb_control for modflags failed\n"));
+ /* send the flags update to all connected nodes */
+ nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
+
+ if (ctdb_client_async_control(ctdb, CTDB_CONTROL_MODIFY_FLAGS,
+ nodes, 0,
+ timeout, false, data,
+ NULL, NULL,
+ NULL) != 0) {
+ DEBUG(DEBUG_ERR, (__location__ " Unable to update nodeflags on remote nodes\n"));
+
+ talloc_free(tmp_ctx);
return -1;
}
+ talloc_free(tmp_ctx);
return 0;
}
int ctdb_ctrl_gratious_arp(struct ctdb_context *ctdb,
struct timeval timeout,
uint32_t destnode,
- struct sockaddr_in *sin,
+ ctdb_sock_addr *addr,
const char *ifname)
{
TDB_DATA data;
int32_t res;
int ret, len;
- struct ctdb_control_ip_iface *gratious_arp;
+ struct ctdb_control_gratious_arp *gratious_arp;
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
len = strlen(ifname)+1;
gratious_arp = talloc_size(tmp_ctx,
- offsetof(struct ctdb_control_ip_iface, iface) + len);
+ offsetof(struct ctdb_control_gratious_arp, iface) + len);
CTDB_NO_MEMORY(ctdb, gratious_arp);
- gratious_arp->sin = *sin;
+ gratious_arp->addr = *addr;
gratious_arp->len = len;
memcpy(&gratious_arp->iface[0], ifname, len);
- data.dsize = offsetof(struct ctdb_control_ip_iface, iface) + len;
+ data.dsize = offsetof(struct ctdb_control_gratious_arp, iface) + len;
data.dptr = (unsigned char *)gratious_arp;
ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SEND_GRATIOUS_ARP, 0, data, NULL,
int ctdb_ctrl_get_tcp_tickles(struct ctdb_context *ctdb,
struct timeval timeout, uint32_t destnode,
TALLOC_CTX *mem_ctx,
- struct sockaddr_in *ip,
+ ctdb_sock_addr *addr,
struct ctdb_control_tcp_tickle_list **list)
{
int ret;
TDB_DATA data, outdata;
int32_t status;
- data.dptr = (uint8_t*)ip;
- data.dsize = sizeof(struct sockaddr_in);
+ data.dptr = (uint8_t*)addr;
+ data.dsize = sizeof(ctdb_sock_addr);
ret = ctdb_control(ctdb, destnode, 0,
CTDB_CONTROL_GET_TCP_TICKLE_LIST, 0, data,
mem_ctx, &outdata, &status, NULL, NULL);
- if (ret != 0) {
+ if (ret != 0 || status != 0) {
DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get tcp tickles failed\n"));
return -1;
}
*/
struct ctdb_context *ctdb_init(struct event_context *ev)
{
+ int ret;
struct ctdb_context *ctdb;
ctdb = talloc_zero(ev, struct ctdb_context);
+ if (ctdb == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " talloc_zero failed.\n"));
+ return NULL;
+ }
ctdb->ev = ev;
ctdb->idr = idr_init(ctdb);
+ /* Wrap early to exercise code. */
+ ctdb->lastid = INT_MAX-200;
CTDB_NO_MEMORY_NULL(ctdb, ctdb->idr);
- ctdb_set_socketname(ctdb, CTDB_PATH);
+ ret = ctdb_set_socketname(ctdb, CTDB_PATH);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,(__location__ " ctdb_set_socketname failed.\n"));
+ talloc_free(ctdb);
+ return NULL;
+ }
+
+ ctdb->statistics.statistics_start_time = timeval_current();
return ctdb;
}
int ctdb_set_socketname(struct ctdb_context *ctdb, const char *socketname)
{
ctdb->daemon.name = talloc_strdup(ctdb, socketname);
+ CTDB_NO_MEMORY(ctdb, ctdb->daemon.name);
+
return 0;
}
+/*
+  return the socket name stored in the context's daemon.name field
+  (the field that ctdb_set_socketname() assigns)
+ */
+const char *ctdb_get_socketname(struct ctdb_context *ctdb)
+{
+	const char *socketname = ctdb->daemon.name;
+
+	return socketname;
+}
+
/*
return the pnn of this node
*/
static void async_callback(struct ctdb_client_control_state *state)
{
struct client_async_data *data = talloc_get_type(state->async.private_data, struct client_async_data);
+ struct ctdb_context *ctdb = talloc_get_type(state->ctdb, struct ctdb_context);
int ret;
+ TDB_DATA outdata;
int32_t res;
+ uint32_t destnode = state->c->hdr.destnode;
/* one more node has responded with recmode data */
data->count--;
*/
if (state->state != CTDB_CONTROL_DONE) {
if ( !data->dont_log_errors) {
- DEBUG(DEBUG_ERR,("Async operation failed with state %d\n", state->state));
+ DEBUG(DEBUG_ERR,("Async operation failed with state %d, opcode:%u\n", state->state, data->opcode));
}
data->fail_count++;
+ if (data->fail_callback) {
+ data->fail_callback(ctdb, destnode, res, outdata,
+ data->callback_data);
+ }
return;
}
state->async.fn = NULL;
- ret = ctdb_control_recv(state->ctdb, state, data, NULL, &res, NULL);
+ ret = ctdb_control_recv(ctdb, state, data, &outdata, &res, NULL);
if ((ret != 0) || (res != 0)) {
if ( !data->dont_log_errors) {
- DEBUG(DEBUG_ERR,("Async operation failed with ret=%d res=%d\n", ret, (int)res));
+ DEBUG(DEBUG_ERR,("Async operation failed with ret=%d res=%d opcode=%u\n", ret, (int)res, data->opcode));
}
data->fail_count++;
+ if (data->fail_callback) {
+ data->fail_callback(ctdb, destnode, res, outdata,
+ data->callback_data);
+ }
+ }
+ if ((ret == 0) && (data->callback != NULL)) {
+ data->callback(ctdb, destnode, res, outdata,
+ data->callback_data);
}
}
int ctdb_client_async_control(struct ctdb_context *ctdb,
enum ctdb_controls opcode,
uint32_t *nodes,
+ uint64_t srvid,
struct timeval timeout,
bool dont_log_errors,
- TDB_DATA data)
+ TDB_DATA data,
+ client_async_callback client_callback,
+ client_async_callback fail_callback,
+ void *callback_data)
{
struct client_async_data *async_data;
struct ctdb_client_control_state *state;
int j, num_nodes;
-
+
async_data = talloc_zero(ctdb, struct client_async_data);
CTDB_NO_MEMORY_FATAL(ctdb, async_data);
async_data->dont_log_errors = dont_log_errors;
+ async_data->callback = client_callback;
+ async_data->fail_callback = fail_callback;
+ async_data->callback_data = callback_data;
+ async_data->opcode = opcode;
num_nodes = talloc_get_size(nodes) / sizeof(uint32_t);
for (j=0; j<num_nodes; j++) {
uint32_t pnn = nodes[j];
- state = ctdb_control_send(ctdb, pnn, 0, opcode,
+ state = ctdb_control_send(ctdb, pnn, srvid, opcode,
0, data, async_data, &timeout, NULL);
if (state == NULL) {
DEBUG(DEBUG_ERR,(__location__ " Failed to call async control %u\n", (unsigned)opcode));
return nodes;
}
+/*
+  build the list of pnns of all active nodes, leaving out one given pnn
+
+  A node map entry is skipped when any NODE_FLAGS_INACTIVE bit is set in
+  its flags or when its pnn equals the pnn argument. The returned array is
+  allocated on mem_ctx and holds exactly the entries that passed the
+  filter.
+ */
+uint32_t *list_of_active_nodes_except_pnn(struct ctdb_context *ctdb,
+					  struct ctdb_node_map *node_map,
+					  TALLOC_CTX *mem_ctx,
+					  uint32_t pnn)
+{
+	int idx, filled, wanted;
+	uint32_t *nodes;
+
+	/* first pass: count the matching entries to size the array */
+	wanted = 0;
+	for (idx = 0; idx < node_map->num; idx++) {
+		if (!(node_map->nodes[idx].flags & NODE_FLAGS_INACTIVE) &&
+		    node_map->nodes[idx].pnn != pnn) {
+			wanted++;
+		}
+	}
+
+	nodes = talloc_array(mem_ctx, uint32_t, wanted);
+	CTDB_NO_MEMORY_FATAL(ctdb, nodes);
+
+	/* second pass: copy the pnn of every matching entry */
+	filled = 0;
+	for (idx = 0; idx < node_map->num; idx++) {
+		if (!(node_map->nodes[idx].flags & NODE_FLAGS_INACTIVE) &&
+		    node_map->nodes[idx].pnn != pnn) {
+			nodes[filled] = node_map->nodes[idx].pnn;
+			filled++;
+		}
+	}
+
+	return nodes;
+}
+
+/*
+  build the list of pnns of all connected nodes
+
+  A node map entry is skipped when any NODE_FLAGS_DISCONNECTED bit is set
+  in its flags. The entry whose pnn equals ctdb->pnn is only kept when
+  include_self is true. The returned array is allocated on mem_ctx.
+ */
+uint32_t *list_of_connected_nodes(struct ctdb_context *ctdb,
+				  struct ctdb_node_map *node_map,
+				  TALLOC_CTX *mem_ctx,
+				  bool include_self)
+{
+	int idx, filled, wanted;
+	uint32_t *nodes;
+
+	/* first pass: count the matching entries to size the array */
+	wanted = 0;
+	for (idx = 0; idx < node_map->num; idx++) {
+		if (!(node_map->nodes[idx].flags & NODE_FLAGS_DISCONNECTED) &&
+		    (node_map->nodes[idx].pnn != ctdb->pnn || include_self)) {
+			wanted++;
+		}
+	}
+
+	nodes = talloc_array(mem_ctx, uint32_t, wanted);
+	CTDB_NO_MEMORY_FATAL(ctdb, nodes);
+
+	/* second pass: copy the pnn of every matching entry */
+	filled = 0;
+	for (idx = 0; idx < node_map->num; idx++) {
+		if (!(node_map->nodes[idx].flags & NODE_FLAGS_DISCONNECTED) &&
+		    (node_map->nodes[idx].pnn != ctdb->pnn || include_self)) {
+			nodes[filled] = node_map->nodes[idx].pnn;
+			filled++;
+		}
+	}
+
+	return nodes;
+}
+
/*
this is used to test if a pnn lock exists and if it exists will return
the number of connections that pnn has reported or -1 if that recovery
{
int ret;
int32_t res;
+ TDB_DATA outdata;
- ret = ctdb_control_recv(ctdb, state, mem_ctx, NULL, &res, NULL);
- if (ret != 0) {
+ ret = ctdb_control_recv(ctdb, state, mem_ctx, &outdata, &res, NULL);
+ if ( (ret != 0) || (res != 0) ) {
DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_getcapabilities_recv failed\n"));
return -1;
}
if (capabilities) {
- *capabilities = (uint32_t)res;
+ *capabilities = *((uint32_t *)outdata.dptr);
}
return 0;
talloc_free(tmp_ctx);
return ret;
}
+
+/**
+ * ask a node whether a transaction is active on a given database
+ *
+ * The db_id is sent as the payload of a CTDB_CONTROL_TRANS2_ACTIVE control
+ * to destnode; no timeout is passed to ctdb_control(). Returns -1 if the
+ * control call itself fails, otherwise the status value the control
+ * reported.
+ */
+int32_t ctdb_ctrl_transaction_active(struct ctdb_context *ctdb,
+				     uint32_t destnode,
+				     uint32_t db_id)
+{
+	TDB_DATA request;
+	int32_t active;
+	int rc;
+
+	request.dsize = sizeof(db_id);
+	request.dptr = (uint8_t *)&db_id;
+
+	rc = ctdb_control(ctdb, destnode, 0,
+			  CTDB_CONTROL_TRANS2_ACTIVE,
+			  0, request, NULL, NULL, &active,
+			  NULL, NULL);
+	if (rc == 0) {
+		return active;
+	}
+
+	DEBUG(DEBUG_ERR, (__location__ " ctdb control for transaction_active failed\n"));
+	return -1;
+}
+
+
+struct ctdb_transaction_handle { /* per-transaction state handed out by ctdb_transaction_start() */
+ struct ctdb_db_context *ctdb_db; /* database the transaction operates on */
+ bool in_replay; /* set by ctdb_replay_transaction(); while set, fetch/store do not append to m_all */
+ /*
+ * we store the reads and writes done under a transaction:
+ * - one list stores both reads and writes (m_all),
+ * - the other just writes (m_write)
+ */
+ struct ctdb_marshall_buffer *m_all; /* reads are added with reqid 1, writes with reqid 0 */
+ struct ctdb_marshall_buffer *m_write; /* sent to the daemon on commit; NULL means nothing was written */
+};
+
+/*
+  talloc destructor for a transaction handle: cancel the tdb transaction
+  on the handle's database. Always returns 0.
+ */
+static int ctdb_transaction_destructor(struct ctdb_transaction_handle *h)
+{
+	struct ctdb_db_context *db = h->ctdb_db;
+
+	tdb_transaction_cancel(db->ltdb->tdb);
+
+	return 0;
+}
+
+/*
+ * Start a transaction on a persistent database. Used both for the initial
+ * start (ctdb_transaction_start) and when a transaction is replayed
+ * (ctdb_replay_transaction).
+ *
+ * Steps, as implemented below:
+ *  - fetch-lock the CTDB_TRANSACTION_LOCK_KEY record;
+ *  - ask CTDB_CURRENT_NODE whether a transaction is already active on this
+ *    db; if the answer is 1, sleep for a random interval below 100ms and
+ *    start over;
+ *  - store our pid in the lock record, with the rsn bumped and dmaster set
+ *    to our pnn;
+ *  - start the tdb transaction and re-read the lock record inside it; if
+ *    the read fails, we are no longer dmaster, or the stored pid is not
+ *    ours, cancel the tdb transaction and start over.
+ *
+ * Returns 0 with the tdb transaction open, or -1 if the database is not
+ * persistent, the fetch-lock fails, the pid cannot be stored or the tdb
+ * transaction cannot be started.
+ * NOTE(review): the "start over" paths have no retry limit.
+ */
+static int ctdb_transaction_fetch_start(struct ctdb_transaction_handle *h)
+{
+ struct ctdb_record_handle *rh;
+ TDB_DATA key;
+ TDB_DATA data;
+ struct ctdb_ltdb_header header;
+ TALLOC_CTX *tmp_ctx;
+ const char *keyname = CTDB_TRANSACTION_LOCK_KEY;
+ int ret;
+ struct ctdb_db_context *ctdb_db = h->ctdb_db;
+ pid_t pid;
+ int32_t status;
+
+ key.dptr = discard_const(keyname);
+ key.dsize = strlen(keyname); /* the key does not include the terminating NUL */
+
+ if (!ctdb_db->persistent) {
+ DEBUG(DEBUG_ERR,(__location__ " Attempted transaction on non-persistent database\n"));
+ return -1;
+ }
+
+again:
+ tmp_ctx = talloc_new(h); /* a fresh scratch context per attempt; freed on every exit from the attempt */
+
+ rh = ctdb_fetch_lock(ctdb_db, tmp_ctx, key, NULL);
+ if (rh == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " Failed to fetch_lock database\n"));
+ talloc_free(tmp_ctx);
+ return -1;
+ }
+
+ status = ctdb_ctrl_transaction_active(ctdb_db->ctdb,
+ CTDB_CURRENT_NODE,
+ ctdb_db->db_id);
+ if (status == 1) { /* any other value, including the -1 error return, falls through */
+ unsigned long int usec = (1000 + random()) % 100000; /* result is in 0..99999 microseconds */
+ DEBUG(DEBUG_DEBUG, (__location__ " transaction is active "
+ "on db_id[0x%08x]. waiting for %lu "
+ "microseconds\n",
+ ctdb_db->db_id, usec));
+ talloc_free(tmp_ctx); /* also frees rh, which was allocated on tmp_ctx */
+ usleep(usec);
+ goto again;
+ }
+
+ /*
+ * store the pid in the database:
+ * it is not enough that the node is dmaster...
+ */
+ pid = getpid();
+ data.dptr = (unsigned char *)&pid;
+ data.dsize = sizeof(pid_t);
+ rh->header.rsn++;
+ rh->header.dmaster = ctdb_db->ctdb->pnn;
+ ret = ctdb_ltdb_store(ctdb_db, key, &(rh->header), data);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, (__location__ " Failed to store pid in "
+ "transaction record\n"));
+ talloc_free(tmp_ctx);
+ return -1;
+ }
+
+ talloc_free(rh); /* presumably drops the record lock taken by ctdb_fetch_lock - TODO confirm */
+
+ ret = tdb_transaction_start(ctdb_db->ltdb->tdb);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,(__location__ " Failed to start tdb transaction\n"));
+ talloc_free(tmp_ctx);
+ return -1;
+ }
+
+ ret = ctdb_ltdb_fetch(ctdb_db, key, &header, tmp_ctx, &data); /* re-read the lock record now that the tdb transaction is open */
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,(__location__ " Failed to re-fetch transaction "
+ "lock record inside transaction\n"));
+ tdb_transaction_cancel(ctdb_db->ltdb->tdb);
+ talloc_free(tmp_ctx);
+ goto again;
+ }
+
+ if (header.dmaster != ctdb_db->ctdb->pnn) {
+ DEBUG(DEBUG_DEBUG,(__location__ " not dmaster any more on "
+ "transaction lock record\n"));
+ tdb_transaction_cancel(ctdb_db->ltdb->tdb);
+ talloc_free(tmp_ctx);
+ goto again;
+ }
+
+ if ((data.dsize != sizeof(pid_t)) || (*(pid_t *)(data.dptr) != pid)) { /* the record no longer holds the pid we stored above */
+ DEBUG(DEBUG_DEBUG, (__location__ " my pid is not stored in "
+ "the transaction lock record\n"));
+ tdb_transaction_cancel(ctdb_db->ltdb->tdb);
+ talloc_free(tmp_ctx);
+ goto again;
+ }
+
+ talloc_free(tmp_ctx);
+
+ return 0; /* the tdb transaction is left open for the caller */
+}
+
+
+/*
+  begin a transaction on a database and return a handle for it
+
+  The handle is allocated on mem_ctx. Once ctdb_transaction_fetch_start()
+  has succeeded, a destructor is attached to the handle so that freeing it
+  cancels the tdb transaction. Returns NULL if the handle cannot be
+  allocated or the transaction cannot be started.
+ */
+struct ctdb_transaction_handle *ctdb_transaction_start(struct ctdb_db_context *ctdb_db,
+						       TALLOC_CTX *mem_ctx)
+{
+	struct ctdb_transaction_handle *handle;
+
+	handle = talloc_zero(mem_ctx, struct ctdb_transaction_handle);
+	if (handle == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " oom for transaction handle\n"));
+		return NULL;
+	}
+	handle->ctdb_db = ctdb_db;
+
+	if (ctdb_transaction_fetch_start(handle) != 0) {
+		talloc_free(handle);
+		return NULL;
+	}
+
+	/* only armed after the tdb transaction has actually been opened */
+	talloc_set_destructor(handle, ctdb_transaction_destructor);
+
+	return handle;
+}
+
+
+
+/*
+  read a record inside a transaction
+
+  A fetch that returns -1 with the header's dmaster left at (uint32_t)-1
+  is treated as "record does not exist": *data is set to tdb_null and the
+  call succeeds. Any other fetch error is returned unchanged. Unless the
+  handle is in replay mode, the key and the data read are appended to the
+  m_all list with reqid 1. Returns -1 if that append fails.
+ */
+int ctdb_transaction_fetch(struct ctdb_transaction_handle *h,
+			   TALLOC_CTX *mem_ctx,
+			   TDB_DATA key, TDB_DATA *data)
+{
+	struct ctdb_db_context *db = h->ctdb_db;
+	struct ctdb_ltdb_header hdr;
+	int rc;
+
+	ZERO_STRUCT(hdr);
+
+	rc = ctdb_ltdb_fetch(db, key, &hdr, mem_ctx, data);
+	if (rc != 0) {
+		if (rc != -1 || hdr.dmaster != (uint32_t)-1) {
+			return rc;
+		}
+		/* record doesn't exist yet */
+		*data = tdb_null;
+	}
+
+	if (h->in_replay) {
+		return 0;
+	}
+
+	h->m_all = ctdb_marshall_add(h, h->m_all, db->db_id, 1, key, NULL, *data);
+	if (h->m_all != NULL) {
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
+	return -1;
+}
+
+/*
+  write a record inside a transaction
+
+  The existing record is fetched first so its header can be reused with an
+  incremented rsn. A record that does not exist yet gets a zeroed header.
+  If the new data is byte-identical to the stored data nothing is written
+  and 0 is returned. Otherwise the write is appended to m_write (and to
+  m_all, with reqid 0, unless the handle is in replay mode) and stored in
+  the local tdb. Returns the fetch or store result, or -1 if a marshalling
+  append fails.
+ */
+int ctdb_transaction_store(struct ctdb_transaction_handle *h,
+			   TDB_DATA key, TDB_DATA data)
+{
+	struct ctdb_db_context *db = h->ctdb_db;
+	TALLOC_CTX *scratch = talloc_new(h);
+	struct ctdb_ltdb_header hdr;
+	TDB_DATA previous;
+	int rc;
+
+	ZERO_STRUCT(hdr);
+
+	/* we need the header so we can update the RSN */
+	rc = ctdb_ltdb_fetch(db, key, &hdr, scratch, &previous);
+	if (rc == -1 && hdr.dmaster == (uint32_t)-1) {
+		/* the record doesn't exist - create one with us as dmaster.
+		   This is only safe because we are in a transaction and this
+		   is a persistent database */
+		ZERO_STRUCT(hdr);
+	} else if (rc != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to fetch record\n"));
+		goto done;
+	}
+
+	if (data.dsize == previous.dsize &&
+	    memcmp(data.dptr, previous.dptr, data.dsize) == 0) {
+		/* save writing the same data */
+		rc = 0;
+		goto done;
+	}
+
+	hdr.rsn++;
+	hdr.dmaster = db->ctdb->pnn;
+
+	if (!h->in_replay) {
+		h->m_all = ctdb_marshall_add(h, h->m_all, db->db_id, 0, key, NULL, data);
+		if (h->m_all == NULL) {
+			DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
+			rc = -1;
+			goto done;
+		}
+	}
+
+	h->m_write = ctdb_marshall_add(h, h->m_write, db->db_id, 0, key, &hdr, data);
+	if (h->m_write == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
+		rc = -1;
+		goto done;
+	}
+
+	rc = ctdb_ltdb_store(db, key, &hdr, data);
+
+done:
+	talloc_free(scratch);
+	return rc;
+}
+
+/*
+  re-run a recorded transaction
+
+  The handle is switched to replay mode, the m_write list is discarded and
+  a new transaction is started. Every entry of m_all is then walked in
+  order: entries with reqid 0 are stored again, all other entries are
+  fetched again and compared with the data recorded earlier. Returns 0 if
+  everything replays cleanly. If starting the transaction fails its result
+  is returned; on any later failure the tdb transaction is cancelled and
+  -1 is returned.
+ */
+static int ctdb_replay_transaction(struct ctdb_transaction_handle *h)
+{
+	struct ctdb_rec_data *cur = NULL;
+	int idx;
+	int rc;
+
+	h->in_replay = true;
+	talloc_free(h->m_write);
+	h->m_write = NULL;
+
+	rc = ctdb_transaction_fetch_start(h);
+	if (rc != 0) {
+		return rc;
+	}
+
+	idx = 0;
+	while (idx < h->m_all->count) {
+		TDB_DATA key, recorded;
+
+		cur = ctdb_marshall_loop_next(h->m_all, cur, NULL, NULL, &key, &recorded);
+		if (cur == NULL) {
+			DEBUG(DEBUG_ERR, (__location__ " Out of records in ctdb_replay_transaction?\n"));
+			goto failed;
+		}
+
+		if (cur->reqid != 0) {
+			/* its a fetch: the data must still match what we saw */
+			TALLOC_CTX *scratch = talloc_new(h);
+			TDB_DATA current;
+			bool unchanged;
+
+			unchanged = ctdb_transaction_fetch(h, scratch, key, &current) == 0 &&
+				    current.dsize == recorded.dsize &&
+				    memcmp(current.dptr, recorded.dptr, recorded.dsize) == 0;
+			talloc_free(scratch);
+			if (!unchanged) {
+				/* fetch failed or the record has changed on us - we have to give up */
+				goto failed;
+			}
+		} else if (ctdb_transaction_store(h, key, recorded) != 0) {
+			/* its a store, and it failed */
+			goto failed;
+		}
+
+		idx++;
+	}
+
+	return 0;
+
+failed:
+	tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
+	return -1;
+}
+
+
+/*
+ commit a transaction
+
+ The handle h is freed on every return path of this function.
+ Returns 0 if nothing was written or the commit went through, -1 if
+ the commit is given up after 100 failed attempts or the replay
+ fails, and the tdb_transaction_commit() result if the final local
+ commit fails.
+ */
+int ctdb_transaction_commit(struct ctdb_transaction_handle *h)
+{
+ int ret, retries=0;
+ int32_t status;
+ struct ctdb_context *ctdb = h->ctdb_db->ctdb;
+ struct timeval timeout;
+ enum ctdb_controls failure_control = CTDB_CONTROL_TRANS2_ERROR;
+
+ talloc_set_destructor(h, NULL); /* from here on the tdb transaction is cancelled or committed explicitly */
+
+ /* our commit strategy is quite complex.
+
+ - we first try to commit the changes to all other nodes
+
+ - if that works, then we commit locally and we are done
+
+ - if a commit on another node fails, then we need to cancel
+ the transaction, then restart the transaction (thus
+ opening a window of time for a pending recovery to
+ complete), then replay the transaction, checking all the
+ reads and writes (checking that reads give the same data,
+ and writes succeed). Then we retry the transaction to the
+ other nodes
+ */
+
+again:
+ if (h->m_write == NULL) {
+ /* no changes were made */
+ tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
+ talloc_free(h);
+ return 0;
+ }
+
+ /* tell ctdbd to commit to the other nodes */
+ timeout = timeval_current_ofs(1, 0); /* offset of 1 second, 0 microseconds */
+ ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
+ retries==0?CTDB_CONTROL_TRANS2_COMMIT:CTDB_CONTROL_TRANS2_COMMIT_RETRY, 0,
+ ctdb_marshall_finish(h->m_write), NULL, NULL, &status,
+ &timeout, NULL);
+ if (ret != 0 || status != 0) {
+ tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
+ DEBUG(DEBUG_NOTICE, (__location__ " transaction commit%s failed"
+ ", retrying after 1 second...\n",
+ (retries==0)?"":"retry ")); /* NOTE(review): on retries this prints "commitretry  failed" - the spacing looks unintended */
+ sleep(1);
+
+ if (ret != 0) {
+ failure_control = CTDB_CONTROL_TRANS2_ERROR;
+ } else {
+ /* work out what error code we will give if we
+ have to fail the operation */
+ switch ((enum ctdb_trans2_commit_error)status) {
+ case CTDB_TRANS2_COMMIT_SUCCESS:
+ case CTDB_TRANS2_COMMIT_SOMEFAIL:
+ case CTDB_TRANS2_COMMIT_TIMEOUT:
+ failure_control = CTDB_CONTROL_TRANS2_ERROR;
+ break;
+ case CTDB_TRANS2_COMMIT_ALLFAIL:
+ failure_control = CTDB_CONTROL_TRANS2_FINISHED;
+ break;
+ }
+ }
+
+ if (++retries == 100) { /* give up after the 100th failed attempt */
+ DEBUG(DEBUG_ERR,(__location__ " Giving up transaction on db 0x%08x after %d retries failure_control=%u\n",
+ h->ctdb_db->db_id, retries, (unsigned)failure_control));
+ ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
+ failure_control, CTDB_CTRL_FLAG_NOREPLY,
+ tdb_null, NULL, NULL, NULL, NULL, NULL);
+ talloc_free(h);
+ return -1;
+ }
+
+ if (ctdb_replay_transaction(h) != 0) { /* restarts the transaction and re-applies the recorded reads and writes */
+ DEBUG(DEBUG_ERR, (__location__ " Failed to replay "
+ "transaction on db 0x%08x, "
+ "failure control =%u\n",
+ h->ctdb_db->db_id,
+ (unsigned)failure_control));
+ ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
+ failure_control, CTDB_CTRL_FLAG_NOREPLY,
+ tdb_null, NULL, NULL, NULL, NULL, NULL);
+ talloc_free(h);
+ return -1;
+ }
+ goto again;
+ } else {
+ failure_control = CTDB_CONTROL_TRANS2_ERROR; /* used if the local commit below fails */
+ }
+
+ /* do the real commit locally */
+ ret = tdb_transaction_commit(h->ctdb_db->ltdb->tdb);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, (__location__ " Failed to commit transaction "
+ "on db id 0x%08x locally, "
+ "failure_control=%u\n",
+ h->ctdb_db->db_id,
+ (unsigned)failure_control));
+ ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
+ failure_control, CTDB_CTRL_FLAG_NOREPLY,
+ tdb_null, NULL, NULL, NULL, NULL, NULL);
+ talloc_free(h);
+ return ret;
+ }
+
+ /* tell ctdbd that we are finished with our local commit */
+ ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
+ CTDB_CONTROL_TRANS2_FINISHED, CTDB_CTRL_FLAG_NOREPLY,
+ tdb_null, NULL, NULL, NULL, NULL, NULL); /* the return value of this control is not checked */
+ talloc_free(h);
+ return 0;
+}
+
+/*
+  ping the main daemon from the recovery daemon
+
+  Sends a CTDB_CONTROL_RECD_PING control without payload to
+  CTDB_CURRENT_NODE; no timeout is passed. Returns 0 on success, -1 if the
+  control call fails or reports a non-zero status.
+ */
+int ctdb_ctrl_recd_ping(struct ctdb_context *ctdb)
+{
+	int32_t status;
+	int rc;
+
+	rc = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_RECD_PING, 0, tdb_null,
+			  ctdb, NULL, &status, NULL, NULL);
+	if (rc == 0 && status == 0) {
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR,("Failed to send recd ping\n"));
+	return -1;
+}
+
+/*
+  turn a daemon ctdb context into a client context
+
+  Meant for a child forked from the main daemon that needs to connect
+  back to the daemon as a client. The formatted fmt string, followed by
+  ":", becomes the new debug_extra log prefix. The transport is shut down
+  if one is set, the event context is replaced with a fresh one that
+  allows nested loops, the daemon socket is closed, the scheduler is
+  restored if do_setsched is set, and finally a client connection is made
+  with ctdb_socket_connect(). Returns 0 on success, -1 if that connect
+  fails.
+ */
+int switch_from_server_to_client(struct ctdb_context *ctdb, const char *fmt, ...)
+{
+	va_list args;
+
+	/* Add extra information so we can identify this in the logs */
+	va_start(args, fmt);
+	debug_extra = talloc_strdup_append(talloc_vasprintf(NULL, fmt, args), ":");
+	va_end(args);
+
+	/* shutdown the transport */
+	if (ctdb->methods != NULL) {
+		ctdb->methods->shutdown(ctdb);
+	}
+
+	/* get a new event context */
+	talloc_free(ctdb->ev);
+	ctdb->ev = event_context_init(ctdb);
+	tevent_loop_allow_nesting(ctdb->ev);
+
+	/* drop the daemon socket */
+	close(ctdb->daemon.sd);
+	ctdb->daemon.sd = -1;
+
+	/* the client does not need to be realtime */
+	if (ctdb->do_setsched) {
+		ctdb_restore_scheduler(ctdb);
+	}
+
+	/* initialise ctdb */
+	if (ctdb_socket_connect(ctdb) == 0) {
+		return 0;
+	}
+
+	DEBUG(DEBUG_ALERT, (__location__ " Failed to init ctdb client\n"));
+	return -1;
+}
+
+/*
+  get the status of running the monitor eventscripts: NULL means never run.
+
+  The eventscript call type is sent as a uint32_t payload of a
+  CTDB_CONTROL_GET_EVENT_SCRIPT_STATUS control to destnode. On success
+  *script_status is either NULL (the control returned no data) or a copy
+  of the returned data allocated on mem_ctx.
+
+  Returns 0 on success, -1 if the control call fails, reports a non-zero
+  status, or the returned data cannot be copied. The copy failure is
+  reported as an error so that it cannot be mistaken for "never run".
+ */
+int ctdb_ctrl_getscriptstatus(struct ctdb_context *ctdb,
+			      struct timeval timeout, uint32_t destnode,
+			      TALLOC_CTX *mem_ctx, enum ctdb_eventscript_call type,
+			      struct ctdb_scripts_wire **script_status)
+{
+	int ret;
+	TDB_DATA outdata, indata;
+	int32_t res;
+	uint32_t uinttype = type;
+
+	indata.dptr = (uint8_t *)&uinttype;
+	indata.dsize = sizeof(uinttype);
+
+	ret = ctdb_control(ctdb, destnode, 0,
+			   CTDB_CONTROL_GET_EVENT_SCRIPT_STATUS, 0, indata,
+			   mem_ctx, &outdata, &res, &timeout, NULL);
+	if (ret != 0 || res != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getscriptstatus failed ret:%d res:%d\n", ret, res));
+		return -1;
+	}
+
+	if (outdata.dsize == 0) {
+		*script_status = NULL;
+		return 0;
+	}
+
+	*script_status = (struct ctdb_scripts_wire *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
+	talloc_free(outdata.dptr);
+	if (*script_status == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getscriptstatus "
+				 "talloc_memdup size %u failed\n",
+				 (unsigned int)outdata.dsize));
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+  report to the main daemon how long taking the reclock file lock took
+
+  The latency value is sent as the raw bytes of the double in a
+  CTDB_CONTROL_RECD_RECLOCK_LATENCY control to CTDB_CURRENT_NODE.
+  Returns 0 on success, -1 if the control call fails or reports a
+  non-zero status.
+  NOTE(review): the timeout argument is accepted but not passed on to
+  ctdb_control() - confirm whether that is intended.
+ */
+int ctdb_ctrl_report_recd_lock_latency(struct ctdb_context *ctdb, struct timeval timeout, double latency)
+{
+	TDB_DATA payload;
+	int32_t status;
+	int rc;
+
+	payload.dsize = sizeof(latency);
+	payload.dptr = (uint8_t *)&latency;
+
+	rc = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_RECD_RECLOCK_LATENCY, 0, payload,
+			  ctdb, NULL, &status, NULL, NULL);
+	if (rc == 0 && status == 0) {
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR,("Failed to send recd reclock latency\n"));
+	return -1;
+}
+
+/*
+  get the name of the reclock file
+
+  Sends a CTDB_CONTROL_GET_RECLOCK_FILE control to destnode. On success
+  *name is NULL if the control returned no data, otherwise a copy of the
+  returned string allocated on mem_ctx.
+
+  Returns 0 on success, -1 if the control call fails, reports a non-zero
+  status, or the string cannot be copied. The copy failure is reported as
+  an error so that it cannot be mistaken for "no reclock file".
+  NOTE(review): the returned data is assumed to be NUL terminated - confirm
+  against the daemon side.
+ */
+int ctdb_ctrl_getreclock(struct ctdb_context *ctdb, struct timeval timeout,
+			 uint32_t destnode, TALLOC_CTX *mem_ctx,
+			 const char **name)
+{
+	int ret;
+	int32_t res;
+	TDB_DATA data;
+
+	ret = ctdb_control(ctdb, destnode, 0,
+			   CTDB_CONTROL_GET_RECLOCK_FILE, 0, tdb_null,
+			   mem_ctx, &data, &res, &timeout, NULL);
+	if (ret != 0 || res != 0) {
+		return -1;
+	}
+
+	if (data.dsize == 0) {
+		*name = NULL;
+	} else {
+		*name = talloc_strdup(mem_ctx, discard_const(data.dptr));
+	}
+	talloc_free(data.dptr);
+
+	if (data.dsize != 0 && *name == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getreclock "
+				 "talloc_strdup failed\n"));
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+  set the reclock filename on a node
+
+  The filename, including its terminating NUL, is sent as the payload of
+  a CTDB_CONTROL_SET_RECLOCK_FILE control to destnode. A NULL reclock is
+  sent as an empty payload. Returns 0 on success, -1 if the control call
+  fails or reports a non-zero status.
+ */
+int ctdb_ctrl_setreclock(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, const char *reclock)
+{
+	TDB_DATA payload;
+	int32_t status;
+	int rc;
+
+	if (reclock != NULL) {
+		payload.dptr = discard_const(reclock);
+		payload.dsize = strlen(reclock) + 1;
+	} else {
+		payload.dptr = NULL;
+		payload.dsize = 0;
+	}
+
+	rc = ctdb_control(ctdb, destnode, 0,
+			  CTDB_CONTROL_SET_RECLOCK_FILE, 0, payload,
+			  NULL, NULL, &status, &timeout, NULL);
+	if (rc == 0 && status == 0) {
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setreclock failed\n"));
+	return -1;
+}
+
+/*
+  stop a node
+
+  Sends a CTDB_CONTROL_STOP_NODE control without payload to destnode.
+  Returns 0 on success, -1 if the control call fails or reports a
+  non-zero status.
+ */
+int ctdb_ctrl_stop_node(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
+{
+	int32_t status;
+	int rc;
+
+	rc = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_STOP_NODE, 0, tdb_null,
+			  ctdb, NULL, &status, &timeout, NULL);
+	if (rc == 0 && status == 0) {
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR,("Failed to stop node\n"));
+	return -1;
+}
+
+/*
+  continue a node
+
+  Sends a CTDB_CONTROL_CONTINUE_NODE control without payload to destnode.
+  The status argument of ctdb_control() is NULL here, so only the return
+  value of the control call is checked. Returns 0 on success, -1 if the
+  control call fails.
+ */
+int ctdb_ctrl_continue_node(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
+{
+	int rc;
+
+	rc = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_CONTINUE_NODE, 0, tdb_null,
+			  ctdb, NULL, NULL, &timeout, NULL);
+	if (rc == 0) {
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR,("Failed to continue node\n"));
+	return -1;
+}
+
+/*
+  set the natgw state on a node
+
+  The natgwstate value is sent as a uint32_t payload of a
+  CTDB_CONTROL_SET_NATGWSTATE control to destnode. Returns 0 on success,
+  -1 if the control call fails or reports a non-zero status.
+ */
+int ctdb_ctrl_setnatgwstate(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t natgwstate)
+{
+	TDB_DATA payload;
+	int32_t status;
+	int rc;
+
+	payload.dptr = (uint8_t *)&natgwstate;
+	payload.dsize = sizeof(natgwstate);
+
+	rc = ctdb_control(ctdb, destnode, 0,
+			  CTDB_CONTROL_SET_NATGWSTATE, 0, payload,
+			  NULL, NULL, &status, &timeout, NULL);
+	if (rc == 0 && status == 0) {
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setnatgwstate failed\n"));
+	return -1;
+}
+
+/*
+  set the lmaster role on a node
+
+  The lmasterrole value is sent as a uint32_t payload of a
+  CTDB_CONTROL_SET_LMASTERROLE control to destnode. Returns 0 on success,
+  -1 if the control call fails or reports a non-zero status.
+ */
+int ctdb_ctrl_setlmasterrole(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t lmasterrole)
+{
+	TDB_DATA payload;
+	int32_t status;
+	int rc;
+
+	payload.dptr = (uint8_t *)&lmasterrole;
+	payload.dsize = sizeof(lmasterrole);
+
+	rc = ctdb_control(ctdb, destnode, 0,
+			  CTDB_CONTROL_SET_LMASTERROLE, 0, payload,
+			  NULL, NULL, &status, &timeout, NULL);
+	if (rc == 0 && status == 0) {
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setlmasterrole failed\n"));
+	return -1;
+}
+
+/*
+  set the recmaster role on a node
+
+  The recmasterrole value is sent as a uint32_t payload of a
+  CTDB_CONTROL_SET_RECMASTERROLE control to destnode. Returns 0 on
+  success, -1 if the control call fails or reports a non-zero status.
+ */
+int ctdb_ctrl_setrecmasterrole(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmasterrole)
+{
+	TDB_DATA payload;
+	int32_t status;
+	int rc;
+
+	payload.dptr = (uint8_t *)&recmasterrole;
+	payload.dsize = sizeof(recmasterrole);
+
+	rc = ctdb_control(ctdb, destnode, 0,
+			  CTDB_CONTROL_SET_RECMASTERROLE, 0, payload,
+			  NULL, NULL, &status, &timeout, NULL);
+	if (rc == 0 && status == 0) {
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setrecmasterrole failed\n"));
+	return -1;
+}
+
+/*
+  enable an eventscript on a node
+
+  The script name, including its terminating NUL, is sent as the payload
+  of a CTDB_CONTROL_ENABLE_SCRIPT control to destnode. Returns 0 on
+  success, -1 if the control call fails or reports a non-zero status.
+ */
+int ctdb_ctrl_enablescript(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, const char *script)
+{
+	TDB_DATA payload;
+	int32_t status;
+	int rc;
+
+	payload.dptr = discard_const(script);
+	payload.dsize = strlen(script) + 1;
+
+	rc = ctdb_control(ctdb, destnode, 0,
+			  CTDB_CONTROL_ENABLE_SCRIPT, 0, payload,
+			  NULL, NULL, &status, &timeout, NULL);
+	if (rc == 0 && status == 0) {
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR,(__location__ " ctdb_control for enablescript failed\n"));
+	return -1;
+}
+
+/*
+  disable an eventscript on a node
+
+  The script name, including its terminating NUL, is sent as the payload
+  of a CTDB_CONTROL_DISABLE_SCRIPT control to destnode. Returns 0 on
+  success, -1 if the control call fails or reports a non-zero status.
+ */
+int ctdb_ctrl_disablescript(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, const char *script)
+{
+	TDB_DATA payload;
+	int32_t status;
+	int rc;
+
+	payload.dptr = discard_const(script);
+	payload.dsize = strlen(script) + 1;
+
+	rc = ctdb_control(ctdb, destnode, 0,
+			  CTDB_CONTROL_DISABLE_SCRIPT, 0, payload,
+			  NULL, NULL, &status, &timeout, NULL);
+	if (rc == 0 && status == 0) {
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR,(__location__ " ctdb_control for disablescript failed\n"));
+	return -1;
+}
+
+
+/*
+  set the ban state on a node
+
+  The caller's ctdb_ban_time structure is sent unchanged as the payload of
+  a CTDB_CONTROL_SET_BAN_STATE control to destnode. Returns 0 on success,
+  -1 if the control call fails or reports a non-zero status.
+ */
+int ctdb_ctrl_set_ban(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, struct ctdb_ban_time *bantime)
+{
+	TDB_DATA payload;
+	int32_t status;
+	int rc;
+
+	payload.dptr = (uint8_t *)bantime;
+	payload.dsize = sizeof(*bantime);
+
+	rc = ctdb_control(ctdb, destnode, 0,
+			  CTDB_CONTROL_SET_BAN_STATE, 0, payload,
+			  NULL, NULL, &status, &timeout, NULL);
+	if (rc == 0 && status == 0) {
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set ban state failed\n"));
+	return -1;
+}
+
+
+/*
+  get the ban state of a node
+
+  Sends a CTDB_CONTROL_GET_BAN_STATE control without payload to destnode.
+  The reply is received on a temporary context and, on success, the
+  returned buffer is moved onto mem_ctx and handed back through *bantime.
+  Returns 0 on success, -1 if the control call fails or reports a
+  non-zero status.
+  NOTE(review): the size of the returned buffer is not checked against
+  sizeof(struct ctdb_ban_time) - confirm the daemon always sends a full
+  structure.
+ */
+int ctdb_ctrl_get_ban(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_ban_time **bantime)
+{
+	int ret;
+	TDB_DATA outdata;
+	int32_t res;
+	TALLOC_CTX *tmp_ctx = talloc_new(NULL);
+
+	ret = ctdb_control(ctdb, destnode, 0,
+			   CTDB_CONTROL_GET_BAN_STATE, 0, tdb_null,
+			   tmp_ctx, &outdata, &res, &timeout, NULL);
+	if (ret != 0 || res != 0) {
+		/* the message names the get control; it used to say "set" */
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ban state failed\n"));
+		talloc_free(tmp_ctx);
+		return -1;
+	}
+
+	/* move the reply out of tmp_ctx before tmp_ctx is freed */
+	*bantime = (struct ctdb_ban_time *)talloc_steal(mem_ctx, outdata.dptr);
+	talloc_free(tmp_ctx);
+
+	return 0;
+}
+
+
+/*
+  set the priority of a database on a node
+
+  The caller's ctdb_db_priority structure is sent unchanged as the payload
+  of a CTDB_CONTROL_SET_DB_PRIORITY control to destnode. A temporary
+  talloc context is used for the call and freed before returning.
+  Returns 0 on success, -1 if the control call fails or reports a
+  non-zero status.
+ */
+int ctdb_ctrl_set_db_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, struct ctdb_db_priority *db_prio)
+{
+	TALLOC_CTX *scratch = talloc_new(NULL);
+	TDB_DATA payload;
+	int32_t status;
+	int result = 0;
+	int rc;
+
+	payload.dsize = sizeof(*db_prio);
+	payload.dptr = (uint8_t*)db_prio;
+
+	rc = ctdb_control(ctdb, destnode, 0,
+			  CTDB_CONTROL_SET_DB_PRIORITY, 0, payload,
+			  scratch, NULL, &status, &timeout, NULL);
+	if (rc != 0 || status != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set_db_priority failed\n"));
+		result = -1;
+	}
+
+	talloc_free(scratch);
+
+	return result;
+}
+
+/*
+  get the priority of a database on a node
+
+  The db_id is sent as a uint32_t payload of a
+  CTDB_CONTROL_GET_DB_PRIORITY control to destnode. The priority comes
+  back as the status value of the control: a negative status is treated
+  as failure, any other value is stored in *priority when priority is not
+  NULL. Returns 0 on success, -1 if the control call fails or reports a
+  negative status.
+ */
+int ctdb_ctrl_get_db_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t db_id, uint32_t *priority)
+{
+	int ret;
+	int32_t res;
+	TDB_DATA data;
+	TALLOC_CTX *tmp_ctx = talloc_new(NULL);
+
+	data.dptr = (uint8_t*)&db_id;
+	data.dsize = sizeof(db_id);
+
+	ret = ctdb_control(ctdb, destnode, 0,
+			   CTDB_CONTROL_GET_DB_PRIORITY, 0, data,
+			   tmp_ctx, NULL, &res, &timeout, NULL);
+	if (ret != 0 || res < 0) {
+		/* the message names the get control; it used to say "set" */
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get_db_priority failed\n"));
+		talloc_free(tmp_ctx);
+		return -1;
+	}
+
+	if (priority) {
+		*priority = res;
+	}
+
+	talloc_free(tmp_ctx);
+
+	return 0;
+}
+
+/*
+  get the statistics history of a node
+
+  Sends a CTDB_CONTROL_GET_STAT_HISTORY control without payload to
+  destnode. On success *stats is a copy of the returned data allocated on
+  mem_ctx. Returns 0 on success, -1 if the control call fails, reports a
+  non-zero status, returns no data, or the data cannot be copied.
+ */
+int ctdb_ctrl_getstathistory(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_statistics_wire **stats)
+{
+	int ret;
+	TDB_DATA outdata;
+	int32_t res;
+
+	ret = ctdb_control(ctdb, destnode, 0,
+			   CTDB_CONTROL_GET_STAT_HISTORY, 0, tdb_null,
+			   mem_ctx, &outdata, &res, &timeout, NULL);
+	if (ret != 0 || res != 0 || outdata.dsize == 0) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getstathistory failed ret:%d res:%d\n", ret, res));
+		return -1;
+	}
+
+	*stats = (struct ctdb_statistics_wire *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
+	talloc_free(outdata.dptr);
+	if (*stats == NULL) {
+		/* do not report success with a NULL result */
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getstathistory "
+				 "talloc_memdup size %u failed\n",
+				 (unsigned int)outdata.dsize));
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+  return a pointer to the ltdb header embedded in a record handle,
+  or NULL when no handle is given
+ */
+struct ctdb_ltdb_header *ctdb_header_from_record_handle(struct ctdb_record_handle *h)
+{
+	return (h != NULL) ? &h->header : NULL;
+}
+
+
+/*
+  start an asynchronous "update record" control
+
+  A marshall buffer holding exactly one record (key, header, data) for
+  the given database is built and sent as the payload of a
+  CTDB_CONTROL_UPDATE_RECORD control to destnode. The control state is
+  allocated on mem_ctx. The marshall buffer is temporary and is freed
+  before returning.
+
+  Returns the control state, or NULL if the buffer cannot be built or
+  ctdb_control_send() fails.
+ */
+struct ctdb_client_control_state *
+ctdb_ctrl_updaterecord_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, struct ctdb_db_context *ctdb_db, TDB_DATA key, struct ctdb_ltdb_header *header, TDB_DATA data)
+{
+	struct ctdb_client_control_state *handle;
+	struct ctdb_marshall_buffer *m;
+	struct ctdb_marshall_buffer *expanded;
+	struct ctdb_rec_data *rec;
+	TDB_DATA outdata;
+
+	m = talloc_zero(mem_ctx, struct ctdb_marshall_buffer);
+	if (m == NULL) {
+		DEBUG(DEBUG_ERR, ("Failed to allocate marshall buffer for update record\n"));
+		return NULL;
+	}
+
+	m->db_id = ctdb_db->db_id;
+
+	rec = ctdb_marshall_record(m, 0, key, header, data);
+	if (rec == NULL) {
+		DEBUG(DEBUG_ERR,("Failed to marshall record for update record\n"));
+		talloc_free(m);
+		return NULL;
+	}
+
+	/* keep the old pointer until the resize is known to have worked,
+	   so the buffer can still be freed if it fails */
+	expanded = talloc_realloc_size(mem_ctx, m, rec->length + offsetof(struct ctdb_marshall_buffer, data));
+	if (expanded == NULL) {
+		DEBUG(DEBUG_CRIT,(__location__ " Failed to expand recdata\n"));
+		talloc_free(m);
+		return NULL;
+	}
+	m = expanded;
+
+	m->count++;
+	memcpy((uint8_t *)m + offsetof(struct ctdb_marshall_buffer, data), rec, rec->length);
+
+	outdata.dptr = (uint8_t *)m;
+	outdata.dsize = talloc_get_size(m);
+
+	handle = ctdb_control_send(ctdb, destnode, 0,
+				   CTDB_CONTROL_UPDATE_RECORD, 0, outdata,
+				   mem_ctx, &timeout, NULL);
+	talloc_free(m);
+	return handle;
+}
+
+/*
+  collect the reply to an update-record control
+
+  Returns 0 only when ctdb_control_recv() succeeds and the control's
+  result is 0; logs and returns -1 otherwise.
+ */
+int ctdb_ctrl_updaterecord_recv(struct ctdb_context *ctdb, struct ctdb_client_control_state *state)
+{
+	int32_t status;
+	int rc = ctdb_control_recv(ctdb, state, state, NULL, &status, NULL);
+
+	if (rc == 0 && status == 0) {
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_update_record_recv failed\n"));
+	return -1;
+}
+
+/*
+  synchronous update-record control: issue the send and hand the resulting
+  state straight to the matching recv
+
+  Returns whatever ctdb_ctrl_updaterecord_recv() returns.
+ */
+int
+ctdb_ctrl_updaterecord(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, struct ctdb_db_context *ctdb_db, TDB_DATA key, struct ctdb_ltdb_header *header, TDB_DATA data)
+{
+	return ctdb_ctrl_updaterecord_recv(ctdb,
+		ctdb_ctrl_updaterecord_send(ctdb, mem_ctx, timeout, destnode,
+					    ctdb_db, key, header, data));
+}
+
+
+
+
+
+
+/*
+  async send of the control that sets a database to be readonly
+
+  The payload is the 32-bit database id. Returns the control state from
+  ctdb_control_send(). NULL is passed for the timeout argument
+  (presumably meaning "no timeout" - confirm against ctdb_control_send).
+ */
+struct ctdb_client_control_state *
+ctdb_ctrl_set_db_readonly_send(struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid)
+{
+	TDB_DATA payload = {
+		.dptr  = (uint8_t *)&dbid,
+		.dsize = sizeof(dbid),
+	};
+
+	return ctdb_control_send(ctdb, destnode, 0,
+				 CTDB_CONTROL_SET_DB_READONLY, 0, payload,
+				 ctdb, NULL, NULL);
+}
+
+/*
+  collect the reply to a set-db-readonly control
+
+  Returns 0 only when ctdb_control_recv() succeeds and the control's
+  result is 0; otherwise logs both values and returns -1.
+ */
+int ctdb_ctrl_set_db_readonly_recv(struct ctdb_context *ctdb, struct ctdb_client_control_state *state)
+{
+	int32_t status;
+	int rc = ctdb_control_recv(ctdb, state, ctdb, NULL, &status, NULL);
+
+	if (rc == 0 && status == 0) {
+		return 0;
+	}
+
+	DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_set_db_readonly_recv failed  ret:%d res:%d\n", rc, status));
+	return -1;
+}
+
+/*
+  synchronous set-db-readonly control: issue the send and hand the
+  resulting state straight to the matching recv
+
+  Returns whatever ctdb_ctrl_set_db_readonly_recv() returns.
+ */
+int ctdb_ctrl_set_db_readonly(struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid)
+{
+	return ctdb_ctrl_set_db_readonly_recv(ctdb,
+		ctdb_ctrl_set_db_readonly_send(ctdb, destnode, dbid));
+}