#include <stdlib.h>
#include <sys/socket.h>
#include <sys/un.h>
+#include <sys/ioctl.h>
#include "libctdb_private.h"
#include "io_elem.h"
#include "local_tdb.h"
/* Remove type-safety macros. */
#undef ctdb_attachdb_send
#undef ctdb_readrecordlock_async
+#undef ctdb_connect
+
+struct ctdb_lock {
+ struct ctdb_lock *next, *prev;
+
+ struct ctdb_db *ctdb_db;
+ TDB_DATA key;
+
+ /* This will always be set by the time user sees this. */
+ unsigned long held_magic;
+ struct ctdb_ltdb_header *hdr;
+
+ /* For convenience, we stash original callback here. */
+ ctdb_rrl_callback_t callback;
+};
+
+struct ctdb_db {
+ struct ctdb_connection *ctdb;
+ bool persistent;
+ uint32_t tdb_flags;
+ uint32_t id;
+ struct tdb_context *tdb;
+
+ ctdb_callback_t callback;
+ void *private_data;
+};
+
+static void remove_lock(struct ctdb_connection *ctdb, struct ctdb_lock *lock)
+{
+ DLIST_REMOVE(ctdb->locks, lock);
+}
+
+/* FIXME: for thread safety, need tid info too. */
+static bool holding_lock(struct ctdb_connection *ctdb)
+{
+ /* For the moment, you can't ever hold more than 1 lock. */
+ return (ctdb->locks != NULL);
+}
+
+static void add_lock(struct ctdb_connection *ctdb, struct ctdb_lock *lock)
+{
+ DLIST_ADD(ctdb->locks, lock);
+}
+
+static void cleanup_locks(struct ctdb_connection *ctdb, struct ctdb_db *db)
+{
+ struct ctdb_lock *i, *next;
+
+ for (i = ctdb->locks; i; i = next) {
+ /* Grab next pointer, as release_lock will free i */
+ next = i->next;
+ if (i->ctdb_db == db) {
+ ctdb_release_lock(db, i);
+ }
+ }
+}
/* FIXME: Could be in shared util code with rest of ctdb */
static void close_noerr(int fd)
struct ctdb_request *req,
void *unused)
{
- if (ctdb_getpnn_recv(ctdb, req, &ctdb->pnn) != 0) {
- /* FIXME: Report error. */
+ if (!ctdb_getpnn_recv(ctdb, req, &ctdb->pnn)) {
+ DEBUG(ctdb, LOG_CRIT,
+ "ctdb_connect(async): failed to get pnn");
ctdb->broken = true;
}
ctdb_request_free(ctdb, req);
}
-struct ctdb_connection *ctdb_connect(const char *addr)
+struct ctdb_connection *ctdb_connect(const char *addr,
+ ctdb_log_fn_t log_fn, void *log_priv)
{
struct ctdb_connection *ctdb;
struct sockaddr_un sun;
ctdb = malloc(sizeof(*ctdb));
- if (!ctdb)
+ if (!ctdb) {
+ /* With no format string, we hope it doesn't use ap! */
+ va_list ap;
+ memset(&ap, 0, sizeof(ap));
+ errno = ENOMEM;
+ log_fn(log_priv, LOG_ERR, "ctdb_connect: no memory", ap);
goto fail;
+ }
ctdb->outq = NULL;
ctdb->doneq = NULL;
ctdb->in = NULL;
ctdb->message_handlers = NULL;
ctdb->next_id = 0;
ctdb->broken = false;
+ ctdb->log = log_fn;
+ ctdb->log_priv = log_priv;
+ ctdb->locks = NULL;
memset(&sun, 0, sizeof(sun));
sun.sun_family = AF_UNIX;
return NULL;
}
+void ctdb_disconnect(struct ctdb_connection *ctdb)
+{
+ struct ctdb_request *i;
+
+ DEBUG(ctdb, LOG_DEBUG, "ctdb_disconnect");
+
+ while ((i = ctdb->outq) != NULL) {
+ DLIST_REMOVE(ctdb->outq, i);
+ ctdb_request_free(ctdb, i);
+ }
+
+ while ((i = ctdb->doneq) != NULL) {
+ DLIST_REMOVE(ctdb->doneq, i);
+ ctdb_request_free(ctdb, i);
+ }
+
+ if (ctdb->in)
+ free_io_elem(ctdb->in);
+
+ remove_message_handlers(ctdb);
+
+ close(ctdb->fd);
+ /* Just in case they try to reuse */
+ ctdb->fd = -1;
+ free(ctdb);
+}
+
int ctdb_get_fd(struct ctdb_connection *ctdb)
{
return ctdb->fd;
void ctdb_request_free(struct ctdb_connection *ctdb, struct ctdb_request *req)
{
+ if (req->next || req->prev) {
+ DEBUG(ctdb, LOG_ALERT,
+ "ctdb_request_free: request not complete! ctdb_cancel? %p (id %u)",
+ req, req->hdr.hdr ? req->hdr.hdr->reqid : 0);
+ ctdb_cancel(ctdb, req);
+ return;
+ }
if (req->extra_destructor) {
req->extra_destructor(ctdb, req);
}
free(req);
}
-/* Sanity-checking wrapper for reply.
- * FIXME: logging! */
+/* Sanity-checking wrapper for reply. */
static struct ctdb_reply_call *unpack_reply_call(struct ctdb_connection *ctdb,
struct ctdb_request *req,
uint32_t callid)
size_t len;
struct ctdb_reply_call *inhdr = io_elem_data(req->reply, &len);
- /* ctdbd or our error if this isn't a reply call. */
- if (len < sizeof(*inhdr) || inhdr->hdr.operation != CTDB_REPLY_CALL) {
- errno = EIO;
+ /* Library user error if this isn't a reply to a call. */
+ if (req->hdr.hdr->operation != CTDB_REQ_CALL) {
+ errno = EINVAL;
+ DEBUG(ctdb, LOG_ALERT,
+ "This was not a ctdbd call request: operation %u",
+ req->hdr.hdr->operation);
return NULL;
}
- /* Library user error if this isn't a reply to a call. */
- if (req->hdr.hdr->operation != CTDB_REQ_CALL
- || req->hdr.call->callid != callid) {
+ if (req->hdr.call->callid != callid) {
errno = EINVAL;
+ DEBUG(ctdb, LOG_ALERT,
+ "This was not a ctdbd %u call request: %u",
+ callid, req->hdr.call->callid);
+ return NULL;
+ }
+
+ /* ctdbd or our error if this isn't a reply call. */
+ if (len < sizeof(*inhdr) || inhdr->hdr.operation != CTDB_REPLY_CALL) {
+ errno = EIO;
+ DEBUG(ctdb, LOG_CRIT,
+ "Invalid ctdbd call reply: len %zu, operation %u",
+ len, inhdr->hdr.operation);
return NULL;
}
return inhdr;
}
-/* Sanity-checking wrapper for reply.
- * FIXME: logging! */
+/* Sanity-checking wrapper for reply. */
struct ctdb_reply_control *unpack_reply_control(struct ctdb_connection *ctdb,
struct ctdb_request *req,
enum ctdb_controls control)
struct ctdb_reply_control *inhdr = io_elem_data(req->reply, &len);
/* Library user error if this isn't a reply to a call. */
- if (len < sizeof(*inhdr)
- || req->hdr.hdr->operation != CTDB_REQ_CONTROL) {
+ if (len < sizeof(*inhdr)) {
errno = EINVAL;
+ DEBUG(ctdb, LOG_ALERT,
+ "Short ctdbd control reply: %zu bytes", len);
+ return NULL;
+ }
+ if (req->hdr.hdr->operation != CTDB_REQ_CONTROL) {
+ errno = EINVAL;
+ DEBUG(ctdb, LOG_ALERT,
+ "This was not a ctdbd control request: operation %u",
+ req->hdr.hdr->operation);
return NULL;
}
/* ... or if it was a different control from what we expected. */
if (req->hdr.control->opcode != control) {
errno = EINVAL;
+ DEBUG(ctdb, LOG_ALERT,
+ "This was not an opcode %u ctdbd control request: %u",
+ control, req->hdr.control->opcode);
return NULL;
}
/* ctdbd or our error if this isn't a reply call. */
if (inhdr->hdr.operation != CTDB_REPLY_CONTROL) {
errno = EIO;
+ DEBUG(ctdb, LOG_CRIT,
+ "Invalid ctdbd control reply: operation %u",
+ inhdr->hdr.operation);
return NULL;
}
return;
}
}
- /* FIXME: report this error. */
+ DEBUG(ctdb, LOG_WARNING,
+ "Unexpected ctdbd request reply: operation %u reqid %u",
+ hdr->operation, hdr->reqid);
free_io_elem(in);
}
return ret;
}
-int ctdb_service(struct ctdb_connection *ctdb, int revents)
+bool ctdb_service(struct ctdb_connection *ctdb, int revents)
{
if (ctdb->broken) {
- return -1;
+ return false;
+ }
+
+ if (holding_lock(ctdb)) {
+ DEBUG(ctdb, LOG_ALERT, "Do not block while holding lock!");
}
if (revents & POLLOUT) {
while (ctdb->outq) {
if (real_error(write_io_elem(ctdb->fd,
ctdb->outq->io)) < 0) {
+ DEBUG(ctdb, LOG_ERR,
+ "ctdb_service: error writing to ctdbd");
ctdb->broken = true;
- return -1;
+ return false;
}
if (io_elem_finished(ctdb->outq->io)) {
struct ctdb_request *done = ctdb->outq;
while (revents & POLLIN) {
int ret;
+ int num_ready = 0;
+
+ if (ioctl(ctdb->fd, FIONREAD, &num_ready) != 0) {
+ DEBUG(ctdb, LOG_ERR,
+ "ctdb_service: ioctl(FIONREAD) %d", errno);
+ ctdb->broken = true;
+ return false;
+ }
+ if (num_ready == 0) {
+ /* the descriptor has been closed or we have all our data */
+ break;
+ }
+
if (!ctdb->in) {
ctdb->in = new_io_elem(sizeof(struct ctdb_req_header));
if (!ctdb->in) {
+ DEBUG(ctdb, LOG_ERR,
+ "ctdb_service: allocating readbuf");
ctdb->broken = true;
- return -1;
+ return false;
}
}
/* They closed fd? */
if (ret == 0)
errno = EBADF;
+ DEBUG(ctdb, LOG_ERR,
+ "ctdb_service: error reading from ctdbd");
ctdb->broken = true;
- return -1;
+ return false;
} else if (ret < 0) {
/* No progress, stop loop. */
- revents = 0;
+ break;
} else if (io_elem_finished(ctdb->in)) {
- handle_incoming(ctdb, ctdb->in);
+ io_elem_queue(ctdb, ctdb->in);
ctdb->in = NULL;
}
}
- return 0;
+
+ while (ctdb->inqueue != NULL) {
+ struct io_elem *io = ctdb->inqueue;
+
+ io_elem_dequeue(ctdb, io);
+ handle_incoming(ctdb, io);
+ }
+
+ return true;
}
/* This is inefficient. We could pull in idtree.c. */
ctdb_request_free(ctdb, req);
}
-int ctdb_cancel(struct ctdb_connection *ctdb, struct ctdb_request *req)
+void ctdb_cancel(struct ctdb_connection *ctdb, struct ctdb_request *req)
{
+ if (!req->next && !req->prev) {
+ DEBUG(ctdb, LOG_ALERT,
+ "ctdb_cancel: request completed! ctdb_request_free? %p (id %u)",
+ req, req->hdr.hdr ? req->hdr.hdr->reqid : 0);
+ ctdb_request_free(ctdb, req);
+ return;
+ }
+
+ DEBUG(ctdb, LOG_DEBUG, "ctdb_cancel: %p (id %u)",
+ req, req->hdr.hdr ? req->hdr.hdr->reqid : 0);
+
/* FIXME: If it's not sent, we could just free it right now. */
req->callback = ctdb_cancel_callback;
- return 0;
}
-struct ctdb_db {
- struct ctdb_connection *ctdb;
- bool persistent;
- uint32_t tdb_flags;
- uint32_t id;
- struct tdb_context *tdb;
-
- ctdb_callback_t callback;
- void *private_data;
-};
+void ctdb_detachdb(struct ctdb_connection *ctdb, struct ctdb_db *db)
+{
+ cleanup_locks(ctdb, db);
+ tdb_close(db->tdb);
+ free(db);
+}
static void attachdb_getdbpath_done(struct ctdb_connection *ctdb,
struct ctdb_request *req,
struct ctdb_reply_control *reply;
struct ctdb_db *db = req->priv_data;
uint32_t tdb_flags = db->tdb_flags;
+ struct tdb_logging_context log;
/* Never sent the dbpath request? We've failed. */
if (!dbpath_req) {
}
reply = unpack_reply_control(ctdb, dbpath_req, CTDB_CONTROL_GETDBPATH);
- if (!reply || reply->status != 0) {
+ if (!reply) {
+ return NULL;
+ }
+ if (reply->status != 0) {
+ DEBUG(db->ctdb, LOG_ERR,
+ "ctdb_attachdb_recv: reply status %i", reply->status);
return NULL;
}
tdb_flags = db->persistent ? TDB_DEFAULT : TDB_NOSYNC;
tdb_flags |= TDB_DISALLOW_NESTING;
- db->tdb = tdb_open((char *)reply->data, 0, tdb_flags, O_RDWR, 0);
+ log.log_fn = ctdb_tdb_log_bridge;
+ log.log_private = ctdb;
+ db->tdb = tdb_open_ex((char *)reply->data, 0, tdb_flags, O_RDWR, 0,
+ &log, NULL);
if (db->tdb == NULL) {
+ DEBUG(db->ctdb, LOG_ERR,
+ "ctdb_attachdb_recv: failed to tdb_open %s",
+ (char *)reply->data);
return NULL;
}
/* Finally, separate the db from the request (see destroy_req_db). */
req->priv_data = NULL;
+ DEBUG(db->ctdb, LOG_DEBUG,
+ "ctdb_attachdb_recv: db %p, tdb %s", db, (char *)reply->data);
return db;
}
reply = unpack_reply_control(ctdb, req, control);
if (!reply || reply->status != 0) {
+ if (reply) {
+ DEBUG(ctdb, LOG_ERR,
+ "ctdb_attachdb_send(async): DB_ATTACH status %i",
+ reply->status);
+ }
/* We failed. Hand request to user and have them discover it
* via ctdb_attachdb_recv. */
db->callback(ctdb, req, db->private_data);
&db->id, sizeof(db->id),
attachdb_getdbpath_done, db);
if (!req2) {
+ DEBUG(db->ctdb, LOG_ERR,
+ "ctdb_attachdb_send(async): failed to allocate");
db->callback(ctdb, req, db->private_data);
return;
}
req->extra = req2;
req2->extra = req;
+ DEBUG(db->ctdb, LOG_DEBUG,
+ "ctdb_attachdb_send(async): created getdbpath request");
}
static void destroy_req_db(struct ctdb_connection *ctdb,
struct ctdb_request *
ctdb_attachdb_send(struct ctdb_connection *ctdb,
- const char *name, int persistent, uint32_t tdb_flags,
+ const char *name, bool persistent, uint32_t tdb_flags,
ctdb_callback_t callback, void *private_data)
{
struct ctdb_request *req;
req = new_ctdb_control_request(ctdb, opcode, CTDB_CURRENT_NODE, name,
strlen(name) + 1, attachdb_done, db);
if (!req) {
+ DEBUG(ctdb, LOG_ERR,
+ "ctdb_attachdb_send: failed allocating DB_ATTACH");
free(db);
return NULL;
}
/* Flags get overloaded into srvid. */
req->hdr.control->srvid = tdb_flags;
+ DEBUG(db->ctdb, LOG_DEBUG,
+ "ctdb_attachdb_send: DB_ATTACH request %p", req);
return req;
}
-struct ctdb_lock {
- struct ctdb_db *ctdb_db;
- TDB_DATA key;
-
- /* This will always be true by the time user sees this. */
- bool held;
- struct ctdb_ltdb_header *hdr;
-
- /* For convenience, we stash original callback here. */
- ctdb_rrl_callback_t callback;
-};
-
-void ctdb_release_lock(struct ctdb_lock *lock)
+static unsigned long lock_magic(struct ctdb_lock *lock)
{
- if (lock->held) {
- tdb_chainunlock(lock->ctdb_db->tdb, lock->key);
- lock->held = false;
- }
+ /* A non-zero magic specific to this structure. */
+ return ((unsigned long)lock->key.dptr
+ ^ (((unsigned long)lock->key.dptr) << 16)
+ ^ 0xBADC0FFEEBADC0DEULL)
+ | 1;
}
-static void ctdb_free_lock(struct ctdb_lock *lock)
+/* This is only called on locks before they're held. */
+static void free_lock(struct ctdb_lock *lock)
{
- if (lock->held) {
- /* FIXME: report error. Callback never released the lock */
- ctdb_release_lock(lock);
+ if (lock->held_magic) {
+ DEBUG(lock->ctdb_db->ctdb, LOG_ALERT,
+ "free_lock invalid lock %p", lock);
}
-
free(lock->hdr);
free(lock);
}
+
+void ctdb_release_lock(struct ctdb_db *ctdb_db, struct ctdb_lock *lock)
+{
+ if (lock->held_magic != lock_magic(lock)) {
+ DEBUG(lock->ctdb_db->ctdb, LOG_ALERT,
+ "ctdb_release_lock invalid lock %p", lock);
+ } else if (lock->ctdb_db != ctdb_db) {
+ errno = EBADF;
+ DEBUG(ctdb_db->ctdb, LOG_ALERT,
+ "ctdb_release_lock: wrong ctdb_db.");
+ } else {
+ tdb_chainunlock(lock->ctdb_db->tdb, lock->key);
+ DEBUG(lock->ctdb_db->ctdb, LOG_DEBUG,
+ "ctdb_release_lock %p", lock);
+ remove_lock(lock->ctdb_db->ctdb, lock);
+ }
+ lock->held_magic = 0;
+ free_lock(lock);
+}
+
+
/* We keep the lock if local node is the dmaster. */
static bool try_readrecordlock(struct ctdb_lock *lock, TDB_DATA *data)
{
struct ctdb_ltdb_header *hdr;
if (tdb_chainlock(lock->ctdb_db->tdb, lock->key) != 0) {
+ DEBUG(lock->ctdb_db->ctdb, LOG_WARNING,
+ "ctdb_readrecordlock_async: failed to chainlock");
return NULL;
}
hdr = ctdb_local_fetch(lock->ctdb_db->tdb, lock->key, data);
if (hdr && hdr->dmaster == lock->ctdb_db->ctdb->pnn) {
- lock->held = true;
+ DEBUG(lock->ctdb_db->ctdb, LOG_DEBUG,
+ "ctdb_readrecordlock_async: got local lock");
+ lock->held_magic = lock_magic(lock);
lock->hdr = hdr;
+ add_lock(lock->ctdb_db->ctdb, lock);
return true;
}
static void destroy_lock(struct ctdb_connection *ctdb,
struct ctdb_request *req)
{
- ctdb_release_lock(req->extra);
- ctdb_free_lock(req->extra);
+ free_lock(req->extra);
}
static void readrecordlock_retry(struct ctdb_connection *ctdb,
/* OK, we've received reply to noop migration */
reply = unpack_reply_call(ctdb, req, CTDB_NULL_FUNC);
if (!reply || reply->status != 0) {
+ if (reply) {
+ DEBUG(ctdb, LOG_ERR,
+ "ctdb_readrecordlock_async(async):"
+ " NULL_FUNC returned %i", reply->status);
+ }
lock->callback(lock->ctdb_db, NULL, tdb_null, private);
ctdb_request_free(ctdb, req); /* Also frees lock. */
- ctdb_free_lock(lock);
return;
}
/* Now it's their responsibility to free lock & request! */
req->extra_destructor = NULL;
lock->callback(lock->ctdb_db, lock, data, private);
- ctdb_free_lock(lock);
+ ctdb_request_free(ctdb, req);
return;
}
struct ctdb_lock *lock;
TDB_DATA data;
+ if (holding_lock(ctdb_db->ctdb)) {
+ DEBUG(ctdb_db->ctdb, LOG_ALERT,
+ "ctdb_readrecordlock_async: already holding lock");
+ return false;
+ }
+
/* Setup lock */
lock = malloc(sizeof(*lock) + key.dsize);
if (!lock) {
+ DEBUG(ctdb_db->ctdb, LOG_ERR,
+ "ctdb_readrecordlock_async: lock allocation failed");
return false;
}
lock->key.dptr = (void *)(lock + 1);
lock->key.dsize = key.dsize;
lock->ctdb_db = ctdb_db;
lock->hdr = NULL;
- lock->held = false;
+ lock->held_magic = 0;
/* Fast path. */
if (try_readrecordlock(lock, &data)) {
callback(ctdb_db, lock, data, cbdata);
- ctdb_free_lock(lock);
return true;
}
req = new_ctdb_request(offsetof(struct ctdb_req_call, data)
+ key.dsize, readrecordlock_retry, cbdata);
if (!req) {
- ctdb_release_lock(lock);
- ctdb_free_lock(lock);
+ DEBUG(ctdb_db->ctdb, LOG_ERR,
+ "ctdb_readrecordlock_async: allocation failed");
+ free_lock(lock);
return NULL;
}
req->extra = lock;
return true;
}
-int ctdb_writerecord(struct ctdb_lock *lock, TDB_DATA data)
+bool ctdb_writerecord(struct ctdb_db *ctdb_db,
+ struct ctdb_lock *lock, TDB_DATA data)
{
- if (lock->ctdb_db->persistent) {
- /* FIXME: Report error. */
- return -1;
+ if (lock->ctdb_db != ctdb_db) {
+ errno = EBADF;
+ DEBUG(ctdb_db->ctdb, LOG_ALERT,
+ "ctdb_writerecord: Can not write, wrong ctdb_db.");
+ return false;
}
- if (!lock->held) {
- /* FIXME: Report error. */
- return -1;
+ if (lock->held_magic != lock_magic(lock)) {
+ errno = EBADF;
+ DEBUG(ctdb_db->ctdb, LOG_ALERT,
+ "ctdb_writerecord: Can not write. Lock has been released.");
+ return false;
}
+
+ if (ctdb_db->persistent) {
+ errno = EINVAL;
+ DEBUG(ctdb_db->ctdb, LOG_ALERT,
+ "ctdb_writerecord: cannot write to persistent db");
+ return false;
+ }
+
+ switch (ctdb_local_store(ctdb_db->tdb, lock->key, lock->hdr, data)) {
+ case 0:
+ DEBUG(ctdb_db->ctdb, LOG_DEBUG,
+ "ctdb_writerecord: optimized away noop write.");
+ /* fall thru */
+ case 1:
+ return true;
- return ctdb_local_store(lock->ctdb_db->tdb, lock->key, lock->hdr,
- data);
+ default:
+ switch (errno) {
+ case ENOMEM:
+ DEBUG(ctdb_db->ctdb, LOG_CRIT,
+ "ctdb_writerecord: out of memory.");
+ break;
+ case EINVAL:
+ DEBUG(ctdb_db->ctdb, LOG_ALERT,
+ "ctdb_writerecord: record changed under lock?");
+ break;
+ default: /* TDB already logged. */
+ break;
+ }
+ return false;
+ }
}