along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include "includes.h"
-#include "lib/tdb/include/tdb.h"
-#include "lib/tevent/tevent.h"
-#include "lib/util/dlinklist.h"
+#include "replace.h"
#include "system/network.h"
#include "system/filesys.h"
-#include "../include/ctdb_private.h"
-#include "../include/ctdb_client.h"
-#include <stdarg.h>
+
+#include <tdb.h>
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+#include "lib/util/sys_rw.h"
+
+#include "ctdb_private.h"
+#include "ctdb_client.h"
+
+#include "common/logging.h"
+#include "common/common.h"
/* structures for packet queueing - see common/ctdb_io.c */
-struct ctdb_partial {
+struct ctdb_buffer { /* input buffer: the first 'length' bytes of 'data' hold received data */
uint8_t *data;
uint32_t length;
+ uint32_t size; /* number of bytes currently allocated for 'data' */
};
struct ctdb_queue_pkt {
uint8_t *data;
uint32_t length;
uint32_t full_length;
+ uint8_t buf[]; /* packet bytes stored inline; ctdb_queue_send() points 'data' here */
};
struct ctdb_queue {
struct ctdb_context *ctdb;
- struct ctdb_partial partial; /* partial input packet */
+ struct tevent_immediate *im; /* immediate event used to defer queue_process() and queue_dead() */
+ struct ctdb_buffer buffer; /* input buffer; may hold more than one packet */
struct ctdb_queue_pkt *out_queue, *out_queue_tail;
uint32_t out_queue_length;
- struct fd_event *fde;
+ struct tevent_fd *fde; /* fd event registered for 'fd'; NULL when no event is registered */
int fd;
size_t alignment;
void *private_data;
ctdb_queue_cb_fn_t callback;
- bool *destroyed;
const char *name;
+ uint32_t buffer_size; /* initial allocation size of buffer.data: queue_buffer_size tunable, or 1024 if that is 0 */
};
return queue->out_queue_length;
}
+static void queue_process(struct ctdb_queue *queue);
+/*
+ * Immediate-event handler that resumes packet processing.
+ *
+ * queue_process() schedules this handler when data is still left in the
+ * queue buffer after a packet has been extracted. private_data is the
+ * struct ctdb_queue to process; ev and im are not used.
+ */
+static void queue_process_event(struct tevent_context *ev, struct tevent_immediate *im,
+ void *private_data)
+{
+ struct ctdb_queue *queue = talloc_get_type(private_data, struct ctdb_queue);
+
+ queue_process(queue);
+}
+
+/*
+ * Extract at most one complete packet from the queue's input buffer and hand
+ * it to the queue callback.
+ *
+ * The buffer is expected to start with a uint32_t giving the length of the
+ * packet, counted from the start of that length field. Nothing happens until
+ * the buffer holds the length field and then the whole packet. A length of 0
+ * is reported by invoking the callback with (NULL, 0). Otherwise the packet
+ * is copied out with talloc_memdup(), any bytes following it are moved to the
+ * front of the buffer, and the callback is invoked with the copy.
+ *
+ * The queue callback can end up freeing the queue, so there must not be a
+ * loop processing packets from the queue buffer, and the queue is not touched
+ * after the callback has been invoked. Instead an immediate event
+ * (queue_process_event) is scheduled to process the remaining data.
+ */
+static void queue_process(struct ctdb_queue *queue)
+{
+ uint32_t pkt_size;
+ uint8_t *data;
+
+ if (queue->buffer.length < sizeof(pkt_size)) {
+ return; /* the length field has not been read completely yet */
+ }
+
+ /* The length field is complete, read the packet length from it */
+ pkt_size = *(uint32_t *)queue->buffer.data;
+ if (pkt_size == 0) {
+ DEBUG(DEBUG_CRIT, ("Invalid packet of length 0\n"));
+ goto failed;
+ }
+
+ /* the buffer doesn't contain the full packet, return to get the rest */
+ if (queue->buffer.length < pkt_size) {
+ return;
+ }
+
+ /* Extract complete packet as a separate talloc allocation under queue */
+ data = talloc_memdup(queue, queue->buffer.data, pkt_size);
+ if (data == NULL) {
+ D_ERR("read error alloc failed for %u\n", pkt_size);
+ return; /* the buffer is left unchanged, the packet stays in it */
+ }
+
+ /* Shift packet out from buffer, keeping any bytes that follow it */
+ if (queue->buffer.length > pkt_size) {
+ memmove(queue->buffer.data,
+ queue->buffer.data + pkt_size,
+ queue->buffer.length - pkt_size);
+ }
+ queue->buffer.length -= pkt_size;
+
+ if (queue->buffer.length > 0) {
+ /* There is more data to be processed, schedule an immediate event */
+ tevent_schedule_immediate(queue->im, queue->ctdb->ev,
+ queue_process_event, queue);
+ } else {
+ /* Buffer is empty: if it is larger than buffer_size, release it */
+ if (queue->buffer.size > queue->buffer_size) {
+ TALLOC_FREE(queue->buffer.data);
+ queue->buffer.size = 0;
+ }
+ }
+
+ /*
+ * It is the responsibility of the callback to free 'data'.
+ * This must stay the last use of 'queue': the callback may free it.
+ */
+ queue->callback(data, pkt_size, queue->private_data);
+ return;
+
+failed:
+ queue->callback(NULL, 0, queue->private_data); /* report the invalid packet to the callback */
+
+}
+
+
/*
called when an incoming connection is readable
This function MUST be safe for reentry via the queue callback!
static void queue_io_read(struct ctdb_queue *queue)
{
int num_ready = 0;
- uint32_t sz_bytes_req;
uint32_t pkt_size;
- uint32_t pkt_bytes_remaining;
- uint32_t to_read;
ssize_t nread;
uint8_t *data;
+ /* check how much data is available on the socket for immediately
+ guaranteed nonblocking access.
+ as long as we are careful never to try to read more than this
+ we know all reads will be successful and will neither block
+ nor fail with a "data not available right now" error
+ */
if (ioctl(queue->fd, FIONREAD, &num_ready) != 0) {
return;
}
goto failed;
}
- if (queue->partial.data == NULL) {
- /* starting fresh, allocate buf for size bytes */
- sz_bytes_req = sizeof(pkt_size);
- queue->partial.data = talloc_size(queue, sz_bytes_req);
- if (queue->partial.data == NULL) {
- DEBUG(DEBUG_ERR,("read error alloc failed for %u\n",
- sz_bytes_req));
+ if (queue->buffer.data == NULL) {
+ /* starting fresh, allocate buf to read data */
+ queue->buffer.data = talloc_size(queue, queue->buffer_size);
+ if (queue->buffer.data == NULL) {
+ DEBUG(DEBUG_ERR, ("read error alloc failed for %u\n", num_ready));
goto failed;
}
- } else if (queue->partial.length < sizeof(pkt_size)) {
- /* yet to find out the packet length */
- sz_bytes_req = sizeof(pkt_size) - queue->partial.length;
- } else {
- /* partial packet, length known, full buf allocated */
- sz_bytes_req = 0;
+ queue->buffer.size = queue->buffer_size;
+ goto data_read;
}
- data = queue->partial.data;
- if (sz_bytes_req > 0) {
- to_read = MIN(sz_bytes_req, num_ready);
- nread = read(queue->fd, data + queue->partial.length,
- to_read);
- if (nread <= 0) {
- DEBUG(DEBUG_ERR,("read error nread=%d\n", (int)nread));
- goto failed;
- }
- queue->partial.length += nread;
+ if (queue->buffer.length < sizeof(pkt_size)) {
+ /* data read is not sufficient to gather message size */
+ goto data_read;
+ }
- if (nread < sz_bytes_req) {
- /* not enough to know the length */
- DEBUG(DEBUG_DEBUG,("Partial packet length read\n"));
- return;
- }
- /* size now known, allocate buffer for the full packet */
- queue->partial.data = talloc_realloc_size(queue, data,
- *(uint32_t *)data);
- if (queue->partial.data == NULL) {
- DEBUG(DEBUG_ERR,("read error alloc failed for %u\n",
- *(uint32_t *)data));
+ pkt_size = *(uint32_t *)queue->buffer.data;
+ if (pkt_size > queue->buffer.size) {
+ data = talloc_realloc_size(queue,
+ queue->buffer.data,
+ pkt_size);
+ if (data == NULL) {
+ DBG_ERR("read error realloc failed for %u\n", pkt_size);
goto failed;
}
- data = queue->partial.data;
- num_ready -= nread;
- }
-
- pkt_size = *(uint32_t *)data;
- if (pkt_size == 0) {
- DEBUG(DEBUG_CRIT,("Invalid packet of length 0\n"));
- goto failed;
+ queue->buffer.data = data;
+ queue->buffer.size = pkt_size;
}
- pkt_bytes_remaining = pkt_size - queue->partial.length;
- to_read = MIN(pkt_bytes_remaining, num_ready);
- nread = read(queue->fd, data + queue->partial.length,
- to_read);
- if (nread <= 0) {
- DEBUG(DEBUG_ERR,("read error nread=%d\n",
- (int)nread));
- goto failed;
- }
- queue->partial.length += nread;
+data_read:
+ num_ready = MIN(num_ready, queue->buffer.size - queue->buffer.length);
- if (queue->partial.length < pkt_size) {
- DEBUG(DEBUG_DEBUG,("Partial packet data read\n"));
- return;
+ if (num_ready > 0) {
+ nread = sys_read(queue->fd,
+ queue->buffer.data + queue->buffer.length,
+ num_ready);
+ if (nread <= 0) {
+ DEBUG(DEBUG_ERR, ("read error nread=%d\n", (int)nread));
+ goto failed;
+ }
+ queue->buffer.length += nread;
}
- queue->partial.data = NULL;
- queue->partial.length = 0;
- /* it is the responsibility of the callback to free 'data' */
- queue->callback(data, pkt_size, queue->private_data);
+ queue_process(queue);
return;
failed:
/* used when an event triggers a dead queue */
-static void queue_dead(struct event_context *ev, struct timed_event *te,
- struct timeval t, void *private_data)
+static void queue_dead(struct tevent_context *ev, struct tevent_immediate *im,
+ void *private_data)
{
struct ctdb_queue *queue = talloc_get_type(private_data, struct ctdb_queue);
queue->callback(NULL, 0, queue->private_data);
talloc_free(queue->fde);
queue->fde = NULL;
queue->fd = -1;
- event_add_timed(queue->ctdb->ev, queue, timeval_zero(),
- queue_dead, queue);
+ tevent_schedule_immediate(queue->im, queue->ctdb->ev,
+ queue_dead, queue);
return;
}
if (n <= 0) return;
talloc_free(pkt);
}
- EVENT_FD_NOT_WRITEABLE(queue->fde);
+ TEVENT_FD_NOT_WRITEABLE(queue->fde);
}
/*
called when an incoming connection is readable or writeable
*/
-static void queue_io_handler(struct event_context *ev, struct fd_event *fde,
+static void queue_io_handler(struct tevent_context *ev, struct tevent_fd *fde,
uint16_t flags, void *private_data)
{
struct ctdb_queue *queue = talloc_get_type(private_data, struct ctdb_queue);
- if (flags & EVENT_FD_READ) {
+ if (flags & TEVENT_FD_READ) {
queue_io_read(queue);
} else {
queue_io_write(queue);
*/
int ctdb_queue_send(struct ctdb_queue *queue, uint8_t *data, uint32_t length)
{
+ struct ctdb_req_header *hdr = (struct ctdb_req_header *)data;
struct ctdb_queue_pkt *pkt;
uint32_t length2, full_length;
+ /* If the queue does not have a valid fd, there is no point queueing a packet */
+ if (queue->fd == -1) {
+ return 0;
+ }
+
if (queue->alignment) {
/* enforce the length and alignment rules from the tcp packet allocator */
length2 = (length+(queue->alignment-1)) & ~(queue->alignment-1);
talloc_free(queue->fde);
queue->fde = NULL;
queue->fd = -1;
- event_add_timed(queue->ctdb->ev, queue, timeval_zero(),
- queue_dead, queue);
+ tevent_schedule_immediate(queue->im, queue->ctdb->ev,
+ queue_dead, queue);
/* yes, we report success, as the dead node is
handled via a separate event */
return 0;
if (length2 == 0) return 0;
}
- pkt = talloc(queue, struct ctdb_queue_pkt);
+ pkt = talloc_size(
+ queue, offsetof(struct ctdb_queue_pkt, buf) + length2);
CTDB_NO_MEMORY(queue->ctdb, pkt);
+ talloc_set_name_const(pkt, "struct ctdb_queue_pkt");
- pkt->data = talloc_memdup(pkt, data, length2);
- CTDB_NO_MEMORY(queue->ctdb, pkt->data);
+ pkt->data = pkt->buf;
+ memcpy(pkt->data, data, length2);
pkt->length = length2;
pkt->full_length = full_length;
if (queue->out_queue == NULL && queue->fd != -1) {
- EVENT_FD_WRITEABLE(queue->fde);
+ TEVENT_FD_WRITEABLE(queue->fde);
}
- DLIST_ADD_END(queue->out_queue, pkt, NULL);
+ DLIST_ADD_END(queue->out_queue, pkt);
queue->out_queue_length++;
if (queue->ctdb->tunable.verbose_memory_names != 0) {
- struct ctdb_req_header *hdr = (struct ctdb_req_header *)pkt->data;
switch (hdr->operation) {
case CTDB_REQ_CONTROL: {
- struct ctdb_req_control *c = (struct ctdb_req_control *)hdr;
+ struct ctdb_req_control_old *c = (struct ctdb_req_control_old *)hdr;
talloc_set_name(pkt, "ctdb_queue_pkt: %s control opcode=%u srvid=%llu datalen=%u",
queue->name, (unsigned)c->opcode, (unsigned long long)c->srvid, (unsigned)c->datalen);
break;
}
case CTDB_REQ_MESSAGE: {
- struct ctdb_req_message *m = (struct ctdb_req_message *)hdr;
+ struct ctdb_req_message_old *m = (struct ctdb_req_message_old *)hdr;
talloc_set_name(pkt, "ctdb_queue_pkt: %s message srvid=%llu datalen=%u",
queue->name, (unsigned long long)m->srvid, (unsigned)m->datalen);
break;
queue->fde = NULL;
if (fd != -1) {
- queue->fde = event_add_fd(queue->ctdb->ev, queue, fd, EVENT_FD_READ,
- queue_io_handler, queue);
+ queue->fde = tevent_add_fd(queue->ctdb->ev, queue, fd,
+ TEVENT_FD_READ,
+ queue_io_handler, queue);
if (queue->fde == NULL) {
return -1;
}
tevent_fd_set_auto_close(queue->fde);
if (queue->out_queue) {
- EVENT_FD_WRITEABLE(queue->fde);
+ TEVENT_FD_WRITEABLE(queue->fde);
}
}
return 0;
}
-/* If someone sets up this pointer, they want to know if the queue is freed */
-static int queue_destructor(struct ctdb_queue *queue)
-{
- if (queue->destroyed != NULL)
- *queue->destroyed = true;
- return 0;
-}
-
/*
setup a packet queue on a socket
*/
struct ctdb_queue *ctdb_queue_setup(struct ctdb_context *ctdb,
TALLOC_CTX *mem_ctx, int fd, int alignment,
-
ctdb_queue_cb_fn_t callback,
void *private_data, const char *fmt, ...)
{
va_end(ap);
CTDB_NO_MEMORY_NULL(ctdb, queue->name);
+ queue->im= tevent_create_immediate(queue);
+ CTDB_NO_MEMORY_NULL(ctdb, queue->im);
+
queue->ctdb = ctdb;
queue->fd = fd;
queue->alignment = alignment;
return NULL;
}
}
- talloc_set_destructor(queue, queue_destructor);
+
+ queue->buffer_size = ctdb->tunable.queue_buffer_size;
+ /* In client code, ctdb->tunable is not initialized.
+ * This does not affect recovery daemon.
+ */
+ if (queue->buffer_size == 0) {
+ queue->buffer_size = 1024;
+ }
return queue;
}