along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include "includes.h"
-#include "lib/tdb/include/tdb.h"
-#include "lib/events/events.h"
-#include "lib/util/dlinklist.h"
+#include "replace.h"
#include "system/network.h"
#include "system/filesys.h"
-#include "../include/ctdb_private.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+#include "lib/util/samba_util.h"
+
+#include "ctdb_private.h"
+#include "ctdb_client.h"
+#include "ctdb_logging.h"
+
+#include "common/common.h"
/*
choose the transport we will use
int ctdb_set_transport(struct ctdb_context *ctdb, const char *transport)
{
ctdb->transport = talloc_strdup(ctdb, transport); /* copy of the transport name, allocated with ctdb as talloc parent */
- return 0;
-}
+ CTDB_NO_MEMORY(ctdb, ctdb->transport); /* NOTE(review): macro is defined outside this view; presumably bails out with an error when the talloc_strdup() above returned NULL -- confirm */
-/*
- choose the recovery lock file
-*/
-int ctdb_set_recovery_lock_file(struct ctdb_context *ctdb, const char *file)
-{
- ctdb->recovery_lock_file = talloc_strdup(ctdb, file);
return 0;
}
/*
- choose the logfile location
+ Check whether an ip is a valid node ip
+ Returns the node id for this ip address or -1
+
+ The node id is the index into ctdb->nodes of the first entry whose
+ address matches nodeip according to ctdb_same_ip(). Entries flagged
+ NODE_FLAGS_DELETED are skipped, so a deleted node never matches.
*/
-int ctdb_set_logfile(struct ctdb_context *ctdb, const char *logfile)
+int ctdb_ip_to_nodeid(struct ctdb_context *ctdb, const ctdb_sock_addr *nodeip)
{
- ctdb->logfile = talloc_strdup(ctdb, logfile);
- if (ctdb->logfile != NULL && strcmp(logfile, "-") != 0) {
- int fd;
- fd = open(ctdb->logfile, O_WRONLY|O_APPEND|O_CREAT, 0666);
- if (fd == -1) {
- printf("Failed to open logfile %s\n", ctdb->logfile);
- abort();
+ int nodeid;
+
+ for (nodeid=0;nodeid<ctdb->num_nodes;nodeid++) {
+ if (ctdb->nodes[nodeid]->flags & NODE_FLAGS_DELETED) {
+ continue; /* deleted slots are never reported as a match */
}
- close(1);
- close(2);
- if (fd != 1) {
- dup2(fd, 1);
- close(fd);
+ if (ctdb_same_ip(&ctdb->nodes[nodeid]->address, nodeip)) {
+ return nodeid; /* first matching index wins */
}
- /* also catch stderr of subcommands to the log file */
- dup2(1, 2);
}
- return 0;
-}
+ return -1; /* no non-deleted node has this address */
+}
/*
- set the directory for the local databases
+ choose the recovery lock file
+
+ Any previously stored ctdb->recovery_lock_file is freed and reset to
+ NULL first. If file is NULL the field stays NULL, a message saying
+ that recovery lock checking is disabled is logged, and 0 is returned.
+ Otherwise a copy of file is stored on ctdb and 0 is returned.
+
+ NOTE(review): the log text says the file was set to "" but the test
+ is for NULL; an empty string is stored like any other path -- confirm
+ which of the two is intended.
+ NOTE(review): CTDB_NO_MEMORY is defined outside this view; presumably
+ it returns an error when the talloc_strdup() failed -- confirm.
*/
-int ctdb_set_tdb_dir(struct ctdb_context *ctdb, const char *dir)
+int ctdb_set_recovery_lock_file(struct ctdb_context *ctdb, const char *file)
{
- ctdb->db_directory = talloc_strdup(ctdb, dir);
- if (ctdb->db_directory == NULL) {
- return -1;
+ if (ctdb->recovery_lock_file != NULL) {
+ talloc_free(ctdb->recovery_lock_file);
+ ctdb->recovery_lock_file = NULL;
}
+
+ if (file == NULL) {
+ DEBUG(DEBUG_ALERT,("Recovery lock file set to \"\". Disabling recovery lock checking\n"));
+ return 0;
+ }
+
+ ctdb->recovery_lock_file = talloc_strdup(ctdb, file);
+ CTDB_NO_MEMORY(ctdb, ctdb->recovery_lock_file);
+
return 0;
}
-/*
- add a node to the list of active nodes
-*/
-static int ctdb_add_node(struct ctdb_context *ctdb, char *nstr)
+/*
+ Convert a node map into an array of struct ctdb_node pointers.
+
+ *nodes is allocated on mem_ctx as a zeroed array of node_map->num
+ pointers and *num_nodes is set to node_map->num. Each struct ctdb_node
+ is allocated as a talloc child of that array and filled from the
+ matching node_map->nodes[i] entry:
+  - address is copied from the map entry
+  - name is formatted as "addr:port" from that address
+  - flags start as the map entry's flags, are replaced by
+    NODE_FLAGS_UNHEALTHY when the entry is not NODE_FLAGS_DELETED, and
+    always get NODE_FLAGS_DISCONNECTED or-ed in
+  - pnn is the array index, ctdb is the passed context, dead_count is 0
+
+ Returns 0 when the loop completes.
+
+ NOTE(review): CTDB_NO_MEMORY is defined outside this view; presumably
+ it returns an error when an allocation failed -- confirm.
+ NOTE(review): the talloc_asprintf() result stored in node->name is
+ not checked for NULL.
+ NOTE(review): i is an int while *num_nodes is uint32_t; the type of
+ node_map->num is not visible here.
+*/
+static int convert_node_map_to_list(struct ctdb_context *ctdb,
+ TALLOC_CTX *mem_ctx,
+ struct ctdb_node_map_old *node_map,
+ struct ctdb_node ***nodes,
+ uint32_t *num_nodes)
{
- struct ctdb_node *node, **nodep;
+ int i;
- nodep = talloc_realloc(ctdb, ctdb->nodes, struct ctdb_node *, ctdb->num_nodes+1);
- CTDB_NO_MEMORY(ctdb, nodep);
+ *nodes = talloc_zero_array(mem_ctx,
+ struct ctdb_node *, node_map->num);
+ CTDB_NO_MEMORY(ctdb, *nodes);
+ *num_nodes = node_map->num;
- ctdb->nodes = nodep;
- nodep = &ctdb->nodes[ctdb->num_nodes];
- (*nodep) = talloc_zero(ctdb->nodes, struct ctdb_node);
- CTDB_NO_MEMORY(ctdb, *nodep);
- node = *nodep;
+ for (i = 0; i < node_map->num; i++) {
+ struct ctdb_node *node;
- if (ctdb_parse_address(ctdb, node, nstr, &node->address) != 0) {
- return -1;
- }
- node->ctdb = ctdb;
- node->name = talloc_asprintf(node, "%s:%u",
- node->address.address,
- node->address.port);
- /* this assumes that the nodes are kept in sorted order, and no gaps */
- node->vnn = ctdb->num_nodes;
-
- /* nodes start out disconnected */
- node->flags |= NODE_FLAGS_DISCONNECTED;
-
- if (ctdb->address.address &&
- ctdb_same_address(&ctdb->address, &node->address)) {
- ctdb->vnn = node->vnn;
- node->flags &= ~NODE_FLAGS_DISCONNECTED;
- }
+ node = talloc_zero(*nodes, struct ctdb_node);
+ CTDB_NO_MEMORY(ctdb, node);
+ (*nodes)[i] = node;
- ctdb->num_nodes++;
- node->dead_count = 0;
+ node->address = node_map->nodes[i].addr;
+ node->name = talloc_asprintf(node, "%s:%u",
+ ctdb_addr_to_str(&node->address),
+ ctdb_addr_to_port(&node->address));
+
+ node->flags = node_map->nodes[i].flags;
+ if (!(node->flags & NODE_FLAGS_DELETED)) {
+ node->flags = NODE_FLAGS_UNHEALTHY; /* overwrites every flag taken from the map entry */
+ }
+ node->flags |= NODE_FLAGS_DISCONNECTED; /* applied to deleted and non-deleted entries alike */
+
+ node->pnn = i;
+ node->ctdb = ctdb;
+ node->dead_count = 0;
+ }
return 0;
}
-/*
- setup the node list from a file
-*/
-int ctdb_set_nlist(struct ctdb_context *ctdb, const char *nlist)
+/*
+ Load the nodes list from a file
+
+ Reads ctdb->nodes_file with ctdb_read_nodes_file(), frees the current
+ ctdb->nodes array and rebuilds ctdb->nodes and ctdb->num_nodes from
+ the node map via convert_node_map_to_list(). The temporary node map
+ is freed before returning.
+
+ There is no error return: if the file cannot be read or the
+ conversion reports -1, an error naming the file is logged and the
+ process terminates with exit(1).
+*/
+void ctdb_load_nodes_file(struct ctdb_context *ctdb)
{
- char **lines;
- int nlines;
- int i;
-
- talloc_free(ctdb->node_list_file);
- ctdb->node_list_file = talloc_strdup(ctdb, nlist);
+ struct ctdb_node_map_old *node_map;
+ int ret;
- lines = file_lines_load(nlist, &nlines, ctdb);
- if (lines == NULL) {
- ctdb_set_error(ctdb, "Failed to load nlist '%s'\n", nlist);
- return -1;
- }
- while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
- nlines--;
+ node_map = ctdb_read_nodes_file(ctdb, ctdb->nodes_file);
+ if (node_map == NULL) {
+ goto fail;
}
- for (i=0;i<nlines;i++) {
- if (ctdb_add_node(ctdb, lines[i]) != 0) {
- talloc_free(lines);
- return -1;
- }
+ TALLOC_FREE(ctdb->nodes); /* the old array is dropped before the new one is built */
+ ret = convert_node_map_to_list(ctdb, ctdb, node_map,
+ &ctdb->nodes, &ctdb->num_nodes);
+ if (ret == -1) {
+ goto fail;
}
- /* initialize the vnn mapping table now that we have num_nodes setup */
-/*
-XXX we currently initialize it to the maximum number of nodes to
-XXX make it behave the same way as previously.
-XXX Once we have recovery working we should initialize this always to
-XXX generation==0 (==invalid) and let the recovery tool populate this
-XXX table for the daemons.
-*/
- ctdb->vnn_map = talloc(ctdb, struct ctdb_vnn_map);
- CTDB_NO_MEMORY(ctdb, ctdb->vnn_map);
-
- ctdb->vnn_map->generation = 1;
- ctdb->vnn_map->size = ctdb->num_nodes;
- ctdb->vnn_map->map = talloc_array(ctdb->vnn_map, uint32_t, ctdb->vnn_map->size);
- CTDB_NO_MEMORY(ctdb, ctdb->vnn_map->map);
+ talloc_free(node_map);
+ return;
- for(i=0;i<ctdb->vnn_map->size;i++) {
- ctdb->vnn_map->map[i] = i;
- }
-
- talloc_free(lines);
- return 0;
+fail:
+ DEBUG(DEBUG_ERR, ("Failed to load nodes file \"%s\"\n",
+ ctdb->nodes_file));
+ talloc_free(node_map); /* node_map is NULL here when the read itself failed */
+ exit(1);
}
-
/*
setup the local node address
+
+ Allocates ctdb->address on ctdb, parses the address string into it
+ with ctdb_parse_address() and returns -1 if that call reports
+ failure. On success ctdb->name is set to "addr:port" built from the
+ parsed address and 0 is returned.
+
+ NOTE(review): CTDB_NO_MEMORY is defined outside this view; presumably
+ it returns an error when the allocation failed -- confirm.
+ NOTE(review): when parsing fails the freshly allocated ctdb->address
+ is left in place, and the talloc_asprintf() result stored in
+ ctdb->name is not checked for NULL.
*/
int ctdb_set_address(struct ctdb_context *ctdb, const char *address)
{
- if (ctdb_parse_address(ctdb, ctdb, address, &ctdb->address) != 0) {
+ ctdb->address = talloc(ctdb, ctdb_sock_addr);
+ CTDB_NO_MEMORY(ctdb, ctdb->address);
+
+ if (ctdb_parse_address(ctdb, address, ctdb->address) != 0) {
return -1;
}
-
- ctdb->name = talloc_asprintf(ctdb, "%s:%u",
- ctdb->address.address,
- ctdb->address.port);
+
+ ctdb->name = talloc_asprintf(ctdb, "%s:%u",
+ ctdb_addr_to_str(ctdb->address),
+ ctdb_addr_to_port(ctdb->address));
return 0;
}
{
int i;
uint32_t count=0;
- for (i=0;i<ctdb->vnn_map->size;i++) {
- struct ctdb_node *node = ctdb->nodes[ctdb->vnn_map->map[i]];
- if (!(node->flags & NODE_FLAGS_INACTIVE)) {
+ for (i=0; i < ctdb->num_nodes; i++) {
+ if (!(ctdb->nodes[i]->flags & NODE_FLAGS_INACTIVE)) {
count++;
}
}
tmp_ctx = talloc_new(ctdb);
talloc_steal(tmp_ctx, hdr);
- DEBUG(3,(__location__ " ctdb request %u of type %u length %u from "
+ DEBUG(DEBUG_DEBUG,(__location__ " ctdb request %u of type %u length %u from "
"node %u to %u\n", hdr->reqid, hdr->operation, hdr->length,
hdr->srcnode, hdr->destnode));
case CTDB_REPLY_CALL:
case CTDB_REQ_DMASTER:
case CTDB_REPLY_DMASTER:
- /* for ctdb_call inter-node operations verify that the
- remote node that sent us the call is running in the
- same generation instance as this node
- */
- if (ctdb->vnn_map->generation != hdr->generation) {
- DEBUG(0,(__location__ " ctdb request %u"
- " length %u from node %u to %u had an"
- " invalid generation id:%u while our"
- " generation id is:%u\n",
- hdr->reqid, hdr->length,
- hdr->srcnode, hdr->destnode,
- hdr->generation, ctdb->vnn_map->generation));
+ /* we don't allow these calls when banned */
+ if (ctdb->nodes[ctdb->pnn]->flags & NODE_FLAGS_BANNED) {
+ DEBUG(DEBUG_DEBUG,(__location__ " ctdb operation %u"
+ " request %u"
+ " length %u from node %u to %u while node"
+ " is banned\n",
+ hdr->operation, hdr->reqid,
+ hdr->length,
+ hdr->srcnode, hdr->destnode));
goto done;
}
+
+ /* The generation check is pushed down into the handlers for
+ * these operations. They check the database generation instead
+ * of the global generation; the database context is not
+ * available here, so the check cannot be made at this point.
+ */
}
switch (hdr->operation) {
case CTDB_REQ_CALL:
- ctdb->statistics.node.req_call++;
+ CTDB_INCREMENT_STAT(ctdb, node.req_call);
ctdb_request_call(ctdb, hdr);
break;
case CTDB_REPLY_CALL:
- ctdb->statistics.node.reply_call++;
+ CTDB_INCREMENT_STAT(ctdb, node.reply_call);
ctdb_reply_call(ctdb, hdr);
break;
case CTDB_REPLY_ERROR:
- ctdb->statistics.node.reply_error++;
+ CTDB_INCREMENT_STAT(ctdb, node.reply_error);
ctdb_reply_error(ctdb, hdr);
break;
case CTDB_REQ_DMASTER:
- ctdb->statistics.node.req_dmaster++;
+ CTDB_INCREMENT_STAT(ctdb, node.req_dmaster);
ctdb_request_dmaster(ctdb, hdr);
break;
case CTDB_REPLY_DMASTER:
- ctdb->statistics.node.reply_dmaster++;
+ CTDB_INCREMENT_STAT(ctdb, node.reply_dmaster);
ctdb_reply_dmaster(ctdb, hdr);
break;
case CTDB_REQ_MESSAGE:
- ctdb->statistics.node.req_message++;
+ CTDB_INCREMENT_STAT(ctdb, node.req_message);
ctdb_request_message(ctdb, hdr);
break;
case CTDB_REQ_CONTROL:
- ctdb->statistics.node.req_control++;
+ CTDB_INCREMENT_STAT(ctdb, node.req_control);
ctdb_request_control(ctdb, hdr);
break;
case CTDB_REPLY_CONTROL:
- ctdb->statistics.node.reply_control++;
+ CTDB_INCREMENT_STAT(ctdb, node.reply_control);
ctdb_reply_control(ctdb, hdr);
break;
case CTDB_REQ_KEEPALIVE:
- ctdb->statistics.keepalive_packets_recv++;
+ CTDB_INCREMENT_STAT(ctdb, keepalive_packets_recv);
break;
default:
- DEBUG(0,("%s: Packet with unknown operation %u\n",
+ DEBUG(DEBUG_CRIT,("%s: Packet with unknown operation %u\n",
__location__, hdr->operation));
break;
}
void ctdb_node_dead(struct ctdb_node *node)
{
if (node->flags & NODE_FLAGS_DISCONNECTED) { /* already disconnected: log and leave all counters untouched */
- DEBUG(1,("%s: node %s is already marked disconnected: %u connected\n",
+ DEBUG(DEBUG_INFO,("%s: node %s is already marked disconnected: %u connected\n",
node->ctdb->name, node->name,
node->ctdb->num_connected));
return;
}
node->ctdb->num_connected--;
- node->flags |= NODE_FLAGS_DISCONNECTED;
+ node->flags |= NODE_FLAGS_DISCONNECTED | NODE_FLAGS_UNHEALTHY; /* a dead node is flagged unhealthy as well as disconnected */
node->rx_cnt = 0;
node->dead_count = 0;
- DEBUG(1,("%s: node %s is dead: %u connected\n",
+
+ DEBUG(DEBUG_NOTICE,("%s: node %s is dead: %u connected\n",
node->ctdb->name, node->name, node->ctdb->num_connected));
ctdb_daemon_cancel_controls(node->ctdb, node);
+
+ if (node->ctdb->methods == NULL) { /* no transport methods: log and skip the restart below */
+ DEBUG(DEBUG_ERR,(__location__ " Can not restart transport while shutting down daemon.\n"));
+ return;
+ }
+
+ node->ctdb->methods->restart(node); /* NOTE(review): transport callback defined elsewhere; presumably re-establishes the link to this node -- confirm */
}
/*
void ctdb_node_connected(struct ctdb_node *node)
{
if (!(node->flags & NODE_FLAGS_DISCONNECTED)) {
- DEBUG(1,("%s: node %s is already marked connected: %u connected\n",
+ DEBUG(DEBUG_INFO,("%s: node %s is already marked connected: %u connected\n",
node->ctdb->name, node->name,
node->ctdb->num_connected));
return;
node->ctdb->num_connected++;
node->dead_count = 0;
node->flags &= ~NODE_FLAGS_DISCONNECTED;
- DEBUG(1,("%s: connected to %s - %u connected\n",
- node->ctdb->name, node->name, node->ctdb->num_connected));
+ node->flags |= NODE_FLAGS_UNHEALTHY;
+ DEBUG(DEBUG_NOTICE,
+ ("%s: connected to %s - %u connected\n",
+ node->ctdb->name, node->name, node->ctdb->num_connected));
}
struct queue_next {
/*
- trigered when a deferred packet is due
+ triggered when a deferred packet is due
*/
-static void queue_next_trigger(struct event_context *ev, struct timed_event *te,
+static void queue_next_trigger(struct tevent_context *ev,
+ struct tevent_timer *te,
struct timeval t, void *private_data)
{
struct queue_next *q = talloc_get_type(private_data, struct queue_next);
struct queue_next *q;
q = talloc(ctdb, struct queue_next);
if (q == NULL) {
- DEBUG(0,(__location__ " Failed to allocate deferred packet\n"));
+ DEBUG(DEBUG_ERR,(__location__ " Failed to allocate deferred packet\n"));
return;
}
q->ctdb = ctdb;
q->hdr = talloc_memdup(ctdb, hdr, hdr->length);
if (q->hdr == NULL) {
- DEBUG(0,("Error copying deferred packet to self\n"));
+ DEBUG(DEBUG_ERR,("Error copying deferred packet to self\n"));
return;
}
#if 0
/* use this to put packets directly into our recv function */
ctdb_input_pkt(q->ctdb, q->hdr);
#else
- event_add_timed(ctdb->ev, q, timeval_zero(), queue_next_trigger, q);
+ tevent_add_timer(ctdb->ev, q, timeval_zero(), queue_next_trigger, q);
#endif
}
struct ctdb_req_header *hdr)
{
int i;
- for (i=0;i<ctdb->num_nodes;i++) {
- hdr->destnode = ctdb->nodes[i]->vnn;
+ for (i=0; i < ctdb->num_nodes; i++) {
+ if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
+ continue;
+ }
+ hdr->destnode = ctdb->nodes[i]->pnn;
ctdb_queue_packet(ctdb, hdr);
}
}
struct ctdb_req_header *hdr)
{
int i;
- for (i=0;i<ctdb->num_nodes;i++) {
+ for (i=0; i < ctdb->num_nodes; i++) {
+ if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
+ continue;
+ }
if (!(ctdb->nodes[i]->flags & NODE_FLAGS_DISCONNECTED)) {
- hdr->destnode = ctdb->nodes[i]->vnn;
+ hdr->destnode = ctdb->nodes[i]->pnn;
ctdb_queue_packet(ctdb, hdr);
}
}
return;
}
- ctdb->statistics.node_packets_sent++;
+ CTDB_INCREMENT_STAT(ctdb, node_packets_sent);
- if (!ctdb_validate_vnn(ctdb, hdr->destnode)) {
- DEBUG(0,(__location__ " cant send to node %u that does not exist\n",
+ if (!ctdb_validate_pnn(ctdb, hdr->destnode)) {
+ DEBUG(DEBUG_CRIT,(__location__ " cant send to node %u that does not exist\n",
hdr->destnode));
return;
}
node = ctdb->nodes[hdr->destnode];
- if (hdr->destnode == ctdb->vnn) {
+ if (node->flags & NODE_FLAGS_DELETED) {
+ DEBUG(DEBUG_ERR, (__location__ " Can not queue packet to DELETED node %d\n", hdr->destnode));
+ return;
+ }
+
+ if (node->pnn == ctdb->pnn) {
ctdb_defer_packet(ctdb, hdr);
- } else {
- node->tx_cnt++;
- if (ctdb->methods->queue_pkt(node, (uint8_t *)hdr, hdr->length) != 0) {
- ctdb_fatal(ctdb, "Unable to queue packet\n");
- }
+ return;
+ }
+
+ if (ctdb->methods == NULL) {
+ DEBUG(DEBUG_ALERT, (__location__ " Can not queue packet. "
+ "Transport is DOWN\n"));
+ return;
+ }
+
+ node->tx_cnt++;
+ if (ctdb->methods->queue_pkt(node, (uint8_t *)hdr, hdr->length) != 0) {
+ ctdb_fatal(ctdb, "Unable to queue packet\n");
}
}
+
+
+/*
+ a valgrind hack to allow us to get opcode specific backtraces
+ very ugly, and relies on no compiler optimisation!
+
+ Every case label from 1 to 100 and the default label do exactly the
+ same thing: call ctdb_queue_packet(ctdb, hdr). The opcode argument
+ only selects which of these identical call sites is used; it is not
+ passed on to ctdb_queue_packet().
+
+ NOTE(review): the DO_OP helper macro is not #undef'd after the switch
+ within this view.
+*/
+void ctdb_queue_packet_opcode(struct ctdb_context *ctdb, struct ctdb_req_header *hdr, unsigned opcode)
+{
+ switch (opcode) {
+#define DO_OP(x) case x: ctdb_queue_packet(ctdb, hdr); break
+ DO_OP(1);
+ DO_OP(2);
+ DO_OP(3);
+ DO_OP(4);
+ DO_OP(5);
+ DO_OP(6);
+ DO_OP(7);
+ DO_OP(8);
+ DO_OP(9);
+ DO_OP(10);
+ DO_OP(11);
+ DO_OP(12);
+ DO_OP(13);
+ DO_OP(14);
+ DO_OP(15);
+ DO_OP(16);
+ DO_OP(17);
+ DO_OP(18);
+ DO_OP(19);
+ DO_OP(20);
+ DO_OP(21);
+ DO_OP(22);
+ DO_OP(23);
+ DO_OP(24);
+ DO_OP(25);
+ DO_OP(26);
+ DO_OP(27);
+ DO_OP(28);
+ DO_OP(29);
+ DO_OP(30);
+ DO_OP(31);
+ DO_OP(32);
+ DO_OP(33);
+ DO_OP(34);
+ DO_OP(35);
+ DO_OP(36);
+ DO_OP(37);
+ DO_OP(38);
+ DO_OP(39);
+ DO_OP(40);
+ DO_OP(41);
+ DO_OP(42);
+ DO_OP(43);
+ DO_OP(44);
+ DO_OP(45);
+ DO_OP(46);
+ DO_OP(47);
+ DO_OP(48);
+ DO_OP(49);
+ DO_OP(50);
+ DO_OP(51);
+ DO_OP(52);
+ DO_OP(53);
+ DO_OP(54);
+ DO_OP(55);
+ DO_OP(56);
+ DO_OP(57);
+ DO_OP(58);
+ DO_OP(59);
+ DO_OP(60);
+ DO_OP(61);
+ DO_OP(62);
+ DO_OP(63);
+ DO_OP(64);
+ DO_OP(65);
+ DO_OP(66);
+ DO_OP(67);
+ DO_OP(68);
+ DO_OP(69);
+ DO_OP(70);
+ DO_OP(71);
+ DO_OP(72);
+ DO_OP(73);
+ DO_OP(74);
+ DO_OP(75);
+ DO_OP(76);
+ DO_OP(77);
+ DO_OP(78);
+ DO_OP(79);
+ DO_OP(80);
+ DO_OP(81);
+ DO_OP(82);
+ DO_OP(83);
+ DO_OP(84);
+ DO_OP(85);
+ DO_OP(86);
+ DO_OP(87);
+ DO_OP(88);
+ DO_OP(89);
+ DO_OP(90);
+ DO_OP(91);
+ DO_OP(92);
+ DO_OP(93);
+ DO_OP(94);
+ DO_OP(95);
+ DO_OP(96);
+ DO_OP(97);
+ DO_OP(98);
+ DO_OP(99);
+ DO_OP(100);
+ default:
+ ctdb_queue_packet(ctdb, hdr);
+ break;
+ }
+}