$(CTDB_CLIENT_OBJ) $(CTDB_TCP_OBJ) @INFINIBAND_WRAPPER_OBJ@
TEST_BINS=bin/ctdb_bench bin/ctdb_fetch bin/ctdb_store @INFINIBAND_BINS@
-BINS = bin/ctdb @CTDB_SCSI_IO@
+BINS = bin/ctdb @CTDB_SCSI_IO@ bin/smnotify
SBINS = bin/ctdbd
DIRS = lib bin
@echo Linking $@
@$(CC) $(CFLAGS) -o $@ tools/ctdb.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
+# Link the smnotify utility from its own object file plus the two objects
+# whose C sources are produced by the rpcgen rules below.
+bin/smnotify: utils/smnotify/gen_xdr.o utils/smnotify/gen_smnotify.o utils/smnotify/smnotify.o
+ @echo Linking $@
+ @$(CC) $(CFLAGS) -o $@ utils/smnotify/smnotify.o utils/smnotify/gen_xdr.o utils/smnotify/gen_smnotify.o $(LIB_FLAGS)
+
+# Generate the header from the RPC protocol description
+# (rpcgen -h; -C presumably selects ANSI C output - see rpcgen(1)).
+# NOTE(review): output goes through a shell redirect, so a failing rpcgen
+# still leaves a (possibly empty) target file behind.
+utils/smnotify/smnotify.h: utils/smnotify/smnotify.x
+ @echo Generating $@
+ rpcgen -C -h utils/smnotify/smnotify.x > utils/smnotify/smnotify.h
+
+# Generate gen_xdr.c from the protocol description (rpcgen -c; presumably
+# the XDR encode/decode routines - see rpcgen(1)).  The generated header is
+# listed as a prerequisite so it exists before this file is compiled.
+utils/smnotify/gen_xdr.c: utils/smnotify/smnotify.x utils/smnotify/smnotify.h
+ @echo Generating $@
+ rpcgen -C -c utils/smnotify/smnotify.x > utils/smnotify/gen_xdr.c
+
+# Generate gen_smnotify.c from the protocol description (rpcgen -l;
+# presumably the client-side stubs - see rpcgen(1)).  The generated header is
+# listed as a prerequisite so it exists before this file is compiled.
+utils/smnotify/gen_smnotify.c: utils/smnotify/smnotify.x utils/smnotify/smnotify.h
+ @echo Generating $@
+ rpcgen -C -l utils/smnotify/smnotify.x > utils/smnotify/gen_smnotify.c
+
bin/ctdb_bench: $(CTDB_CLIENT_OBJ) tests/ctdb_bench.o
@echo Linking $@
@$(CC) $(CFLAGS) -o $@ tests/ctdb_bench.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
return 0;
}
+/*
+  ask destnode for the list of tcp tickles it knows about for one vnn.
+
+  The vnn is sent as the payload of a CTDB_CONTROL_GET_TCP_TICKLE_LIST
+  control.  If ctdb_control() itself fails, an error is logged and -1 is
+  returned without touching *list.  Otherwise *list is pointed at the reply
+  buffer (handed back by ctdb_control(), which was given mem_ctx) and the
+  status reported for the control is returned.
+
+  NOTE(review): the timeout argument is never used in this function; NULL is
+  passed for the last two ctdb_control() arguments.  Confirm whether one of
+  them is the timeout and whether it should be forwarded.
+ */
+int ctdb_ctrl_get_tcp_tickles(struct ctdb_context *ctdb,
+			      struct timeval timeout, uint32_t destnode,
+			      TALLOC_CTX *mem_ctx, uint32_t vnn,
+			      struct ctdb_control_tcp_tickle_list **list)
+{
+	TDB_DATA request, reply;
+	int32_t status;
+
+	request.dsize = sizeof(vnn);
+	request.dptr  = (uint8_t*)&vnn;
+
+	if (ctdb_control(ctdb, destnode, 0,
+			 CTDB_CONTROL_GET_TCP_TICKLE_LIST, 0, request,
+			 mem_ctx, &reply, &status, NULL, NULL) != 0) {
+		DEBUG(0,(__location__ " ctdb_control for get tcp tickles failed\n"));
+		return -1;
+	}
+
+	*list = (struct ctdb_control_tcp_tickle_list *)reply.dptr;
+
+	return status;
+}
/*
initialise the ctdb daemon for client applications
--- /dev/null
+/*
+ a talloc based red-black tree
+
+ Copyright (C) Ronnie Sahlberg 2007
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "includes.h"
+#include "rb_tree.h"
+
+/*
+  Treat a failed allocation as fatal: when p evaluates to false, log the
+  stringified expression and the source location at debug level 0 and
+  terminate the process with exit status 10.
+  Note that the text of the argument ends up in the log message.
+ */
+#define NO_MEMORY_FATAL(p) do { if (!(p)) { \
+ DEBUG(0,("Out of memory for %s at %s\n", #p, __location__)); \
+ exit(10); \
+ }} while (0)
+
+
+/*
+  allocate a new, zero-initialised (and therefore empty) tree as a talloc
+  child of memctx.
+  An allocation failure terminates the process via NO_MEMORY_FATAL, so the
+  caller never sees a NULL return.
+ */
+trbt_tree_t *
+trbt_create(TALLOC_CTX *memctx)
+{
+	trbt_tree_t *tree = talloc_zero(memctx, trbt_tree_t);
+
+	NO_MEMORY_FATAL(tree);
+	return tree;
+}
+
+/* return the parent link of node (NULL when node is the root) */
+static inline trbt_node_t *
+trbt_parent(trbt_node_t *node)
+{
+	trbt_node_t *up = node->parent;
+
+	return up;
+}
+
+/* return the parent of node's parent, or NULL when node has no parent */
+static inline trbt_node_t *
+trbt_grandparent(trbt_node_t *node)
+{
+	trbt_node_t *up = trbt_parent(node);
+
+	return (up != NULL) ? up->parent : NULL;
+}
+
+/*
+  return the other child of node's grandparent, i.e. the sibling of node's
+  parent.  Returns NULL when node has no parent or no grandparent; the
+  result may also be NULL when the grandparent has no child on that side.
+ */
+static inline trbt_node_t *
+trbt_uncle(trbt_node_t *node)
+{
+	trbt_node_t *up = trbt_parent(node);
+	trbt_node_t *top;
+
+	if (up == NULL) {
+		return NULL;
+	}
+
+	top = trbt_parent(up);
+	if (top == NULL) {
+		return NULL;
+	}
+
+	return (up == top->left) ? top->right : top->left;
+}
+
+
+/* forward declarations for the insert fixup chain: trbt_insert_case3()
+   re-enters trbt_insert_case1() on the grandparent, so case1 must be
+   declared before it is defined */
+static inline void trbt_insert_case1(trbt_tree_t *tree, trbt_node_t *node);
+static inline void trbt_insert_case2(trbt_tree_t *tree, trbt_node_t *node);
+
+/*
+  rotate the subtree rooted at node to the left: node's right child takes
+  node's place (below node's parent, or as the root of the tree when node
+  has no parent) and node becomes the left child of that former right child.
+  node->right is dereferenced unconditionally, so it must not be NULL.
+ */
+static inline void
+trbt_rotate_left(trbt_node_t *node)
+{
+ trbt_tree_t *tree = node->tree;
+
+ /* make whatever referenced node reference node's right child instead */
+ if(node->parent){
+ if(node->parent->left==node){
+ node->parent->left=node->right;
+ } else {
+ node->parent->right=node->right;
+ }
+ } else {
+ tree->tree=node->right;
+ }
+ node->right->parent=node->parent;
+ node->parent=node->right;
+ /* node takes over the left subtree of its former right child */
+ node->right=node->right->left;
+ if(node->right){
+ node->right->parent=node;
+ }
+ /* node->parent is now the former right child */
+ node->parent->left=node;
+}
+
+/*
+  rotate the subtree rooted at node to the right: node's left child takes
+  node's place (below node's parent, or as the root of the tree when node
+  has no parent) and node becomes the right child of that former left child.
+  node->left is dereferenced unconditionally, so it must not be NULL.
+ */
+static inline void
+trbt_rotate_right(trbt_node_t *node)
+{
+ trbt_tree_t *tree = node->tree;
+
+ /* make whatever referenced node reference node's left child instead */
+ if(node->parent){
+ if(node->parent->left==node){
+ node->parent->left=node->left;
+ } else {
+ node->parent->right=node->left;
+ }
+ } else {
+ tree->tree=node->left;
+ }
+ node->left->parent=node->parent;
+ node->parent=node->left;
+ /* node takes over the right subtree of its former left child */
+ node->left=node->left->right;
+ if(node->left){
+ node->left->parent=node;
+ }
+ /* node->parent is now the former left child */
+ node->parent->right=node;
+}
+
+/* colour of a node; a NULL node stands for a leaf and is black by definition */
+static inline int trbt_get_color(trbt_node_t *node)
+{
+	if (node != NULL) {
+		return node->rb_color;
+	}
+	return TRBT_BLACK;
+}
+/* colour of node's left child; black when node or that child is NULL */
+static inline int trbt_get_color_left(trbt_node_t *node)
+{
+	if (node != NULL && node->left != NULL) {
+		return node->left->rb_color;
+	}
+	return TRBT_BLACK;
+}
+/* colour of node's right child; black when node or that child is NULL */
+static inline int trbt_get_color_right(trbt_node_t *node)
+{
+	if (node != NULL && node->right != NULL) {
+		return node->right->rb_color;
+	}
+	return TRBT_BLACK;
+}
+/*
+  set the colour of a node.
+  A NULL node stands for a leaf; leaves are black by definition and have no
+  storage, so any request to colour a NULL node is ignored.  (Previously
+  only the request for black was ignored and any other colour dereferenced
+  the NULL pointer.)
+ */
+static inline void trbt_set_color(trbt_node_t *node, int color)
+{
+	if (node == NULL) {
+		return;
+	}
+	node->rb_color = color;
+}
+/*
+  set the colour of node's left child.
+  Nothing is done when node or its left child is NULL: a missing child is a
+  leaf, which is black by definition and has no storage.  (Previously a
+  non-black colour on a missing child dereferenced a NULL pointer.)
+ */
+static inline void trbt_set_color_left(trbt_node_t *node, int color)
+{
+	if (node == NULL || node->left == NULL) {
+		return;
+	}
+	node->left->rb_color = color;
+}
+/*
+  set the colour of node's right child.
+  Nothing is done when node or its right child is NULL: a missing child is a
+  leaf, which is black by definition and has no storage.  (Previously a
+  non-black colour on a missing child dereferenced a NULL pointer.)
+ */
+static inline void trbt_set_color_right(trbt_node_t *node, int color)
+{
+	if (node == NULL || node->right == NULL) {
+		return;
+	}
+	node->right->rb_color = color;
+}
+
+/*
+  last step of the insert fixup: colour the parent black and the
+  grandparent red, then rotate the grandparent - to the right when node and
+  its parent are both left children, to the left otherwise.
+  Both the parent and the grandparent of node are dereferenced, so they
+  must exist.  The tree argument is not used here.
+ */
+static inline void
+trbt_insert_case5(trbt_tree_t *tree, trbt_node_t *node)
+{
+	trbt_node_t *parent = trbt_parent(node);
+	trbt_node_t *grandparent = trbt_parent(parent);
+	int left_left = (node == parent->left) && (parent == grandparent->left);
+
+	parent->rb_color = TRBT_BLACK;
+	grandparent->rb_color = TRBT_RED;
+
+	if (left_left) {
+		trbt_rotate_right(grandparent);
+		return;
+	}
+	trbt_rotate_left(grandparent);
+}
+
+/*
+  insert fixup, step 4: when node is an "inner" grandchild (right child of
+  a left parent, or left child of a right parent) rotate the parent so that
+  the former parent becomes the outer grandchild, and continue with that
+  former parent.  Then hand over to step 5.
+  Does nothing at all when node has no grandparent.
+ */
+static inline void
+trbt_insert_case4(trbt_tree_t *tree, trbt_node_t *node)
+{
+	trbt_node_t *parent = trbt_parent(node);
+	trbt_node_t *grandparent = trbt_parent(parent);
+
+	if (grandparent == NULL) {
+		return;
+	}
+
+	if (parent == grandparent->left) {
+		if (node == parent->right) {
+			trbt_rotate_left(parent);
+			/* the former parent is now our left child */
+			node = node->left;
+		}
+	} else if (parent == grandparent->right) {
+		if (node == parent->left) {
+			trbt_rotate_right(parent);
+			/* the former parent is now our right child */
+			node = node->right;
+		}
+	}
+
+	trbt_insert_case5(tree, node);
+}
+
+/*
+  insert fixup, step 3: when the uncle exists and is red, colour parent and
+  uncle black and the grandparent red, then restart the fixup at the
+  grandparent.  In every other situation continue with step 4.
+ */
+static inline void
+trbt_insert_case3(trbt_tree_t *tree, trbt_node_t *node)
+{
+	trbt_node_t *uncle = trbt_uncle(node);
+	trbt_node_t *parent;
+	trbt_node_t *grandparent;
+
+	if (uncle == NULL || uncle->rb_color != TRBT_RED) {
+		trbt_insert_case4(tree, node);
+		return;
+	}
+
+	parent = trbt_parent(node);
+	parent->rb_color = TRBT_BLACK;
+	uncle->rb_color = TRBT_BLACK;
+
+	grandparent = trbt_grandparent(node);
+	grandparent->rb_color = TRBT_RED;
+
+	trbt_insert_case1(tree, grandparent);
+}
+
+/*
+  insert fixup, step 2: nothing to repair when the parent is black;
+  otherwise continue with step 3.
+  The caller (step 1) only gets here for nodes that have a parent.
+ */
+static inline void
+trbt_insert_case2(trbt_tree_t *tree, trbt_node_t *node)
+{
+	if (trbt_parent(node)->rb_color != TRBT_BLACK) {
+		trbt_insert_case3(tree, node);
+	}
+}
+
+/*
+  insert fixup, step 1: a node without a parent is the root and is simply
+  coloured black; any other node goes on to step 2.
+ */
+static inline void
+trbt_insert_case1(trbt_tree_t *tree, trbt_node_t *node)
+{
+	if (trbt_parent(node) != NULL) {
+		trbt_insert_case2(tree, node);
+		return;
+	}
+	node->rb_color = TRBT_BLACK;
+}
+
+/*
+  return the other child of node's parent (possibly NULL), or NULL when
+  node has no parent.
+ */
+static inline trbt_node_t *
+trbt_sibling(trbt_node_t *node)
+{
+	trbt_node_t *up = trbt_parent(node);
+
+	if (up == NULL) {
+		return NULL;
+	}
+	return (up->left == node) ? up->right : up->left;
+}
+
+/*
+  return the other child of node's parent.
+  Unlike trbt_sibling() this dereferences node->parent without checking it,
+  so it must not be called on the root.
+  NOTE(review): nothing in this file calls this function and the name looks
+  like a misspelling of "sibling" - candidate for removal.
+ */
+static inline trbt_node_t *
+trbt_sibline(trbt_node_t *node)
+{
+ if (node==node->parent->left) {
+ return node->parent->right;
+ } else {
+ return node->parent->left;
+ }
+}
+
+/*
+  delete fixup, final step: the sibling takes over the parent's colour, the
+  parent becomes black, the sibling's child on the side away from node
+  becomes black, and the parent is rotated towards node (left when node is
+  the left child, right otherwise).
+  node must have a parent; it is dereferenced unconditionally.
+ */
+static inline void
+trbt_delete_case6(trbt_node_t *node)
+{
+ trbt_node_t *sibling, *parent;
+
+ sibling = trbt_sibling(node);
+ parent = trbt_parent(node);
+
+ /* the parent's colour must be copied before the parent is recoloured */
+ trbt_set_color(sibling, parent->rb_color);
+ trbt_set_color(parent, TRBT_BLACK);
+ if (node == parent->left) {
+ trbt_set_color_right(sibling, TRBT_BLACK);
+ trbt_rotate_left(parent);
+ } else {
+ trbt_set_color_left(sibling, TRBT_BLACK);
+ trbt_rotate_right(parent);
+ }
+}
+
+
+/*
+  delete fixup, step 5: when the sibling is black and only its child on
+  node's side is red, colour the sibling red and that child black and
+  rotate the sibling away from node.  In all situations processing then
+  continues with step 6.
+  node must have a parent; it is dereferenced unconditionally.
+ */
+static inline void
+trbt_delete_case5(trbt_node_t *node)
+{
+ trbt_node_t *parent, *sibling;
+
+ parent = trbt_parent(node);
+ sibling = trbt_sibling(node);
+ /* node is a left child: the sibling's red child is its left one */
+ if ( (node == parent->left)
+ &&(trbt_get_color(sibling) == TRBT_BLACK)
+ &&(trbt_get_color_left(sibling) == TRBT_RED)
+ &&(trbt_get_color_right(sibling) == TRBT_BLACK) ){
+ trbt_set_color(sibling, TRBT_RED);
+ trbt_set_color_left(sibling, TRBT_BLACK);
+ trbt_rotate_right(sibling);
+ trbt_delete_case6(node);
+ return;
+ }
+ /* mirror image: node is a right child */
+ if ( (node == parent->right)
+ &&(trbt_get_color(sibling) == TRBT_BLACK)
+ &&(trbt_get_color_right(sibling) == TRBT_RED)
+ &&(trbt_get_color_left(sibling) == TRBT_BLACK) ){
+ trbt_set_color(sibling, TRBT_RED);
+ trbt_set_color_right(sibling, TRBT_BLACK);
+ trbt_rotate_left(sibling);
+ trbt_delete_case6(node);
+ return;
+ }
+
+ trbt_delete_case6(node);
+}
+
+/*
+  delete fixup, step 4: when the parent is red while the sibling and both
+  of the sibling's children are black, it is enough to colour the sibling
+  red and the parent black.  Otherwise continue with step 5.
+ */
+static inline void
+trbt_delete_case4(trbt_node_t *node)
+{
+	trbt_node_t *sibling = trbt_sibling(node);
+	int recolour_only;
+
+	recolour_only = (trbt_get_color(node->parent) == TRBT_RED)
+		&& (trbt_get_color(sibling) == TRBT_BLACK)
+		&& (trbt_get_color_left(sibling) == TRBT_BLACK)
+		&& (trbt_get_color_right(sibling) == TRBT_BLACK);
+
+	if (!recolour_only) {
+		trbt_delete_case5(node);
+		return;
+	}
+
+	trbt_set_color(sibling, TRBT_RED);
+	trbt_set_color(node->parent, TRBT_BLACK);
+}
+
+/* step 3 restarts the fixup at the parent, so step 1 is declared up front */
+static void trbt_delete_case1(trbt_node_t *node);
+
+/*
+  delete fixup, step 3: when the parent, the sibling and both of the
+  sibling's children are all black, colour the sibling red and restart the
+  fixup one level up at the parent.  Otherwise continue with step 4.
+ */
+static inline void
+trbt_delete_case3(trbt_node_t *node)
+{
+	trbt_node_t *sibling = trbt_sibling(node);
+	int all_black;
+
+	all_black = (trbt_get_color(node->parent) == TRBT_BLACK)
+		&& (trbt_get_color(sibling) == TRBT_BLACK)
+		&& (trbt_get_color_left(sibling) == TRBT_BLACK)
+		&& (trbt_get_color_right(sibling) == TRBT_BLACK);
+
+	if (!all_black) {
+		trbt_delete_case4(node);
+		return;
+	}
+
+	trbt_set_color(sibling, TRBT_RED);
+	trbt_delete_case1(node->parent);
+}
+
+/*
+  delete fixup, step 2: when the sibling is red, colour the parent red and
+  the sibling black and rotate the parent towards node (left when node is
+  the left child, right otherwise).  Processing always continues with
+  step 3.
+ */
+static inline void
+trbt_delete_case2(trbt_node_t *node)
+{
+ trbt_node_t *sibling;
+
+ sibling = trbt_sibling(node);
+ if (trbt_get_color(sibling) == TRBT_RED) {
+ trbt_set_color(node->parent, TRBT_RED);
+ trbt_set_color(sibling, TRBT_BLACK);
+ if (node == node->parent->left) {
+ trbt_rotate_left(node->parent);
+ } else {
+ trbt_rotate_right(node->parent);
+ }
+ }
+ trbt_delete_case3(node);
+}
+
+/*
+  delete fixup, step 1: a node without a parent needs no repair; any other
+  node goes on to step 2.
+ */
+static void
+trbt_delete_case1(trbt_node_t *node)
+{
+	if (node->parent != NULL) {
+		trbt_delete_case2(node);
+	}
+}
+
+/*
+  unlink node from its tree, restore the red-black colouring, and free the
+  node with talloc_free().
+ */
+static void
+delete_node(trbt_node_t *node)
+{
+ trbt_node_t *parent, *child, dc;
+
+ /* If this node has two children, move the data and key of its
+ in-order predecessor (the largest key in the left subtree) into
+ this node and delete the predecessor node instead.
+ The predecessor is guaranteed to have at most one child
+ node since its right arm must be NULL
+ (It must be NULL since we are its successor and we are above
+ it in the tree)
+ */
+ if (node->left != NULL && node->right != NULL) {
+ /* This node has two children, just copy the data */
+ /* find the predecessor */
+ trbt_node_t *temp = node->left;
+
+ while (temp->right != NULL) {
+ temp = temp->right;
+ }
+
+ /* free this node's data and take over the data and key of
+ the predecessor; the predecessor is left with no data.
+ */
+ talloc_free(node->data);
+ node->data = talloc_steal(node, temp->data);
+ node->key32 = temp->key32;
+ temp->data = NULL;
+ temp->key32 = -1;
+ /* then delete the temp node.
+ this node is guaranteed to have at least one leaf child */
+ delete_node(temp);
+ return;
+ }
+
+
+ /* There is at most one child to this node to be deleted */
+ child = node->left;
+ if (node->right) {
+ child = node->right;
+ }
+
+ /* If the node to be deleted did not have any child at all we
+ create a temporary dummy node for the child and mark it black.
+ Once the delete of the node is finished, we remove this dummy
+ node, which is simple to do since it is guaranteed that it will
+ still not have any children after the delete operation.
+ This is because we don't represent the leaf-nodes as actual nodes
+ in this implementation.
+ The dummy lives on the stack (dc), so it must be unlinked again
+ before this function returns.
+ */
+ if (!child) {
+ child = &dc;
+ child->tree = node->tree;
+ child->left=NULL;
+ child->right=NULL;
+ child->rb_color=TRBT_BLACK;
+ child->data=NULL;
+ }
+
+ /* replace node with child */
+ parent = trbt_parent(node);
+ if (parent) {
+ if (parent->left == node) {
+ parent->left = child;
+ } else {
+ parent->right = child;
+ }
+ } else {
+ node->tree->tree = child;
+ }
+ child->parent = node->parent;
+
+
+ /* only the removal of a black node requires repair: a red child is
+ simply recoloured black, otherwise the fixup chain is run */
+ if (node->rb_color == TRBT_BLACK) {
+ if (trbt_get_color(child) == TRBT_RED) {
+ child->rb_color = TRBT_BLACK;
+ } else {
+ trbt_delete_case1(child);
+ }
+ }
+
+ /* If we had to create a temporary dummy node to represent a black
+ leaf child we now have to delete it.
+ This is simple since this dummy node originally had no children
+ and we are guaranteed that it will also not have any children
+ after the node has been deleted and any possible rotations
+ have occurred.
+
+ The only special case is if this was the last node of the tree
+ in which case we have to reset the root to NULL as well.
+ Otherwise it is enough to just unlink the child from its new
+ parent.
+ */
+ if (child == &dc) {
+ if (child->parent == NULL) {
+ node->tree->tree = NULL;
+ } else if (child == child->parent->left) {
+ child->parent->left = NULL;
+ } else {
+ child->parent->right = NULL;
+ }
+ }
+
+ talloc_free(node);
+ return;
+}
+
+/*
+  allocate a black node without children for key, as a talloc child of
+  tree, with the given parent link.  data is moved below the new node with
+  talloc_steal().
+  An allocation failure terminates the process via NO_MEMORY_FATAL.
+  The caller is responsible for linking the node into the parent.
+ */
+static inline trbt_node_t *
+trbt_create_node(trbt_tree_t *tree, trbt_node_t *parent, uint32_t key, void *data)
+{
+	trbt_node_t *node = talloc_zero(tree, trbt_node_t);
+
+	NO_MEMORY_FATAL(node);
+
+	node->key32    = key;
+	node->rb_color = TRBT_BLACK;
+	node->tree     = tree;
+	node->parent   = parent;
+	node->left     = NULL;
+	node->right    = NULL;
+	node->data     = talloc_steal(node, data);
+
+	return node;
+}
+
+/* insert a new node in the tree.
+   returns 0 on success; data is then owned by the new node.
+   if there is already a node with a matching key in the tree
+   we return an error (-1) and leave both the tree and data untouched.
+ */
+int
+trbt_insert32(trbt_tree_t *tree, uint32_t key, void *data)
+{
+	trbt_node_t *cur = tree->tree;
+	trbt_node_t *added;
+
+	/* empty tree: the new node simply becomes the (black) root */
+	if (cur == NULL) {
+		tree->tree = trbt_create_node(tree, NULL, key, data);
+		return 0;
+	}
+
+	/* walk down until we reach the empty slot where the key belongs */
+	for (;;) {
+		trbt_node_t **slot;
+
+		if (key == cur->key32) {
+			/* this key already exists */
+			return -1;
+		}
+
+		slot = (key < cur->key32) ? &cur->left : &cur->right;
+		if (*slot == NULL) {
+			added = trbt_create_node(tree, cur, key, data);
+			*slot = added;
+			break;
+		}
+		cur = *slot;
+	}
+
+	/* a new leaf starts out red; then repair the colouring */
+	added->rb_color = TRBT_RED;
+	trbt_insert_case1(tree, added);
+	return 0;
+}
+
+/*
+  return the data pointer stored under key, or NULL when the key is not in
+  the tree.  A key stored with NULL data is indistinguishable from a
+  missing key.
+ */
+void *
+trbt_lookup32(trbt_tree_t *tree, uint32_t key)
+{
+	trbt_node_t *cur = tree->tree;
+
+	while (cur != NULL) {
+		if (key == cur->key32) {
+			return cur->data;
+		}
+		cur = (key < cur->key32) ? cur->left : cur->right;
+	}
+
+	return NULL;
+}
+
+/*
+  remove the node stored under key, if there is one.
+  Nothing happens when the key is not in the tree.
+ */
+void
+trbt_delete32(trbt_tree_t *tree, uint32_t key)
+{
+	trbt_node_t *cur = tree->tree;
+
+	while (cur != NULL && cur->key32 != key) {
+		cur = (key < cur->key32) ? cur->left : cur->right;
+	}
+
+	if (cur != NULL) {
+		delete_node(cur);
+	}
+}
+
+
+
+# if 0
+/*
+  debug helper (compiled out by the surrounding "#if 0"): recursively print
+  the subtree below node in key order - left subtree, the node itself
+  indented according to levels, then the right subtree.
+  NOTE(review): key32 is a uint32_t but is printed with %d.
+ */
+static void printtree(trbt_node_t *node, int levels)
+{
+ int i;
+ if(node==NULL)return;
+ printtree(node->left, levels+1);
+
+ for(i=0;i<levels;i++)printf(" ");
+ printf("key:%d COLOR:%s\n",node->key32,node->rb_color==TRBT_BLACK?"BLACK":"RED");
+
+ printtree(node->right, levels+1);
+ printf("\n");
+}
+
+/*
+  debug helper (compiled out by the surrounding "#if 0"): print the whole
+  tree between "---" and "===" markers, or "tree is empty" for a tree
+  without a root.
+ */
+void print_tree(trbt_tree_t *tree)
+{
+ if(tree->tree==NULL){
+ printf("tree is empty\n");
+ return;
+ }
+ printf("---\n");
+ printtree(tree->tree->left, 1);
+ printf("root node key:%d COLOR:%s\n",tree->tree->key32,tree->tree->rb_color==TRBT_BLACK?"BLACK":"RED");
+ printtree(tree->tree->right, 1);
+ printf("===\n");
+}
+
+
+/*
+  debug helper (compiled out by the surrounding "#if 0"): stress the tree
+  by inserting and deleting random keys in the range 0..19 in a loop,
+  printing the tree after every operation.  The loop has no regular exit.
+  The inner "#if 0" section is an older deterministic test that is disabled
+  as well.
+  NOTE(review): str, ret and NUM are unused, and the outer i is shadowed by
+  the i declared inside the loop.
+ */
+void
+test_tree(void)
+{
+ trbt_tree_t *tree;
+ char *str;
+ int i, ret;
+ int NUM=15;
+ int cnt=0;
+
+ tree=trbt_create(talloc_new(NULL));
+#if 0
+ for(i=0;i<10;i++){
+ printf("adding node %i\n",i);
+ trbt_insert32(tree, i, NULL);
+ print_tree(tree);
+ }
+ printf("deleting node %i\n",3);
+ trbt_delete32(tree, 3);
+ print_tree(tree);
+ for(i=0;i<10;i++){
+ printf("deleting node %i\n",i);
+ trbt_delete32(tree, i);
+ print_tree(tree);
+ }
+exit(0);
+#endif
+ while(++cnt){
+ int i;
+ printf("iteration : %d\n",cnt);
+ i=random()%20;
+ printf("adding node %i\n",i);
+ trbt_insert32(tree, i, NULL);
+ print_tree(tree);
+
+ i=random()%20;
+ printf("deleting node %i\n",i);
+ trbt_delete32(tree, i);
+ print_tree(tree);
+ }
+
+}
+
+#endif
--- /dev/null
+/*
+ a talloc based red-black tree
+
+ Copyright (C) Ronnie Sahlberg 2007
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+
+
+#ifndef CTDB_RB_TREE_H
+#define CTDB_RB_TREE_H
+
+/*
+  This header relies on the including file to have declared uint32_t and
+  TALLOC_CTX already (rb_tree.c includes "includes.h" first).
+  The include guard makes it safe to include the header more than once.
+ */
+
+/* node colours, stored in trbt_node_t.rb_color */
+#define TRBT_RED		0x00
+#define TRBT_BLACK		0x01
+
+/* one node of the tree */
+typedef struct _trbt_node_t {
+	struct _trbt_tree_t *tree;	/* the tree this node belongs to */
+	struct _trbt_node_t *parent;	/* NULL for the root node */
+	struct _trbt_node_t *left;	/* subtree with smaller keys, or NULL */
+	struct _trbt_node_t *right;	/* subtree with larger keys, or NULL */
+	uint32_t rb_color;		/* TRBT_RED or TRBT_BLACK */
+	uint32_t key32;			/* the key this node is stored under */
+	void *data;			/* user data, a talloc child of the node */
+} trbt_node_t;
+
+/* the tree itself; it is the talloc parent of all of its nodes */
+typedef struct _trbt_tree_t {
+	trbt_node_t *tree;		/* root node, NULL while the tree is empty */
+} trbt_tree_t;
+
+
+/* create a new empty tree as a talloc child of memctx.
+   the process exits if the allocation fails */
+trbt_tree_t *trbt_create(TALLOC_CTX *memctx);
+
+/* return the data stored under key, or NULL if the key is not in the tree */
+void *trbt_lookup32(trbt_tree_t *tree, uint32_t key);
+
+/* store data under key; data becomes a talloc child of the new node.
+   returns 0 on success and -1 if the key is already in the tree */
+int trbt_insert32(trbt_tree_t *tree, uint32_t key, void *data);
+
+/* remove the node stored under key, if any, freeing the node */
+void trbt_delete32(trbt_tree_t *tree, uint32_t key);
+
+#endif /* CTDB_RB_TREE_H */
+
.\" Title: ctdb
.\" Author:
-.\" Generator: DocBook XSL Stylesheets v1.72.0 <http://docbook.sf.net/>
-.\" Date: 07/10/2007
+.\" Generator: DocBook XSL Stylesheets v1.71.0 <http://docbook.sf.net/>
+.\" Date: 08/03/2007
.\" Manual:
.\" Source:
.\"
-.TH "CTDB" "1" "07/10/2007" "" ""
+.TH "CTDB" "1" "08/03/2007" "" ""
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
ctdb is a utility to view and manage a ctdb cluster.
.SH "OPTIONS"
.PP
-\-n <node>
-.RS 4
-This specifies on which node to execute the command. Default is to run the command on the deamon running on the local host.
+\-n <vnn>
+.RS 3n
+This specifies the virtual node number on which to execute the command. Default is to run the command on the daemon running on the local host.
+.sp
+The virtual node number is an integer that describes the node in the cluster. The first node has virtual node number 0.
.RE
.PP
\-Y
-.RS 4
+.RS 3n
Produce output in machinereadable form for easier parsing by scripts. Not all commands support this option.
.RE
.PP
\-t <timeout>
-.RS 4
+.RS 3n
How long should ctdb wait for a command to complete before timing out. Default is 3 seconds.
.RE
.PP
\-? \-\-help
-.RS 4
+.RS 3n
Print some help text to the screen.
.RE
.PP
\-\-usage
-.RS 4
+.RS 3n
Print useage information to the screen.
.RE
.PP
\-d \-\-debug=<debuglevel>
-.RS 4
+.RS 3n
Change the debug level for the command. Default is 0.
.RE
.PP
\-\-socket=<filename>
-.RS 4
+.RS 3n
Specify the socketname to use when connecting to the local ctdb daemon. The default is /tmp/ctdb.socket .
.sp
You only need to specify this parameter if you run multiple ctdb daemons on the same physical host and thus can not use the default name for the domain socket.
.PP
Example output:
.sp
-.RS 4
+.RS 3n
.nf
Number of nodes:4
vnn:0 11.1.2.200 OK (THIS NODE)
.PP
Example output:
.sp
-.RS 4
+.RS 3n
.nf
response from 0 time=0.000054 sec (3 clients)
response from 1 time=0.000144 sec (2 clients)
.PP
Example output:
.sp
-.RS 4
+.RS 3n
.nf
Number of nodes:4
12.1.1.1 0
.PP
Example output:
.sp
-.RS 4
+.RS 3n
.nf
MaxRedirectCount = 3
.PP
Example output:
.sp
-.RS 4
+.RS 3n
.nf
MaxRedirectCount = 5
SeqnumFrequency = 1
.PP
Example output:
.sp
-.RS 4
+.RS 3n
.nf
CTDB version 1
num_clients 3
.PP
Example output:
.sp
-.RS 4
+.RS 3n
.nf
Number of databases:4
dbid:0x42fe72c5 name:locking.tdb path:/var/ctdb/locking.tdb.0
\fI\%http://ctdb.samba.org/\fR
.SH "COPYRIGHT/LICENSE"
.sp
-.RS 4
+.RS 3n
.nf
Copyright (C) Andrew Tridgell 2007
Copyright (C) Ronnie sahlberg 2007
-<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>ctdb</title><meta name="generator" content="DocBook XSL Stylesheets V1.72.0"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="refentry" lang="en"><a name="ctdb.1"></a><div class="titlepage"></div><div class="refnamediv"><h2>Name</h2><p>ctdb — clustered tdb database management utility</p></div><div class="refsynopsisdiv"><h2>Synopsis</h2><div class="cmdsynopsis"><p><code class="command">ctdb [ OPTIONS ] COMMAND ...</code> </p></div><div class="cmdsynopsis"><p><code class="command">ctdb</code> [-n <node>] [-Y] [-t <timeout>] [-? --help] [--usage] [-d --debug=<INTEGER>] [--socket=<filename>]</p></div></div><div class="refsect1" lang="en"><a name="id2488867"></a><h2>DESCRIPTION</h2><p>
+<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>ctdb</title><meta name="generator" content="DocBook XSL Stylesheets V1.71.0"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="refentry" lang="en"><a name="ctdb.1"></a><div class="titlepage"></div><div class="refnamediv"><h2>Name</h2><p>ctdb — clustered tdb database management utility</p></div><div class="refsynopsisdiv"><h2>Synopsis</h2><div class="cmdsynopsis"><p><code class="command">ctdb [ OPTIONS ] COMMAND ...</code> </p></div><div class="cmdsynopsis"><p><code class="command">ctdb</code> [-n <node>] [-Y] [-t <timeout>] [-? --help] [--usage] [-d --debug=<INTEGER>] [--socket=<filename>]</p></div></div><div class="refsect1" lang="en"><a name="id2480829"></a><h2>DESCRIPTION</h2><p>
ctdb is a utility to view and manage a ctdb cluster.
- </p></div><div class="refsect1" lang="en"><a name="id2488877"></a><h2>OPTIONS</h2><div class="variablelist"><dl><dt><span class="term">-n <node></span></dt><dd><p>
- This specifies on which node to execute the command. Default is
- to run the command on the deamon running on the local host.
+ </p></div><div class="refsect1" lang="en"><a name="id2480839"></a><h2>OPTIONS</h2><div class="variablelist"><dl><dt><span class="term">-n <vnn></span></dt><dd><p>
+ This specifies the virtual node number on which to execute the
+ command. Default is to run the command on the daemon running on
+ the local host.
+ </p><p>
+ The virtual node number is an integer that describes the node in the
+ cluster. The first node has virtual node number 0.
</p></dd><dt><span class="term">-Y</span></dt><dd><p>
Produce output in machinereadable form for easier parsing by scripts. Not all commands support this option.
</p></dd><dt><span class="term">-t <timeout></span></dt><dd><p>
You only need to specify this parameter if you run multiple ctdb
daemons on the same physical host and thus can not use the default
name for the domain socket.
- </p></dd></dl></div></div><div class="refsect1" lang="en"><a name="id2488991"></a><h2>Administrative Commands</h2><p>
+ </p></dd></dl></div></div><div class="refsect1" lang="en"><a name="id2481133"></a><h2>Administrative Commands</h2><p>
These are commands used to monitor and administrate a CTDB cluster.
- </p><div class="refsect2" lang="en"><a name="id2489000"></a><h3>status</h3><p>
+ </p><div class="refsect2" lang="en"><a name="id2481142"></a><h3>status</h3><p>
This command shows the current status of the ctdb node.
- </p><div class="refsect3" lang="en"><a name="id2489009"></a><h4>node status</h4><p>
+ </p><div class="refsect3" lang="en"><a name="id2481151"></a><h4>node status</h4><p>
Node status reflects the current status of the node. There are four possible states:
</p><p>
OK - This node is fully functional.
UNHEALTHY - A service provided by this node is malfunctioning and should be investigated. The CTDB daemon itself is operational and participates in the cluster. Its public IP address has been taken over by a different node and no services are currnetly being hosted. All unhealthy nodes should be investigated and require an administrative action to rectify.
</p><p>
BANNED - This node failed too many recovery attempts and has been banned from participating in the cluster for a period of RecoveryBanPeriod seconds. Any public IP address has been taken over by other nodes. This node does not provide any services. All banned nodes should be investigated and require an administrative action to rectify. This node does not perticipate in the CTDB cluster but can still be communicated with. I.e. ctdb commands can be sent to it.
- </p></div><div class="refsect3" lang="en"><a name="id2489061"></a><h4>generation</h4><p>
+ </p></div><div class="refsect3" lang="en"><a name="id2481202"></a><h4>generation</h4><p>
The generation id is a number that indicates the current generation
of a cluster instance. Each time a cluster goes through a
reconfiguration or a recovery its generation id will be changed.
- </p></div><div class="refsect3" lang="en"><a name="id2490207"></a><h4>VNNMAP</h4><p>
+ </p></div><div class="refsect3" lang="en"><a name="id2481215"></a><h4>VNNMAP</h4><p>
The list of Virtual Node Numbers. This is a list of all nodes that actively participates in the cluster and that share the workload of hosting the Clustered TDB database records.
Only nodes that are parcipitating in the vnnmap can become lmaster or dmaster for a database record.
- </p></div><div class="refsect3" lang="en"><a name="id2490221"></a><h4>Recovery mode</h4><p>
+ </p></div><div class="refsect3" lang="en"><a name="id2481229"></a><h4>Recovery mode</h4><p>
This is the current recovery mode of the cluster. There are two possible modes:
</p><p>
NORMAL - The cluster is fully operational.
</p><p>
RECOVERY - The cluster databases have all been frozen, pausing all services while the cluster awaits a recovery process to complete. A recovery process should finish within seconds. If a cluster is stuck in the RECOVERY state this would indicate a cluster malfunction which needs to be investigated.
- </p></div><div class="refsect3" lang="en"><a name="id2490244"></a><h4>Recovery master</h4><p>
+ </p></div><div class="refsect3" lang="en"><a name="id2481253"></a><h4>Recovery master</h4><p>
This is the cluster node that is currently designated as the recovery master. This node is responsible of monitoring the consistency of the cluster and to perform the actual recovery process when reqired.
</p></div><p>
Example: ctdb status
hash:3 lmaster:3
Recovery mode:NORMAL (0)
Recovery master:0
- </pre></div><div class="refsect2" lang="en"><a name="id2490275"></a><h3>ping</h3><p>
+ </pre></div><div class="refsect2" lang="en"><a name="id2481284"></a><h3>ping</h3><p>
This command will "ping" all CTDB daemons in the cluster to verify that they are processing commands correctly.
</p><p>
Example: ctdb ping
response from 1 time=0.000144 sec (2 clients)
response from 2 time=0.000105 sec (2 clients)
response from 3 time=0.000114 sec (2 clients)
- </pre></div><div class="refsect2" lang="en"><a name="id2490302"></a><h3>ip</h3><p>
+ </pre></div><div class="refsect2" lang="en"><a name="id2481310"></a><h3>ip</h3><p>
This command will display the list of public addresses that are provided by the cluster and which physical node is currently serving this ip.
</p><p>
Example: ctdb ip
12.1.1.2 1
12.1.1.3 2
12.1.1.4 3
- </pre></div><div class="refsect2" lang="en"><a name="id2490327"></a><h3>getvar <name></h3><p>
+ </pre></div><div class="refsect2" lang="en"><a name="id2481335"></a><h3>getvar <name></h3><p>
Get the runtime value of a tuneable variable.
</p><p>
Example: ctdb getvar MaxRedirectCount
Example output:
</p><pre class="screen">
MaxRedirectCount = 3
- </pre></div><div class="refsect2" lang="en"><a name="id2490350"></a><h3>setvar <name> <value></h3><p>
+ </pre></div><div class="refsect2" lang="en"><a name="id2528417"></a><h3>setvar <name> <value></h3><p>
Set the runtime value of a tuneable variable.
</p><p>
Example: ctdb setvar MaxRedirectCount 5
- </p></div><div class="refsect2" lang="en"><a name="id2490365"></a><h3>listvars</h3><p>
+ </p></div><div class="refsect2" lang="en"><a name="id2528432"></a><h3>listvars</h3><p>
List all tuneable variables.
</p><p>
Example: ctdb listvars
EventScriptTimeout = 20
RecoveryGracePeriod = 60
RecoveryBanPeriod = 300
- </pre></div><div class="refsect2" lang="en"><a name="id2490393"></a><h3>statistics</h3><p>
+ </pre></div><div class="refsect2" lang="en"><a name="id2528460"></a><h3>statistics</h3><p>
Collect statistics from the CTDB daemon about how many calls it has served.
</p><p>
Example: ctdb statistics
max_hop_count 0
max_call_latency 4.948321 sec
max_lockwait_latency 0.000000 sec
- </pre></div><div class="refsect2" lang="en"><a name="id2490436"></a><h3>statisticsreset</h3><p>
+ </pre></div><div class="refsect2" lang="en"><a name="id2528503"></a><h3>statisticsreset</h3><p>
This command is used to clear all statistics counters in a node.
</p><p>
Example: ctdb statisticsreset
- </p></div><div class="refsect2" lang="en"><a name="id2490450"></a><h3>getdebug</h3><p>
+ </p></div><div class="refsect2" lang="en"><a name="id2528518"></a><h3>getdebug</h3><p>
Get the current debug level for the node. the debug level controls what information is written to the log file.
- </p></div><div class="refsect2" lang="en"><a name="id2490461"></a><h3>setdebug <debuglevel></h3><p>
+ </p></div><div class="refsect2" lang="en"><a name="id2528529"></a><h3>setdebug <debuglevel></h3><p>
Set the debug level of a node. This is a number between 0 and 9 and controls what information will be written to the logfile.
- </p></div><div class="refsect2" lang="en"><a name="id2536585"></a><h3>getpid</h3><p>
+ </p></div><div class="refsect2" lang="en"><a name="id2528540"></a><h3>getpid</h3><p>
This command will return the process id of the ctdb daemon.
- </p></div><div class="refsect2" lang="en"><a name="id2536595"></a><h3>disable</h3><p>
+ </p></div><div class="refsect2" lang="en"><a name="id2528551"></a><h3>disable</h3><p>
This command is used to administratively disable a node in the cluster.
A disabled node will still participate in the cluster and host
clustered TDB records but its public ip address has been taken over by
a different node and it no longer hosts any services.
- </p></div><div class="refsect2" lang="en"><a name="id2536613"></a><h3>enable</h3><p>
+ </p></div><div class="refsect2" lang="en"><a name="id2528564"></a><h3>enable</h3><p>
Re-enable a node that has been administratively disabled.
- </p></div><div class="refsect2" lang="en"><a name="id2536623"></a><h3>ban <bantime|0></h3><p>
+ </p></div><div class="refsect2" lang="en"><a name="id2528575"></a><h3>ban <bantime|0></h3><p>
Administratively ban a node for bantime seconds. A bantime of 0 means that the node should be permanently banned.
</p><p>
A banned node does not participate in the cluster and does not host any records for the clustered TDB. Its ip address has been taken over by an other node and no services are hosted.
</p><p>
Nodes are automatically banned if they are the cause of too many
cluster recoveries.
- </p></div><div class="refsect2" lang="en"><a name="id2536646"></a><h3>unban</h3><p>
+ </p></div><div class="refsect2" lang="en"><a name="id2528598"></a><h3>unban</h3><p>
This command is used to unban a node that has either been
administratively banned using the ban command or has been automatically
banned by the recovery daemon.
- </p></div><div class="refsect2" lang="en"><a name="id2536658"></a><h3>shutdown</h3><p>
+ </p></div><div class="refsect2" lang="en"><a name="id2528610"></a><h3>shutdown</h3><p>
This command will shutdown a specific CTDB daemon.
- </p></div><div class="refsect2" lang="en"><a name="id2536668"></a><h3>recover</h3><p>
+ </p></div><div class="refsect2" lang="en"><a name="id2528620"></a><h3>recover</h3><p>
This command will trigger the recovery daemon to do a cluster
recovery.
- </p></div><div class="refsect2" lang="en"><a name="id2536679"></a><h3>killtcp <srcip:port> <dstip:port></h3><p>
+ </p></div><div class="refsect2" lang="en"><a name="id2528631"></a><h3>killtcp <srcip:port> <dstip:port></h3><p>
This command will kill the specified TCP connection by issuing a
TCP RST to the srcip:port endpoint.
- </p></div><div class="refsect2" lang="en"><a name="id2536690"></a><h3>tickle <srcip:port> <dstip:port></h3><p>
+ </p></div><div class="refsect2" lang="en"><a name="id2528642"></a><h3>tickle <srcip:port> <dstip:port></h3><p>
This command will send a TCP tickle to the source host for the
specified TCP connection.
A TCP tickle is a TCP ACK packet with an invalid sequence and
TCP connection has been disrupted and that the client will need
to reestablish. This greatly speeds up the time it takes for a client
to detect and reestablish after an IP failover in the ctdb cluster.
- </p></div></div><div class="refsect1" lang="en"><a name="id2536716"></a><h2>Debugging Commands</h2><p>
+ </p></div></div><div class="refsect1" lang="en"><a name="id2528668"></a><h2>Debugging Commands</h2><p>
These commands are primarily used for CTDB development and testing and
should not be used for normal administration.
- </p><div class="refsect2" lang="en"><a name="id2536726"></a><h3>process-exists <pid></h3><p>
+ </p><div class="refsect2" lang="en"><a name="id2528678"></a><h3>process-exists <pid></h3><p>
This command checks if a specific process exists on the CTDB host. This is mainly used by Samba to check if remote instances of samba are still running or not.
- </p></div><div class="refsect2" lang="en"><a name="id2536738"></a><h3>getdbmap</h3><p>
+ </p></div><div class="refsect2" lang="en"><a name="id2528691"></a><h3>getdbmap</h3><p>
This command lists all clustered TDB databases that the CTDB daemon has attached to.
</p><p>
Example: ctdb getdbmap
dbid:0x1421fb78 name:brlock.tdb path:/var/ctdb/brlock.tdb.0
dbid:0x17055d90 name:connections.tdb path:/var/ctdb/connections.tdb.0
dbid:0xc0bdde6a name:sessionid.tdb path:/var/ctdb/sessionid.tdb.0
- </pre></div><div class="refsect2" lang="en"><a name="id2536766"></a><h3>catdb <dbname></h3><p>
+ </pre></div><div class="refsect2" lang="en"><a name="id2528718"></a><h3>catdb <dbname></h3><p>
This command will dump a clustered TDB database to the screen. This is a debugging command.
- </p></div><div class="refsect2" lang="en"><a name="id2536777"></a><h3>getmonmode</h3><p>
+ </p></div><div class="refsect2" lang="en"><a name="id2528729"></a><h3>getmonmode</h3><p>
This command returns the monitoring mode of a node. The monitoring mode is either ACTIVE or DISABLED. Normally a node will continuously monitor that all other nodes that are expected are in fact connected and that they respond to commands.
</p><p>
ACTIVE - This is the normal mode. The node is actively monitoring all other nodes, both that the transport is connected and also that the node responds to commands. If a node becomes unavailable, it will be marked as DISCONNECTED and a recovery is initiated to restore the cluster.
</p><p>
DISABLED - This node is not monitoring that other nodes are available. In this mode a node failure will not be detected and no recovery will be performed. This mode is useful when for debugging purposes one wants to attach GDB to a ctdb process but wants to prevent the rest of the cluster from marking this node as DISCONNECTED and do a recovery.
- </p></div><div class="refsect2" lang="en"><a name="id2536808"></a><h3>setmonmode <0|1></h3><p>
+ </p></div><div class="refsect2" lang="en"><a name="id2528760"></a><h3>setmonmode <0|1></h3><p>
This command can be used to explicitly disable/enable monitoring mode on a node. The main purpose is if one wants to attach GDB to a running ctdb daemon but wants to prevent the other nodes from marking it as DISCONNECTED and issuing a recovery. To do this, set monitoring mode to 0 on all nodes before attaching with GDB. Remember to set monitoring mode back to 1 afterwards.
- </p></div><div class="refsect2" lang="en"><a name="id2536823"></a><h3>attach <dbname></h3><p>
+ </p></div><div class="refsect2" lang="en"><a name="id2528776"></a><h3>attach <dbname></h3><p>
This is a debugging command. This command will make the CTDB daemon create a new CTDB database and attach to it.
- </p></div><div class="refsect2" lang="en"><a name="id2536835"></a><h3>dumpmemory</h3><p>
+ </p></div><div class="refsect2" lang="en"><a name="id2528787"></a><h3>dumpmemory</h3><p>
This is a debugging command. This command will make the ctdb daemon write a full memory allocation map to the log file.
- </p></div><div class="refsect2" lang="en"><a name="id2536846"></a><h3>freeze</h3><p>
+ </p></div><div class="refsect2" lang="en"><a name="id2528798"></a><h3>freeze</h3><p>
This command will lock all the local TDB databases causing clients
that are accessing these TDBs such as samba3 to block until the
databases are thawed.
This is primarily used by the recovery daemon to stop all samba
daemons from accessing any databases while the database is recovered
and rebuilt.
- </p></div><div class="refsect2" lang="en"><a name="id2536864"></a><h3>thaw</h3><p>
+ </p></div><div class="refsect2" lang="en"><a name="id2528816"></a><h3>thaw</h3><p>
Thaw a previously frozen node.
- </p></div></div><div class="refsect1" lang="en"><a name="id2536875"></a><h2>SEE ALSO</h2><p>
+ </p></div></div><div class="refsect1" lang="en"><a name="id2528827"></a><h2>SEE ALSO</h2><p>
ctdbd(1), onnode(1)
<a href="http://ctdb.samba.org/" target="_top">http://ctdb.samba.org/</a>
- </p></div><div class="refsect1" lang="en"><a name="id2536888"></a><h2>COPYRIGHT/LICENSE</h2><div class="literallayout"><p><br>
+ </p></div><div class="refsect1" lang="en"><a name="id2528840"></a><h2>COPYRIGHT/LICENSE</h2><div class="literallayout"><p><br>
Copyright (C) Andrew Tridgell 2007<br>
Copyright (C) Ronnie sahlberg 2007<br>
<br>
<title>OPTIONS</title>
<variablelist>
- <varlistentry><term>-n <node></term>
+ <varlistentry><term>-n <vnn></term>
<listitem>
<para>
- This specifies on which node to execute the command. Default is
- to run the command on the deamon running on the local host.
+ This specifies the virtual node number on which to execute the
+ command. Default is to run the command on the daemon running on
+ the local host.
+ </para>
+ <para>
+ The virtual node number is an integer that describes the node in the
+ cluster. The first node has virtual node number 0.
</para>
</listitem>
</varlistentry>
#define CTDB_NULL_FUNC 0xFF000001
#define CTDB_FETCH_FUNC 0xFF000002
+/*
+ a tcp connection description
+ saddr and daddr are the two endpoints of the connection; daddr is the
+ endpoint whose address is matched against the nodes' public addresses
+ when a connection is registered or removed
+ */
+struct ctdb_tcp_connection {
+ struct sockaddr_in saddr;
+ struct sockaddr_in daddr;
+};
+
+/* the wire representation for a tcp tickle array */
+struct ctdb_tcp_wire_array {
+ uint32_t num; /* number of entries carried in connections[] */
+ struct ctdb_tcp_connection connections[1]; /* declared with one slot; blobs are sized as offsetof(connections) plus num entries */
+};
+
+/* the list of tcp tickles used by get/set tcp tickle list */
+struct ctdb_control_tcp_tickle_list {
+ uint32_t vnn; /* index into ctdb->nodes of the node whose tickle list this is */
+ struct ctdb_tcp_wire_array tickles; /* its connections[] entries run to the end of the blob */
+};
+
+/*
+ array of tcp connections
+ this is the in-memory form kept in each node's tcp_array; connections
+ points at a separately allocated array holding num entries
+ */
+struct ctdb_tcp_array {
+ uint32_t num;
+ struct ctdb_tcp_connection *connections;
+};
+
+
/* all tunable variables go in here */
struct ctdb_tunable {
uint32_t max_redirect_count;
uint32_t election_timeout;
uint32_t takeover_timeout;
uint32_t monitor_interval;
+ uint32_t tickle_update_interval;
uint32_t script_timeout;
uint32_t recovery_grace_period;
uint32_t recovery_ban_period;
/* the node number that has taken over this nodes public address, if any.
If not taken over, then set to -1 */
int32_t takeover_vnn;
+
+ /* List of clients to tickle for this public address */
+ struct ctdb_tcp_array *tcp_array;
+
+ /* whether we need to update the other nodes with changes to our list
+ of connected clients */
+ bool tcp_update_needed;
};
/*
uint32_t recovery_mode;
uint32_t monitoring_mode;
TALLOC_CTX *monitor_context;
+ TALLOC_CTX *tickle_update_context;
struct ctdb_tunable tunable;
enum ctdb_freeze_mode freeze_mode;
struct ctdb_freeze_handle *freeze_handle;
uint32_t recovery_master;
struct ctdb_call_state *pending_calls;
struct ctdb_takeover takeover;
- struct ctdb_tcp_list *tcp_list;
struct ctdb_client_ip *client_ip_list;
bool do_setsched;
void *saved_scheduler_param;
CTDB_CONTROL_MODIFY_FLAGS = 52,
CTDB_CONTROL_GET_ALL_TUNABLES = 53,
CTDB_CONTROL_KILL_TCP = 54,
+ CTDB_CONTROL_GET_TCP_TICKLE_LIST = 55,
+ CTDB_CONTROL_SET_TCP_TICKLE_LIST = 56,
};
/*
void ctdb_stop_monitoring(struct ctdb_context *ctdb);
void ctdb_start_monitoring(struct ctdb_context *ctdb);
+void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb);
void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode);
void ctdb_daemon_cancel_controls(struct ctdb_context *ctdb, struct ctdb_node *node);
int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap);
int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
- uint32_t srcnode, TDB_DATA indata);
+ TDB_DATA indata);
int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata);
int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata);
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn);
int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata);
+int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata);
+int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata);
void ctdb_takeover_client_destructor_hook(struct ctdb_client *client);
int ctdb_event_script(struct ctdb_context *ctdb, const char *fmt, ...) PRINTF_ATTRIBUTE(2,3);
uint32_t destnode,
struct ctdb_control_killtcp *killtcp);
+int ctdb_ctrl_get_tcp_tickles(struct ctdb_context *ctdb,
+ struct timeval timeout,
+ uint32_t destnode,
+ TALLOC_CTX *mem_ctx,
+ uint32_t vnn,
+ struct ctdb_control_tcp_tickle_list **list);
#endif
case CTDB_CONTROL_TCP_CLIENT:
CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_control_tcp));
- return ctdb_control_tcp_client(ctdb, client_id, srcnode, indata);
+ return ctdb_control_tcp_client(ctdb, client_id, indata);
case CTDB_CONTROL_STARTUP:
CHECK_CONTROL_DATA_SIZE(0);
CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_control_tcp_vnn));
return ctdb_control_tcp_add(ctdb, indata);
- case CTDB_CONTROL_TCP_REMOVE:
- CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_control_tcp_vnn));
- return ctdb_control_tcp_remove(ctdb, indata);
-
case CTDB_CONTROL_SET_TUNABLE:
return ctdb_control_set_tunable(ctdb, indata);
CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_control_killtcp));
return ctdb_control_kill_tcp(ctdb, indata);
+ case CTDB_CONTROL_GET_TCP_TICKLE_LIST:
+ CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
+ return ctdb_control_get_tcp_tickle_list(ctdb, indata, outdata);
+
+ case CTDB_CONTROL_SET_TCP_TICKLE_LIST:
+ /* data size is verified in the called function */
+ return ctdb_control_set_tcp_tickle_list(ctdb, indata);
+
default:
DEBUG(0,(__location__ " Unknown CTDB control opcode %u\n", opcode));
return -1;
/* start monitoring for dead nodes */
ctdb_start_monitoring(ctdb);
+
+ /* start periodic update of tcp tickle lists */
+ ctdb_start_tcp_tickle_update(ctdb);
}
/* go into main ctdb loop */
struct ctdb_context *ctdb;
uint32_t count;
struct sockaddr_in sin;
- struct ctdb_tcp_list *tcp_list;
+ struct ctdb_tcp_array *tcparray;
};
+
/*
lists of tcp endpoints
*/
struct ctdb_tcp_list {
struct ctdb_tcp_list *prev, *next;
- uint32_t vnn;
- struct sockaddr_in saddr;
- struct sockaddr_in daddr;
+ struct ctdb_tcp_connection connection;
};
-
/*
list of clients to kill on IP release
*/
{
struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
struct ctdb_takeover_arp);
- int s, ret;
- struct ctdb_tcp_list *tcp;
+ int i, s, ret;
+ struct ctdb_tcp_array *tcparray;
+
ret = ctdb_sys_send_arp(&arp->sin, arp->ctdb->takeover.interface);
if (ret != 0) {
return;
}
- for (tcp=arp->tcp_list;tcp;tcp=tcp->next) {
- DEBUG(2,("sending tcp tickle ack for %u->%s:%u\n",
- (unsigned)ntohs(tcp->daddr.sin_port),
- inet_ntoa(tcp->saddr.sin_addr),
- (unsigned)ntohs(tcp->saddr.sin_port)));
- ret = ctdb_sys_send_tcp(s, &tcp->saddr, &tcp->daddr, 0, 0, 0);
- if (ret != 0) {
- DEBUG(0,(__location__ " Failed to send tcp tickle ack for %s\n",
- inet_ntoa(tcp->saddr.sin_addr)));
+ tcparray = arp->tcparray;
+ if (tcparray) {
+ for (i=0;i<tcparray->num;i++) {
+ DEBUG(2,("sending tcp tickle ack for %u->%s:%u\n",
+ (unsigned)ntohs(tcparray->connections[i].daddr.sin_port),
+ inet_ntoa(tcparray->connections[i].saddr.sin_addr),
+ (unsigned)ntohs(tcparray->connections[i].saddr.sin_port)));
+ ret = ctdb_sys_send_tcp(s, &tcparray->connections[i].saddr,
+ &tcparray->connections[i].daddr, 0, 0, 0);
+ if (ret != 0) {
+ DEBUG(0,(__location__ " Failed to send tcp tickle ack for %s\n",
+ inet_ntoa(tcparray->connections[i].saddr.sin_addr)));
+ }
}
}
struct takeover_callback_state {
struct ctdb_req_control *c;
struct sockaddr_in *sin;
+ struct ctdb_node *node;
};
/*
talloc_get_type(private_data, struct takeover_callback_state);
struct ctdb_takeover_arp *arp;
char *ip = inet_ntoa(state->sin->sin_addr);
- struct ctdb_tcp_list *tcp;
+ struct ctdb_tcp_array *tcparray;
ctdb_start_monitoring(ctdb);
arp->ctdb = ctdb;
arp->sin = *state->sin;
- /* add all of the known tcp connections for this IP to the
- list of tcp connections to send tickle acks for */
- for (tcp=ctdb->tcp_list;tcp;tcp=tcp->next) {
- if (state->sin->sin_addr.s_addr == tcp->daddr.sin_addr.s_addr) {
- struct ctdb_tcp_list *t2 = talloc(arp, struct ctdb_tcp_list);
- if (t2 == NULL) goto failed;
- *t2 = *tcp;
- DLIST_ADD(arp->tcp_list, t2);
- }
+ tcparray = state->node->tcp_array;
+ if (tcparray) {
+ /* add all of the known tcp connections for this IP to the
+ list of tcp connections to send tickle acks for */
+ arp->tcparray = talloc_steal(arp, tcparray);
+
+ state->node->tcp_array = NULL;
+ state->node->tcp_update_needed = true;
}
event_add_timed(arp->ctdb->ev, arp->ctdb->takeover.last_ctx,
return;
}
+/*
+  Look up which node a public ip address belongs to.
+  The address string is compared against the public_address of every
+  node in ctdb->nodes; the index of the first match is returned.
+  returns -1 if no node has this string as its public address
+ */
+static int32_t find_public_ip_vnn(struct ctdb_context *ctdb, char *ip)
+{
+	int idx = 0;
+
+	while (idx < ctdb->num_nodes) {
+		if (strcmp(ip, ctdb->nodes[idx]->public_address) == 0) {
+			return idx;
+		}
+		idx++;
+	}
+
+	return -1;
+}
+
+
/*
take over an ip address
*/
struct takeover_callback_state *state;
struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
char *ip = inet_ntoa(pip->sin.sin_addr);
-
+ struct ctdb_node *node = ctdb->nodes[pip->vnn];
/* update out node table */
- ctdb->nodes[pip->vnn]->takeover_vnn = pip->takeover_vnn;
+ node->takeover_vnn = pip->takeover_vnn;
/* if our kernel already has this IP, do nothing */
if (ctdb_sys_have_ip(ip)) {
CTDB_NO_MEMORY(ctdb, state->sin);
*state->sin = pip->sin;
+ state->node = node;
+
DEBUG(0,("Takover of IP %s/%u on interface %s\n",
ip, ctdb->nodes[ctdb->vnn]->public_netmask_bits,
ctdb->takeover.interface));
talloc_get_type(private_data, struct takeover_callback_state);
char *ip = inet_ntoa(state->sin->sin_addr);
TDB_DATA data;
- struct ctdb_tcp_list *tcp;
ctdb_start_monitoring(ctdb);
/* kill clients that have registered with this IP */
release_kill_clients(ctdb, state->sin->sin_addr);
-
- /* tell other nodes about any tcp connections we were holding with this IP */
- for (tcp=ctdb->tcp_list;tcp;tcp=tcp->next) {
- if (tcp->vnn == ctdb->vnn &&
- state->sin->sin_addr.s_addr == tcp->daddr.sin_addr.s_addr) {
- struct ctdb_control_tcp_vnn t;
-
- t.vnn = ctdb->vnn;
- t.src = tcp->saddr;
- t.dest = tcp->daddr;
-
- data.dptr = (uint8_t *)&t;
- data.dsize = sizeof(t);
-
- ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
- CTDB_CONTROL_TCP_ADD,
- 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
- }
- }
-
/* the control succeeded */
ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
talloc_free(state);
}
-
/*
release an ip address
*/
struct takeover_callback_state *state;
struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
char *ip = inet_ntoa(pip->sin.sin_addr);
+ struct ctdb_node *node = ctdb->nodes[pip->vnn];
/* update out node table */
ctdb->nodes[pip->vnn]->takeover_vnn = pip->takeover_vnn;
CTDB_NO_MEMORY(ctdb, state->sin);
*state->sin = pip->sin;
+ state->node = node;
+
ctdb_stop_monitoring(ctdb);
ret = ctdb_event_script_callback(ctdb,
called by a client to inform us of a TCP connection that it is managing
that should tickled with an ACK when IP takeover is done
*/
-int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id, uint32_t vnn,
+int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
TDB_DATA indata)
{
struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
int ret;
TDB_DATA data;
struct ctdb_client_ip *ip;
+ char *addr;
+ int32_t takeover_vnn;
ip = talloc(client, struct ctdb_client_ip);
CTDB_NO_MEMORY(ctdb, ip);
tcp = talloc(client, struct ctdb_tcp_list);
CTDB_NO_MEMORY(ctdb, tcp);
- tcp->vnn = vnn;
- tcp->saddr = p->src;
- tcp->daddr = p->dest;
+ addr = inet_ntoa(p->dest.sin_addr);
+
+ takeover_vnn = find_public_ip_vnn(ctdb, addr);
+ if (takeover_vnn == -1) {
+ DEBUG(3,("Could not add client IP %s. This is not a public address.\n", addr));
+ return -1;
+ }
+
+ addr = inet_ntoa(p->src.sin_addr);
+
+ tcp->connection.saddr = p->src;
+ tcp->connection.daddr = p->dest;
DLIST_ADD(client->tcp_list, tcp);
- t.vnn = vnn;
+ t.vnn = takeover_vnn;
t.src = p->src;
t.dest = p->dest;
DEBUG(2,("registered tcp client for %u->%s:%u\n",
(unsigned)ntohs(p->dest.sin_port),
- inet_ntoa(p->src.sin_addr),
+ addr,
(unsigned)ntohs(p->src.sin_port)));
/* tell all nodes about this tcp connection */
/*
find a tcp address on a list
*/
-static struct ctdb_tcp_list *ctdb_tcp_find(struct ctdb_tcp_list *list,
- struct ctdb_tcp_list *tcp)
+static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
+ struct ctdb_tcp_connection *tcp)
{
- while (list) {
- if (same_sockaddr_in(&list->saddr, &tcp->saddr) &&
- same_sockaddr_in(&list->daddr, &tcp->daddr)) {
- return list;
+ int i;
+
+ if (array == NULL) {
+ return NULL;
+ }
+
+ for (i=0;i<array->num;i++) {
+ if (same_sockaddr_in(&array->connections[i].saddr, &tcp->saddr) &&
+ same_sockaddr_in(&array->connections[i].daddr, &tcp->daddr)) {
+ return &array->connections[i];
}
- list = list->next;
}
return NULL;
}
int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
{
struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
- struct ctdb_tcp_list *tcp;
-
- tcp = talloc(ctdb, struct ctdb_tcp_list);
- CTDB_NO_MEMORY(ctdb, tcp);
+ struct ctdb_tcp_array *tcparray;
+ struct ctdb_tcp_connection tcp;
+
+ tcparray = ctdb->nodes[p->vnn]->tcp_array;
+
+ /* If this is the first tickle */
+ if (tcparray == NULL) {
+ tcparray = talloc_size(ctdb->nodes,
+ offsetof(struct ctdb_tcp_array, connections) +
+ sizeof(struct ctdb_tcp_connection) * 1);
+ CTDB_NO_MEMORY(ctdb, tcparray);
+ ctdb->nodes[p->vnn]->tcp_array = tcparray;
+
+ tcparray->num = 0;
+ tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
+ CTDB_NO_MEMORY(ctdb, tcparray->connections);
+
+ tcparray->connections[tcparray->num].saddr = p->src;
+ tcparray->connections[tcparray->num].daddr = p->dest;
+ tcparray->num++;
+ return 0;
+ }
- tcp->vnn = p->vnn;
- tcp->saddr = p->src;
- tcp->daddr = p->dest;
- if (NULL == ctdb_tcp_find(ctdb->tcp_list, tcp)) {
- DLIST_ADD(ctdb->tcp_list, tcp);
- DEBUG(2,("Added tickle info for %s:%u from vnn %u\n",
- inet_ntoa(tcp->daddr.sin_addr), ntohs(tcp->daddr.sin_port),
- tcp->vnn));
- } else {
+ /* Do we already have this tickle ?*/
+ tcp.saddr = p->src;
+ tcp.daddr = p->dest;
+ if (ctdb_tcp_find(ctdb->nodes[p->vnn]->tcp_array, &tcp) != NULL) {
DEBUG(4,("Already had tickle info for %s:%u from vnn %u\n",
- inet_ntoa(tcp->daddr.sin_addr), ntohs(tcp->daddr.sin_port),
- tcp->vnn));
+ inet_ntoa(tcp.daddr.sin_addr),
+ ntohs(tcp.daddr.sin_port),
+ p->vnn));
+ return 0;
}
+ /* A new tickle, we must add it to the array */
+ tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
+ struct ctdb_tcp_connection,
+ tcparray->num+1);
+ CTDB_NO_MEMORY(ctdb, tcparray->connections);
+
+ ctdb->nodes[p->vnn]->tcp_array = tcparray;
+ tcparray->connections[tcparray->num].saddr = p->src;
+ tcparray->connections[tcparray->num].daddr = p->dest;
+ tcparray->num++;
+
+ DEBUG(2,("Added tickle info for %s:%u from vnn %u\n",
+ inet_ntoa(tcp.daddr.sin_addr),
+ ntohs(tcp.daddr.sin_port),
+ p->vnn));
+
return 0;
}
clients managing that should tickled with an ACK when IP takeover is
done
*/
-int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
+static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
{
- struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
- struct ctdb_tcp_list t, *tcp;
-
- t.vnn = p->vnn;
- t.saddr = p->src;
- t.daddr = p->dest;
-
- tcp = ctdb_tcp_find(ctdb->tcp_list, &t);
- if (tcp) {
- DEBUG(2,("Removed tickle info for %s:%u from vnn %u\n",
- inet_ntoa(tcp->daddr.sin_addr), ntohs(tcp->daddr.sin_port),
- tcp->vnn));
- DLIST_REMOVE(ctdb->tcp_list, tcp);
- talloc_free(tcp);
+ struct ctdb_tcp_connection *tcpp;
+ int32_t vnn = find_public_ip_vnn(ctdb, inet_ntoa(conn->daddr.sin_addr));
+ struct ctdb_node *node;
+
+ if (vnn == -1) {
+ DEBUG(0,(__location__ " unable to find public address %s\n", inet_ntoa(conn->daddr.sin_addr)));
+ return;
}
- return 0;
+ node = ctdb->nodes[vnn];
+
+ /* if the array is empty we cant remove it
+ and we dont need to do anything
+ */
+ if (node->tcp_array == NULL) {
+ DEBUG(2,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
+ inet_ntoa(conn->daddr.sin_addr),
+ ntohs(conn->daddr.sin_port)));
+ return;
+ }
+
+
+ /* See if we know this connection
+ if we dont know this connection then we dont need to do anything
+ */
+ tcpp = ctdb_tcp_find(node->tcp_array, conn);
+ if (tcpp == NULL) {
+ DEBUG(2,("Trying to remove tickle that doesnt exist %s:%u\n",
+ inet_ntoa(conn->daddr.sin_addr),
+ ntohs(conn->daddr.sin_port)));
+ return;
+ }
+
+
+ /* We need to remove this entry from the array.
+ Instead of allocating a new array and copying data to it
+ we cheat and just copy the last entry in the existing array
+ to the entry that is to be removed and just shrink the
+ ->num field
+ */
+ *tcpp = node->tcp_array->connections[node->tcp_array->num - 1];
+ node->tcp_array->num--;
+
+ /* If we deleted the last entry we also need to remove the entire array
+ */
+ if (node->tcp_array->num == 0) {
+ talloc_free(node->tcp_array);
+ node->tcp_array = NULL;
+ }
+
+ node->tcp_update_needed = true;
+
+ DEBUG(2,("Removed tickle info for %s:%u\n",
+ inet_ntoa(conn->saddr.sin_addr),
+ ntohs(conn->saddr.sin_port)));
}
/*
- called when a daemon restarts - wipes all tcp entries from that vnn
+ called when a daemon restarts - send all tickles for all public addresses
+ we are serving immediately to the new node.
*/
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
{
- struct ctdb_tcp_list *tcp, *next;
- for (tcp=ctdb->tcp_list;tcp;tcp=next) {
- next = tcp->next;
- if (tcp->vnn == vnn) {
- DLIST_REMOVE(ctdb->tcp_list, tcp);
- talloc_free(tcp);
- }
-
- /* and tell the new guy about any that he should have
- from us */
- if (tcp->vnn == ctdb->vnn) {
- struct ctdb_control_tcp_vnn t;
- TDB_DATA data;
-
- t.vnn = tcp->vnn;
- t.src = tcp->saddr;
- t.dest = tcp->daddr;
-
- data.dptr = (uint8_t *)&t;
- data.dsize = sizeof(t);
-
- ctdb_daemon_send_control(ctdb, vnn, 0,
- CTDB_CONTROL_TCP_ADD,
- 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
- }
- }
+/*XXX here we should send all tickles we are serving to the new node */
return 0;
}
void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
{
while (client->tcp_list) {
- TDB_DATA data;
- struct ctdb_control_tcp_vnn p;
struct ctdb_tcp_list *tcp = client->tcp_list;
DLIST_REMOVE(client->tcp_list, tcp);
- p.vnn = tcp->vnn;
- p.src = tcp->saddr;
- p.dest = tcp->daddr;
- data.dptr = (uint8_t *)&p;
- data.dsize = sizeof(p);
- if (ctdb_sys_have_ip(inet_ntoa(p.dest.sin_addr))) {
- ctdb_daemon_send_control(client->ctdb, CTDB_BROADCAST_CONNECTED, 0,
- CTDB_CONTROL_TCP_REMOVE,
- 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
- }
- talloc_free(tcp);
+ ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
}
}
return 0;
}
+
+/*
+  called by a daemon to inform us of the entire list of TCP tickles for
+  a particular public address.
+  this control should only be sent by the node that is currently serving
+  that public address.
+
+  indata holds a struct ctdb_control_tcp_tickle_list: the vnn whose list
+  is being replaced followed by tickles.num connection entries.
+  returns 0 on success and -1 if the blob is too short, the vnn is not
+  valid or an allocation fails. The previously stored list is only
+  dropped once its replacement has been built completely.
+ */
+int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
+{
+	struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
+	const size_t hdr_len = offsetof(struct ctdb_control_tcp_tickle_list,
+					tickles.connections);
+	struct ctdb_tcp_array *tcparray;
+	struct ctdb_tcp_connection *conns;
+
+	/* We must at least have tickles.num or else we cant verify the size
+	   of the received data blob
+	 */
+	if (indata.dsize < hdr_len) {
+		DEBUG(0,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
+		return -1;
+	}
+
+	/* verify that the blob really carries tickles.num entries.
+	   The payload size is divided rather than num multiplied so that
+	   a huge num can not wrap the computation where size_t is 32 bits
+	 */
+	if ((indata.dsize - hdr_len) / sizeof(struct ctdb_tcp_connection)
+	    < list->tickles.num) {
+		DEBUG(0,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
+		return -1;
+	}
+
+	/* Make sure the vnn looks sane */
+	if (!ctdb_validate_vnn(ctdb, list->vnn)) {
+		DEBUG(0,("Bad indata in ctdb_control_set_tcp_tickle_list. Invalid vnn: %u\n", list->vnn));
+		return -1;
+	}
+
+	/* build the replacement first so that a failed allocation leaves
+	   the list we already have untouched
+	 */
+	tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
+	CTDB_NO_MEMORY(ctdb, tcparray);
+
+	conns = talloc_array(tcparray, struct ctdb_tcp_connection, list->tickles.num);
+	if (conns == NULL) {
+		/* dont leak the half built array; CTDB_NO_MEMORY below is
+		   relied on, as elsewhere in this function, to bail out
+		 */
+		talloc_free(tcparray);
+	}
+	CTDB_NO_MEMORY(ctdb, conns);
+
+	memcpy(conns, &list->tickles.connections[0],
+	       sizeof(struct ctdb_tcp_connection) * list->tickles.num);
+
+	tcparray->num         = list->tickles.num;
+	tcparray->connections = conns;
+
+	/* remove any old ticklelist we might have and install the fresh
+	   tickle list array for this vnn
+	 */
+	talloc_free(ctdb->nodes[list->vnn]->tcp_array);
+	ctdb->nodes[list->vnn]->tcp_array = tcparray;
+
+	return 0;
+}
+
+/*
+  called to return the full list of tickles for the public address associated
+  with the provided vnn
+
+  indata carries the vnn as a single uint32_t. On success outdata is filled
+  with a struct ctdb_control_tcp_tickle_list holding that vnn and a copy of
+  every connection currently stored for it (none if the node has no array).
+  returns 0 on success, -1 for an invalid vnn or on allocation failure
+ */
+int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
+{
+	uint32_t vnn = *(uint32_t *)indata.dptr;
+	struct ctdb_tcp_array *tcparray;
+	struct ctdb_control_tcp_tickle_list *reply;
+	int count = 0;
+
+	/* Make sure the vnn looks sane */
+	if (!ctdb_validate_vnn(ctdb, vnn)) {
+		DEBUG(0,("Bad indata in ctdb_control_get_tcp_tickle_list. Invalid vnn: %u\n", vnn));
+		return -1;
+	}
+
+	/* a node without an array simply has zero tickles */
+	tcparray = ctdb->nodes[vnn]->tcp_array;
+	if (tcparray != NULL) {
+		count = tcparray->num;
+	}
+
+	/* fixed header followed by one entry per connection */
+	outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
+				  tickles.connections)
+		+ sizeof(struct ctdb_tcp_connection) * count;
+
+	outdata->dptr = talloc_size(outdata, outdata->dsize);
+	CTDB_NO_MEMORY(ctdb, outdata->dptr);
+
+	reply = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
+	reply->vnn         = vnn;
+	reply->tickles.num = count;
+
+	if (count != 0) {
+		memcpy(reply->tickles.connections, tcparray->connections,
+		       sizeof(struct ctdb_tcp_connection) * count);
+	}
+
+	return 0;
+}
+
+
+/*
+  set the list of all tcp tickles for a public address
+
+  Packs vnn and the connections of tcparray (which may be NULL, meaning
+  an empty list) into a struct ctdb_control_tcp_tickle_list and sends it
+  as a CTDB_CONTROL_SET_TCP_TICKLE_LIST control without waiting for a
+  reply.
+  timeout and destnode are accepted but not used here: the control is
+  always sent to CTDB_BROADCAST_CONNECTED.
+  returns 0 on success, -1 if the blob could not be allocated or sent
+ */
+static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
+			      struct timeval timeout, uint32_t destnode,
+			      uint32_t vnn,
+			      struct ctdb_tcp_array *tcparray)
+{
+	int ret, num;
+	TDB_DATA data;
+	struct ctdb_control_tcp_tickle_list *list;
+
+	if (tcparray) {
+		num = tcparray->num;
+	} else {
+		num = 0;
+	}
+
+	data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
+			      tickles.connections) +
+		     sizeof(struct ctdb_tcp_connection) * num;
+	data.dptr = talloc_size(ctdb, data.dsize);
+	CTDB_NO_MEMORY(ctdb, data.dptr);
+
+	list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
+	list->vnn = vnn;
+	list->tickles.num = num;
+	if (tcparray) {
+		memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
+	}
+
+	ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
+				       CTDB_CONTROL_SET_TCP_TICKLE_LIST,
+				       0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
+
+	/* the blob is only needed for the duration of the send call, so
+	   release it on the failure path too instead of leaking it
+	 */
+	talloc_free(data.dptr);
+
+	if (ret != 0) {
+		DEBUG(0,(__location__ " ctdb_control for set tcp tickles failed\n"));
+		return -1;
+	}
+
+	return 0;
+}
+
+
+/*
+  perform tickle updates if required
+
+  timed event handler: for every public address this node is currently
+  serving whose tickle list is flagged as changed, broadcast the complete
+  list to the connected nodes, then re-arm itself to run again after
+  tunable.tickle_update_interval seconds.
+  private_data is the struct ctdb_context.
+ */
+static void ctdb_update_tcp_tickles(struct event_context *ev,
+				struct timed_event *te,
+				struct timeval t, void *private_data)
+{
+	struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
+	int i, ret;
+
+	for (i=0;i<ctdb->num_nodes;i++) {
+		struct ctdb_node *node = ctdb->nodes[i];
+
+		/* we only send out updates for public addresses that we
+		   have taken over
+		 */
+		if (ctdb->vnn != node->takeover_vnn) {
+			continue;
+		}
+		/* We only send out the updates if we need to */
+		if (!node->tcp_update_needed) {
+			continue;
+		}
+
+		/* the list is labelled with the index of the node that the
+		   public address belongs to, since that is the slot the
+		   receivers store it in (ctdb->nodes[list->vnn]).
+		   node->takeover_vnn is always our own vnn at this point and
+		   would file a taken-over address under the wrong node
+		 */
+		ret = ctdb_ctrl_set_tcp_tickles(ctdb,
+				TAKEOVER_TIMEOUT(),
+				CTDB_BROADCAST_CONNECTED,
+				(uint32_t)i,
+				node->tcp_array);
+		if (ret != 0) {
+			DEBUG(0,("Failed to send the tickle update for public address %s\n", node->public_address));
+			/* keep the flag set so the next run tries again */
+			continue;
+		}
+
+		/* the other nodes are up to date now; without clearing the
+		   flag the same list would be rebroadcast on every run
+		 */
+		node->tcp_update_needed = false;
+	}
+
+	event_add_timed(ctdb->ev, ctdb->tickle_update_context,
+			timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
+			ctdb_update_tcp_tickles, ctdb);
+}
+
+
+/*
+  start periodic update of tcp tickles
+
+  creates the talloc context that the update timer is attached to and
+  schedules the first run of ctdb_update_tcp_tickles after
+  tunable.tickle_update_interval seconds
+ */
+void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
+{
+	struct timeval first_run;
+
+	ctdb->tickle_update_context = talloc_new(ctdb);
+
+	first_run = timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0);
+
+	event_add_timed(ctdb->ev, ctdb->tickle_update_context, first_run,
+			ctdb_update_tcp_tickles, ctdb);
+}
{ "ElectionTimeout", 3, offsetof(struct ctdb_tunable, election_timeout) },
{ "TakeoverTimeout", 5, offsetof(struct ctdb_tunable, takeover_timeout) },
{ "MonitorInterval", 15, offsetof(struct ctdb_tunable, monitor_interval) },
+ { "TickleUpdateInterval",20, offsetof(struct ctdb_tunable, tickle_update_interval) },
{ "EventScriptTimeout", 20, offsetof(struct ctdb_tunable, script_timeout) },
{ "RecoveryGracePeriod", 60, offsetof(struct ctdb_tunable, recovery_grace_period) },
{ "RecoveryBanPeriod", 300, offsetof(struct ctdb_tunable, recovery_ban_period) },
--- /dev/null
+/*
+ simple rb vs dlist benchmark
+
+ Copyright (C) Ronnie Sahlberg 2007
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "includes.h"
+#include "lib/events/events.h"
+#include "lib/util/dlinklist.h"
+#include "system/filesys.h"
+#include "popt.h"
+#include "cmdline.h"
+
+#include <sys/time.h>
+#include <time.h>
+#include "common/rb_tree.h"
+
+/* timestamps taken by start_timer() (tp1) and end_timer() (tp2) */
+static struct timeval tp1,tp2;
+
+/* remember the current time of day in tp1 as the start of a measurement */
+static void start_timer(void)
+{
+	struct timeval *begin = &tp1;
+
+	gettimeofday(begin, NULL);
+}
+
+/*
+  store the current time of day in tp2 and return, as a double, the
+  number of seconds between tp1 (set by start_timer) and tp2
+ */
+static double end_timer(void)
+{
+	double began, finished;
+
+	gettimeofday(&tp2, NULL);
+
+	finished = tp2.tv_sec + (tp2.tv_usec*1.0e-6);
+	began    = tp1.tv_sec + (tp1.tv_usec*1.0e-6);
+
+	return finished - began;
+}
+
+
+/* how many tree inserts / list appends main() times; set with -r / --num-records */
+static int num_records = 1000;
+
+
+/*
+  element of the doubly linked list used in the dlist half of the
+  benchmark; it carries no payload, only the prev/next links
+ */
+struct list_node {
+	struct list_node *prev;
+	struct list_node *next;
+};
+
+/*
+  main program
+
+  Times two ways of storing num_records entries and prints the elapsed
+  seconds for each:
+   - inserting the keys 0..num_records-1 into a trbt tree
+   - appending to a dlist by walking from the head to the tail before
+     every add (the worst case for a list), calling timeval_compare
+     once per node visited, as the events code does
+
+  Returns 0 on success.  Exits with status 1 on an invalid command line
+  option or when an allocation fails.
+*/
+int main(int argc, const char *argv[])
+{
+	struct poptOption popt_options[] = {
+		POPT_AUTOHELP
+		POPT_CTDB_CMDLINE
+		{ "num-records", 'r', POPT_ARG_INT, &num_records, 0, "num_records", "integer" },
+		POPT_TABLEEND
+	};
+	int opt;
+	const char **extra_argv;
+	int extra_argc = 0;
+	poptContext pc;
+	struct event_context *ev;
+	double elapsed;
+	int i;
+	trbt_tree_t *tree;
+	struct list_node *list, *list_new, *list_head=NULL;
+
+	pc = poptGetContext(argv[0], argc, argv, popt_options, POPT_CONTEXT_KEEP_FIRST);
+
+	while ((opt = poptGetNextOpt(pc)) != -1) {
+		switch (opt) {
+		default:
+			fprintf(stderr, "Invalid option %s: %s\n",
+				poptBadOption(pc, 0), poptStrerror(opt));
+			exit(1);
+		}
+	}
+
+	/* setup the remaining options for the main program to use */
+	extra_argv = poptGetArgs(pc);
+	if (extra_argv) {
+		extra_argv++;
+		while (extra_argv[extra_argc]) extra_argc++;
+	}
+
+	/* NOTE(review): neither ev nor the extra arguments are used by the
+	   benchmark below; they are kept so the program start-up is unchanged */
+	ev = event_context_init(NULL);
+
+
+	printf("testing tree insert for %d records\n", num_records);
+	tree = trbt_create(NULL);
+	if (tree == NULL) {
+		fprintf(stderr, "Failed to create tree\n");
+		exit(1);
+	}
+	start_timer();
+	for (i=0;i<num_records;i++) {
+		trbt_insert32(tree, i, NULL);
+	}
+	elapsed=end_timer();
+	printf("%f seconds\n",(float)elapsed);
+
+
+	printf("testing dlist (worst case) add to tail for %d records\n", num_records);
+	/* seed the list with one node so the tail walk below always has a
+	   head to start from */
+	list_new=talloc(NULL, struct list_node);
+	if (list_new == NULL) {
+		fprintf(stderr, "Out of memory allocating list node\n");
+		exit(1);
+	}
+	DLIST_ADD(list_head, list_new);
+	start_timer();
+	for (i=0;i<num_records;i++) {
+		/* walk to the current tail; list points at it when the loop ends */
+		for(list=list_head;list->next;list=list->next) {
+			/* the events code does a timeval_compare */
+			timeval_compare(&tp1, &tp2);
+		}
+
+		list_new=talloc(NULL, struct list_node);
+		if (list_new == NULL) {
+			fprintf(stderr, "Out of memory allocating list node\n");
+			exit(1);
+		}
+		DLIST_ADD_AFTER(list_head, list_new, list);
+	}
+	elapsed=end_timer();
+	printf("%f seconds\n",(float)elapsed);
+
+	return 0;
+}
return 0;
}
+/*
+  get a list of all tickles for this vnn
+
+  argv[0] is the vnn whose tickles are requested; the request is sent to
+  the node given by options.vnn.  Every registered connection is printed
+  as a SRC/DST address:port pair.
+
+  Returns 0 on success and -1 if the list could not be fetched.
+ */
+static int control_get_tickles(struct ctdb_context *ctdb, int argc, const char **argv)
+{
+	struct ctdb_control_tcp_tickle_list *list = NULL;
+	uint32_t vnn;
+	int i, ret;
+
+	if (argc < 1) {
+		/* NOTE(review): presumably usage() does not return - confirm */
+		usage();
+	}
+
+	vnn = strtoul(argv[0], NULL, 0);
+
+	ret = ctdb_ctrl_get_tcp_tickles(ctdb, TIMELIMIT(), options.vnn, ctdb, vnn, &list);
+	/* ctdb_ctrl_get_tcp_tickles() returns -1 when the control could not
+	   be delivered and otherwise the status reported for the control,
+	   so any non-zero value is a failure.  In that case the reply may
+	   carry no payload and list must not be dereferenced. */
+	if (ret != 0 || list == NULL) {
+		printf("Unable to list tickles\n");
+		if (list != NULL) {
+			talloc_free(list);
+		}
+		return -1;
+	}
+
+	printf("Tickles for vnn:%u\n", list->vnn);
+	printf("Num tickles:%u\n", list->tickles.num);
+	for (i=0;i<list->tickles.num;i++) {
+		/* inet_ntoa() formats into a static buffer, so source and
+		   destination are printed by separate printf calls */
+		printf("SRC: %s:%u ", inet_ntoa(list->tickles.connections[i].saddr.sin_addr), ntohs(list->tickles.connections[i].saddr.sin_port));
+		printf("DST: %s:%u\n", inet_ntoa(list->tickles.connections[i].daddr.sin_addr), ntohs(list->tickles.connections[i].daddr.sin_port));
+	}
+
+	talloc_free(list);
+
+	return 0;
+}
+
/*
kill a tcp connection
*/
{ "thaw", control_thaw, true, "thaw all databases" },
{ "killtcp", kill_tcp, false, "kill a tcp connection.", "<srcip:port> <dstip:port>" },
{ "tickle", tickle_tcp, false, "send a tcp tickle ack", "<srcip:port> <dstip:port>" },
+ { "gettickles", control_get_tickles, false, "get the list of tickles registered for this vnn", "<vnn>" },
};
/*
--- /dev/null
+/*
+ simple smnotify tool
+
+ Copyright (C) Ronnie Sahlberg 2007
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include "smnotify.h"
+#include "../../lib/popt/popt.h"
+
+/* Values of the command line options; main() hands their addresses to popt. */
+/* --client / -c: remote client to send the notify to */
+static char *client = NULL;
+/* --ip / -i: local ip address to send the notification from */
+static const char *ip = NULL;
+/* --server / -s: servername to use in the notification */
+static const char *server = NULL;
+/* --stateval: state value put in the notification; main() treats 0 as "not specified" */
+static int stateval = 0;
+/* --clientport: port on the client that the notification is sent to */
+static int clientport = 0;
+/* --sendport: local port the notification is sent from */
+static int sendport = 0;
+
+/*
+  print a summary of the command line to stderr and terminate.
+
+  This is only called after a required option was found to be missing,
+  so the process exits with a non-zero status (1, the same status used
+  for an invalid option) to let the invoking script detect the failure.
+  Never returns.
+ */
+static void useage(void)
+{
+	fprintf(stderr,
+		"Usage: smnotify --client=<hostname/ip> --ip=<ip> "
+		"--server=<hostname/ip> --stateval=<integer> "
+		"[--clientport=<integer>] [--sendport=<integer>]\n");
+	exit(1);
+}
+
+/*
+  create a UDP socket bound to a specific local address and port.
+
+  addr is an IPv4 address string as understood by inet_aton() and port
+  is the local port in host byte order (0..65535).
+
+  Returns the socket descriptor.  Does not return on failure: an error
+  is printed and the process exits with status 10.
+ */
+static int create_socket(const char *addr, int port)
+{
+	int s;
+	struct sockaddr_in sock_in;
+
+	/* htons() would silently truncate an out of range value */
+	if (port < 0 || port > 65535) {
+		printf("Invalid local port %d\n", port);
+		exit(10);
+	}
+
+	s = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
+	if (s == -1) {
+		printf("Failed to open local socket\n");
+		exit(10);
+	}
+
+	memset(&sock_in, 0, sizeof(sock_in));
+	sock_in.sin_family = AF_INET;
+	sock_in.sin_port   = htons(port);
+	/* inet_aton() returns 0 for a string it cannot parse.  Without this
+	   check sin_addr would stay zeroed and the socket would be bound to
+	   the wildcard address instead of the requested one. */
+	if (inet_aton(addr, &sock_in.sin_addr) == 0) {
+		printf("Failed to parse local address '%s'\n", addr);
+		close(s);
+		exit(10);
+	}
+	if (bind(s, (struct sockaddr *)&sock_in, sizeof(sock_in)) == -1) {
+		printf("Failed to bind to local socket\n");
+		close(s);
+		exit(10);
+	}
+
+	return s;
+}
+
+/*
+  send one SM_NOTIFY call (RPC program 100024, version 1) over UDP to a
+  client, using a caller-chosen local source address and port.
+
+  --client, --ip, --server and a non-zero --stateval are required; when
+  one is missing an error is printed and useage() is called.
+
+  Returns 0 once the notification has been handed to the RPC layer; no
+  reply is waited for.  Exits with status 1 on an invalid option and
+  with status 10 when the socket, the client address or the RPC client
+  handle cannot be set up.
+ */
+int main(int argc, const char *argv[])
+{
+	struct poptOption popt_options[] = {
+		POPT_AUTOHELP
+		{ "client", 'c', POPT_ARG_STRING, &client, 0, "remote client to send the notify to", "hostname/ip" },
+		{ "clientport", 0, POPT_ARG_INT, &clientport, 0, "clientport", "integer" },
+		{ "ip", 'i', POPT_ARG_STRING, &ip, 0, "local ip address to send the notification from", "ip" },
+		{ "sendport", 0, POPT_ARG_INT, &sendport, 0, "port to send the notify from", "integer" },
+		{ "server", 's', POPT_ARG_STRING, &server, 0, "servername to use in the notification", "hostname/ip" },
+		{ "stateval", 0, POPT_ARG_INT, &stateval, 0, "stateval", "integer" },
+		POPT_TABLEEND
+	};
+	int opt;
+	poptContext pc;
+	CLIENT *clnt;
+	int s;
+	struct sockaddr_in sock_cl;
+	struct timeval w;
+	struct status st;
+
+	pc = poptGetContext(argv[0], argc, argv, popt_options, POPT_CONTEXT_KEEP_FIRST);
+
+	while ((opt = poptGetNextOpt(pc)) != -1) {
+		switch (opt) {
+		default:
+			fprintf(stderr, "Invalid option %s: %s\n",
+				poptBadOption(pc, 0), poptStrerror(opt));
+			exit(1);
+		}
+	}
+
+	if (client == NULL) {
+		printf("ERROR: client not specified\n");
+		useage();
+	}
+
+	if (ip == NULL) {
+		printf("ERROR: ip not specified\n");
+		useage();
+	}
+
+	if (server == NULL) {
+		printf("ERROR: server not specified\n");
+		useage();
+	}
+
+	if (stateval == 0) {
+		printf("ERROR: stateval not specified\n");
+		useage();
+	}
+
+
+	/* Since we want to control from which address these packets are
+	   sent we must create the socket ourselves and use low-level rpc
+	   calls.
+	*/
+	s = create_socket(ip, sendport);
+
+	/* only wait for at most 3 seconds before giving up; no SIGALRM
+	   handler is installed in this file */
+	alarm(3);
+
+	/* Setup a sockaddr_in for the client we want to notify */
+	memset(&sock_cl, 0, sizeof(sock_cl));
+	sock_cl.sin_family = AF_INET;
+	sock_cl.sin_port   = htons(clientport);
+	/* inet_aton() returns 0 when it cannot parse the string; without
+	   this check the notification would be addressed to 0.0.0.0 */
+	if (inet_aton(client, &sock_cl.sin_addr) == 0) {
+		printf("ERROR: failed to parse client address '%s'\n", client);
+		close(s);
+		exit(10);
+	}
+
+	w.tv_sec = 1;
+	w.tv_usec= 0;
+
+	/* program 100024, version 1: the values declared in smnotify.x */
+	clnt = clntudp_create(&sock_cl, 100024, 1, w, &s);
+	if (clnt == NULL) {
+		printf("ERROR: failed to connect to client\n");
+		close(s);
+		exit(10);
+	}
+
+	/* we don't want to wait for any reply */
+	w.tv_sec = 0;
+	w.tv_usec = 0;
+	clnt_control(clnt, CLSET_TIMEOUT, (char *)&w);
+
+	/* mon_name is a plain char * in the rpcgen generated struct;
+	   presumably the string is only read while the call is encoded */
+	st.mon_name=(char *)server;
+	st.state=stateval;
+	/* the result is deliberately ignored: with a zero timeout no reply
+	   is waited for */
+	sm_notify_1(&st, clnt);
+
+	clnt_destroy(clnt);
+	close(s);
+	poptFreeContext(pc);
+
+	return 0;
+}
--- /dev/null
+
+/* upper bound on the length of the mon_name string in struct status */
+const SM_MAXSTRLEN = 1024;
+
+/*
+  argument of SM_NOTIFY: a name string of at most SM_MAXSTRLEN
+  characters and an integer state
+ */
+struct status {
+ string mon_name<SM_MAXSTRLEN>;
+ int state;
+};
+
+
+/*
+  RPC program 100024, version 1, with the single procedure SM_NOTIFY
+  (procedure number 6), which takes a struct status and returns nothing
+ */
+program SMNOTIFY {
+ version SMVERSION {
+ void SM_NOTIFY(struct status) = 6;
+ } = 1;
+} = 100024;
+
+