Create a tunable for how often to collect rolling statistics and initialize it to...
[metze/ctdb/wip.git] / include / ctdb_private.h
index 7b752c5726a9ad7d9aafe0794c3205595f489ac3..f018554dd71e74c932bd408c94e326f48c2bdcaa 100644 (file)
@@ -46,6 +46,7 @@ extern pid_t ctdbd_pid;
 
 /*
   a tcp connection description
+  also used by tcp_add and tcp_remove controls
  */
 struct ctdb_tcp_connection {
        ctdb_sock_addr src_addr;
@@ -116,6 +117,7 @@ struct ctdb_tunable {
        uint32_t max_queue_depth_drop_msg;
        uint32_t use_status_events_for_monitoring;
        uint32_t allow_unhealthy_db_read;
+       uint32_t stat_history_interval;
 };
 
 /*
@@ -204,14 +206,6 @@ struct ctdb_node {
        const char *name; /* for debug messages */
        void *private_data; /* private to transport */
        uint32_t pnn;
-#define NODE_FLAGS_DISCONNECTED                0x00000001 /* node isn't connected */
-#define NODE_FLAGS_UNHEALTHY           0x00000002 /* monitoring says node is unhealthy */
-#define NODE_FLAGS_PERMANENTLY_DISABLED        0x00000004 /* administrator has disabled node */
-#define NODE_FLAGS_BANNED              0x00000008 /* recovery daemon has banned the node */
-#define NODE_FLAGS_DELETED             0x00000010 /* this node has been deleted */
-#define NODE_FLAGS_STOPPED             0x00000020 /* this node has been stopped */
-#define NODE_FLAGS_DISABLED            (NODE_FLAGS_UNHEALTHY|NODE_FLAGS_PERMANENTLY_DISABLED)
-#define NODE_FLAGS_INACTIVE            (NODE_FLAGS_DELETED|NODE_FLAGS_DISCONNECTED|NODE_FLAGS_BANNED|NODE_FLAGS_STOPPED)
        uint32_t flags;
 
        /* used by the dead node monitoring */
@@ -272,7 +266,7 @@ struct ctdb_message_list {
        struct ctdb_context *ctdb;
        struct ctdb_message_list *next, *prev;
        uint64_t srvid;
-       ctdb_message_fn_t message_handler;
+       ctdb_msg_fn_t message_handler;
        void *message_private;
 };
 
@@ -283,57 +277,63 @@ struct ctdb_daemon_data {
        struct ctdb_queue *queue;
 };
 
-/*
-  ctdb status information
- */
-struct ctdb_statistics {
-       uint32_t num_clients;
-       uint32_t frozen;
-       uint32_t recovering;
-       uint32_t client_packets_sent;
-       uint32_t client_packets_recv;
-       uint32_t node_packets_sent;
-       uint32_t node_packets_recv;
-       uint32_t keepalive_packets_sent;
-       uint32_t keepalive_packets_recv;
-       struct {
-               uint32_t req_call;
-               uint32_t reply_call;
-               uint32_t req_dmaster;
-               uint32_t reply_dmaster;
-               uint32_t reply_error;
-               uint32_t req_message;
-               uint32_t req_control;
-               uint32_t reply_control;
-       } node;
-       struct {
-               uint32_t req_call;
-               uint32_t req_message;
-               uint32_t req_control;
-       } client;
-       struct {
-               uint32_t call;
-               uint32_t control;
-               uint32_t traverse;
-       } timeouts;
-       struct {
-               double ctdbd;
-               double recd;
-       } reclock;
-       uint32_t total_calls;
-       uint32_t pending_calls;
-       uint32_t lockwait_calls;
-       uint32_t pending_lockwait_calls;
-       uint32_t childwrite_calls;
-       uint32_t pending_childwrite_calls;
-       uint32_t memory_used;
-       uint32_t __last_counter; /* hack for control_statistics_all */
-       uint32_t max_hop_count;
-       double max_call_latency;
-       double max_lockwait_latency;
-       double max_childwrite_latency;
-       uint32_t num_recoveries;
-};
+
+#define CTDB_UPDATE_STAT(ctdb, counter, value) \
+       {                                                                               \
+               if (value > ctdb->statistics.counter) {                                 \
+                       ctdb->statistics.counter = c->hopcount;                         \
+               }                                                                       \
+               if (value > ctdb->statistics_current.counter) {                         \
+                       ctdb->statistics_current.counter = c->hopcount;                 \
+               }                                                                       \
+       }
+
+#define CTDB_INCREMENT_STAT(ctdb, counter) \
+       {                                                                               \
+               ctdb->statistics.counter++;                                             \
+               ctdb->statistics_current.counter++;                                     \
+       }
+
+#define CTDB_DECREMENT_STAT(ctdb, counter) \
+       {                                                                               \
+               if (ctdb->statistics.counter > 0)                                       \
+                       ctdb->statistics.counter--;                                     \
+               if (ctdb->statistics_current.counter > 0)                               \
+                       ctdb->statistics_current.counter--;                             \
+       }
+
+#define CTDB_UPDATE_RECLOCK_LATENCY(ctdb, name, counter, value) \
+       {                                                                               \
+               if (value > ctdb->statistics.counter)                                   \
+                       ctdb->statistics.counter = value;                               \
+               if (value > ctdb->statistics_current.counter)                           \
+                       ctdb->statistics_current.counter = value;                       \
+                                                                                       \
+               if (ctdb->tunable.reclock_latency_ms != 0) {                            \
+                       if (value*1000 > ctdb->tunable.reclock_latency_ms) {            \
+                               DEBUG(DEBUG_ERR, ("High RECLOCK latency %fs for operation %s\n", value, name)); \
+                       }                                                               \
+               }                                                                       \
+       }
+
+
+#define CTDB_UPDATE_LATENCY(ctdb, db, operation, counter, t) \
+       {                                                                               \
+               double l = timeval_elapsed(&t);                                         \
+               if (l > ctdb->statistics.counter)                                       \
+                       ctdb->statistics.counter = l;                                   \
+               if (l > ctdb->statistics_current.counter)                               \
+                       ctdb->statistics_current.counter = l;                           \
+                                                                                       \
+               if (ctdb->tunable.log_latency_ms !=0) {                                 \
+                       if (l*1000 > ctdb->tunable.log_latency_ms) {                    \
+                               DEBUG(DEBUG_WARNING, ("High latency %.6fs for operation %s on database %s\n", l, operation, db->db_name));\
+                       }                                                               \
+               }                                                                       \
+       }
+
+
+
 
 
 #define INVALID_GENERATION 1
@@ -380,7 +380,7 @@ enum ctdb_freeze_mode {CTDB_FREEZE_NONE, CTDB_FREEZE_PENDING, CTDB_FREEZE_FROZEN
 #define NUM_DB_PRIORITIES 3
 /* main state of the ctdb daemon */
 struct ctdb_context {
-       struct event_context *ev;
+       struct tevent_context *ev;
        struct timeval ctdbd_start_time;
        struct timeval last_recovery_started;
        struct timeval last_recovery_finished;
@@ -410,7 +410,7 @@ struct ctdb_context {
        unsigned flags;
        uint32_t capabilities;
        struct idr_context *idr;
-       uint16_t idr_cnt;
+       int lastid;
        struct ctdb_node **nodes; /* array of nodes in the cluster - indexed by vnn */
        struct ctdb_vnn *vnn; /* list of public ip addresses and interfaces */
        struct ctdb_vnn *single_ip_vnn; /* a structure for the single ip */
@@ -423,6 +423,9 @@ struct ctdb_context {
        struct ctdb_message_list *message_list;
        struct ctdb_daemon_data daemon;
        struct ctdb_statistics statistics;
+       struct ctdb_statistics statistics_current;
+#define MAX_STAT_HISTORY 100
+       struct ctdb_statistics statistics_history[MAX_STAT_HISTORY];
        struct ctdb_vnn_map *vnn_map;
        uint32_t num_clients;
        uint32_t recovery_master;
@@ -454,6 +457,8 @@ struct ctdb_context {
 
        TALLOC_CTX *banning_ctx;
 
+       struct ctdb_vacuum_child_context *vacuumers;
+
        /* mapping from pid to ctdb_client * */
        struct ctdb_client_pid_list *client_pids;
 
@@ -539,14 +544,6 @@ struct ctdb_control_gratious_arp {
        char iface[1];
 };
 
-/*
-  struct for tcp_add and tcp_remove controls
- */
-struct ctdb_control_tcp_vnn {
-       ctdb_sock_addr src;
-       ctdb_sock_addr dest;
-};
-
 /*
   persistent store control - update this record on all other nodes
  */
@@ -674,7 +671,8 @@ struct ctdb_queue *ctdb_queue_setup(struct ctdb_context *ctdb,
                                    TALLOC_CTX *mem_ctx, int fd, int alignment,
                                    
                                    ctdb_queue_cb_fn_t callback,
-                                   void *private_data);
+                                   void *private_data, const char *fmt, ...)
+       PRINTF_ATTRIBUTE(7,8);
 
 /*
   allocate a packet for use in client<->daemon communication
@@ -725,10 +723,6 @@ struct ctdb_call_state *ctdb_client_call_send(struct ctdb_db_context *ctdb_db,
 */
 int ctdb_client_call_recv(struct ctdb_call_state *state, struct ctdb_call *call);
 
-int ctdb_daemon_set_message_handler(struct ctdb_context *ctdb, uint64_t srvid, 
-                            ctdb_message_fn_t handler,
-                            void *private_data);
-
 int ctdb_client_send_message(struct ctdb_context *ctdb, uint32_t vnn,
                             uint64_t srvid, TDB_DATA data);
 
@@ -762,9 +756,6 @@ void ctdb_recv_raw_pkt(void *p, uint8_t *data, uint32_t length);
 
 int ctdb_socket_connect(struct ctdb_context *ctdb);
 
-void ctdb_latency(struct ctdb_db_context *ctdb_db, const char *name, double *latency, struct timeval t);
-void ctdb_reclock_latency(struct ctdb_context *ctdb, const char *name, double *latency, double l);
-
 #define CTDB_BAD_REQID ((uint32_t)-1)
 uint32_t ctdb_reqid_new(struct ctdb_context *ctdb, void *state);
 void *_ctdb_reqid_find(struct ctdb_context *ctdb, uint32_t reqid, const char *type, const char *location);
@@ -885,20 +876,6 @@ struct ctdb_control_list_tunable {
 };
 
 
-/* table that contains a list of all nodes a ctdb knows about and their 
-   status
- */
-struct ctdb_node_and_flags {
-       uint32_t pnn;
-       uint32_t flags;
-       ctdb_sock_addr addr;
-};
-
-struct ctdb_node_map {
-       uint32_t num;
-       struct ctdb_node_and_flags nodes[1];
-};
-
 struct ctdb_node_and_flagsv4 {
        uint32_t pnn;
        uint32_t flags;
@@ -1019,10 +996,6 @@ struct ctdb_public_ipv4 {
        struct sockaddr_in sin;
 };
 
-struct ctdb_public_ip {
-       uint32_t pnn;
-       ctdb_sock_addr addr;
-};
 int ctdb_ctrl_takeover_ip(struct ctdb_context *ctdb, struct timeval timeout, 
                          uint32_t destnode, struct ctdb_public_ip *ip);
 int ctdb_ctrl_release_ip(struct ctdb_context *ctdb, struct timeval timeout, 
@@ -1033,10 +1006,6 @@ struct ctdb_all_public_ipsv4 {
        struct ctdb_public_ipv4 ips[1];
 };
 
-struct ctdb_all_public_ips {
-       uint32_t num;
-       struct ctdb_public_ip ips[1];
-};
 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb, struct ctdb_req_control *c, TDB_DATA *outdata);
 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control *c, TDB_DATA *outdata);
 int ctdb_ctrl_get_public_ips(struct ctdb_context *ctdb, 
@@ -1121,7 +1090,7 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap);
 
 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id, 
                                TDB_DATA indata);
-int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata);
+int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed);
 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata);
 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn);
 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata);
@@ -1301,6 +1270,7 @@ int32_t ctdb_control_recd_ping(struct ctdb_context *ctdb);
 int32_t ctdb_control_set_recmaster(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA indata);
 
 extern int script_log_level;
+extern bool fast_start;
 
 int32_t ctdb_control_get_event_script_status(struct ctdb_context *ctdb,
                                             uint32_t call_type,
@@ -1312,11 +1282,13 @@ int ctdb_ctrl_report_recd_lock_latency(struct ctdb_context *ctdb, struct timeval
 int32_t ctdb_control_stop_node(struct ctdb_context *ctdb, struct ctdb_req_control *c, bool *async_reply);
 int32_t ctdb_control_continue_node(struct ctdb_context *ctdb);
 
+void ctdb_stop_vacuuming(struct ctdb_context *ctdb);
 int ctdb_vacuum_init(struct ctdb_db_context *ctdb_db);
 
 int32_t ctdb_control_enable_script(struct ctdb_context *ctdb, TDB_DATA indata);
 int32_t ctdb_control_disable_script(struct ctdb_context *ctdb, TDB_DATA indata);
 
+int32_t ctdb_local_node_got_banned(struct ctdb_context *ctdb);
 int32_t ctdb_control_set_ban_state(struct ctdb_context *ctdb, TDB_DATA indata);
 int32_t ctdb_control_get_ban_state(struct ctdb_context *ctdb, TDB_DATA *outdata);
 int32_t ctdb_control_set_db_priority(struct ctdb_context *ctdb, TDB_DATA indata);
@@ -1370,4 +1342,12 @@ int verify_remote_ip_allocation(struct ctdb_context *ctdb,
 int update_ip_assignment_tree(struct ctdb_context *ctdb,
                                struct ctdb_public_ip *ip);
 
+int ctdb_init_tevent_logging(struct ctdb_context *ctdb);
+
+int ctdb_statistics_init(struct ctdb_context *ctdb);
+
+int32_t ctdb_control_get_stat_history(struct ctdb_context *ctdb,
+                                     struct ctdb_req_control *c,
+                                     TDB_DATA *outdata);
+
 #endif