Create a tunable for how often to collect rolling statistics and initialize it to...
[metze/ctdb/wip.git] / include / ctdb_private.h
index af271f4bfb5c839ae64bc539f22a33d115b90fad..f018554dd71e74c932bd408c94e326f48c2bdcaa 100644 (file)
@@ -46,6 +46,7 @@ extern pid_t ctdbd_pid;
 
 /*
   a tcp connection description
+  also used by tcp_add and tcp_remove controls
  */
 struct ctdb_tcp_connection {
        ctdb_sock_addr src_addr;
@@ -116,6 +117,7 @@ struct ctdb_tunable {
        uint32_t max_queue_depth_drop_msg;
        uint32_t use_status_events_for_monitoring;
        uint32_t allow_unhealthy_db_read;
+       uint32_t stat_history_interval;
 };
 
 /*
@@ -204,14 +206,6 @@ struct ctdb_node {
        const char *name; /* for debug messages */
        void *private_data; /* private to transport */
        uint32_t pnn;
-#define NODE_FLAGS_DISCONNECTED                0x00000001 /* node isn't connected */
-#define NODE_FLAGS_UNHEALTHY           0x00000002 /* monitoring says node is unhealthy */
-#define NODE_FLAGS_PERMANENTLY_DISABLED        0x00000004 /* administrator has disabled node */
-#define NODE_FLAGS_BANNED              0x00000008 /* recovery daemon has banned the node */
-#define NODE_FLAGS_DELETED             0x00000010 /* this node has been deleted */
-#define NODE_FLAGS_STOPPED             0x00000020 /* this node has been stopped */
-#define NODE_FLAGS_DISABLED            (NODE_FLAGS_UNHEALTHY|NODE_FLAGS_PERMANENTLY_DISABLED)
-#define NODE_FLAGS_INACTIVE            (NODE_FLAGS_DELETED|NODE_FLAGS_DISCONNECTED|NODE_FLAGS_BANNED|NODE_FLAGS_STOPPED)
        uint32_t flags;
 
        /* used by the dead node monitoring */
@@ -283,59 +277,63 @@ struct ctdb_daemon_data {
        struct ctdb_queue *queue;
 };
 
-/*
-  ctdb status information
- */
-struct ctdb_statistics {
-       uint32_t num_clients;
-       uint32_t frozen;
-       uint32_t recovering;
-       uint32_t client_packets_sent;
-       uint32_t client_packets_recv;
-       uint32_t node_packets_sent;
-       uint32_t node_packets_recv;
-       uint32_t keepalive_packets_sent;
-       uint32_t keepalive_packets_recv;
-       struct {
-               uint32_t req_call;
-               uint32_t reply_call;
-               uint32_t req_dmaster;
-               uint32_t reply_dmaster;
-               uint32_t reply_error;
-               uint32_t req_message;
-               uint32_t req_control;
-               uint32_t reply_control;
-       } node;
-       struct {
-               uint32_t req_call;
-               uint32_t req_message;
-               uint32_t req_control;
-       } client;
-       struct {
-               uint32_t call;
-               uint32_t control;
-               uint32_t traverse;
-       } timeouts;
-       struct {
-               double ctdbd;
-               double recd;
-       } reclock;
-       uint32_t total_calls;
-       uint32_t pending_calls;
-       uint32_t lockwait_calls;
-       uint32_t pending_lockwait_calls;
-       uint32_t childwrite_calls;
-       uint32_t pending_childwrite_calls;
-       uint32_t memory_used;
-       uint32_t __last_counter; /* hack for control_statistics_all */
-       uint32_t max_hop_count;
-       double max_call_latency;
-       double max_lockwait_latency;
-       double max_childwrite_latency;
-       uint32_t num_recoveries;
-       struct timeval statistics_start_time;
-       struct timeval statistics_current_time;
-};
+/* Raise 'counter' in ctdb->statistics and ctdb->statistics_current to 'value' when 'value' is larger; 'value' is evaluated more than once. */
+#define CTDB_UPDATE_STAT(ctdb, counter, value) \
+       {                                                                               \
+               if ((value) > (ctdb)->statistics.counter) {                             \
+                       (ctdb)->statistics.counter = (value);                           \
+               }                                                                       \
+               if ((value) > (ctdb)->statistics_current.counter) {                     \
+                       (ctdb)->statistics_current.counter = (value);                   \
+               }                                                                       \
+       }
+/* Add one to 'counter' in both ctdb->statistics and ctdb->statistics_current. */
+#define CTDB_INCREMENT_STAT(ctdb, counter) \
+       {                                                                               \
+               (ctdb)->statistics.counter++;                                           \
+               (ctdb)->statistics_current.counter++;                                   \
+       }
+/* Subtract one from 'counter' in ctdb->statistics and ctdb->statistics_current; each copy is only decremented while it is above zero. */
+#define CTDB_DECREMENT_STAT(ctdb, counter) \
+       {                                                                               \
+               if ((ctdb)->statistics.counter > 0)                                     \
+                       (ctdb)->statistics.counter--;                                   \
+               if ((ctdb)->statistics_current.counter > 0)                             \
+                       (ctdb)->statistics_current.counter--;                           \
+       }
+/* Raise 'counter' in both statistics structures to 'value' when larger, and log at DEBUG_ERR when (value)*1000 exceeds a non-zero tunable.reclock_latency_ms. */
+#define CTDB_UPDATE_RECLOCK_LATENCY(ctdb, name, counter, value) \
+       {                                                                               \
+               if ((value) > (ctdb)->statistics.counter)                               \
+                       (ctdb)->statistics.counter = (value);                           \
+               if ((value) > (ctdb)->statistics_current.counter)                       \
+                       (ctdb)->statistics_current.counter = (value);                   \
+                                                                                       \
+               if ((ctdb)->tunable.reclock_latency_ms != 0) {                          \
+                       if ((value)*1000 > (ctdb)->tunable.reclock_latency_ms) {        \
+                               DEBUG(DEBUG_ERR, ("High RECLOCK latency %fs for operation %s\n", (value), (name))); \
+                       }                                                               \
+               }                                                                       \
+       }
+
+/* Take l = timeval_elapsed(&(t)), raise 'counter' in both statistics structures to l when larger, and log at DEBUG_WARNING when l*1000 exceeds a non-zero tunable.log_latency_ms. */
+#define CTDB_UPDATE_LATENCY(ctdb, db, operation, counter, t) \
+       {                                                                               \
+               double l = timeval_elapsed(&(t));                                       \
+               if (l > (ctdb)->statistics.counter)                                     \
+                       (ctdb)->statistics.counter = l;                                 \
+               if (l > (ctdb)->statistics_current.counter)                             \
+                       (ctdb)->statistics_current.counter = l;                         \
+                                                                                       \
+               if ((ctdb)->tunable.log_latency_ms !=0) {                               \
+                       if (l*1000 > (ctdb)->tunable.log_latency_ms) {                  \
+                               DEBUG(DEBUG_WARNING, ("High latency %.6fs for operation %s on database %s\n", l, (operation), (db)->db_name));\
+                       }                                                               \
+               }                                                                       \
+       }
+
+
+
 
 
 #define INVALID_GENERATION 1
@@ -425,6 +423,9 @@ struct ctdb_context {
        struct ctdb_message_list *message_list;
        struct ctdb_daemon_data daemon;
        struct ctdb_statistics statistics;
+       struct ctdb_statistics statistics_current;
+#define MAX_STAT_HISTORY 100
+       struct ctdb_statistics statistics_history[MAX_STAT_HISTORY];
        struct ctdb_vnn_map *vnn_map;
        uint32_t num_clients;
        uint32_t recovery_master;
@@ -456,6 +457,8 @@ struct ctdb_context {
 
        TALLOC_CTX *banning_ctx;
 
+       struct ctdb_vacuum_child_context *vacuumers;
+
        /* mapping from pid to ctdb_client * */
        struct ctdb_client_pid_list *client_pids;
 
@@ -541,14 +544,6 @@ struct ctdb_control_gratious_arp {
        char iface[1];
 };
 
-/*
-  struct for tcp_add and tcp_remove controls
- */
-struct ctdb_control_tcp_vnn {
-       ctdb_sock_addr src;
-       ctdb_sock_addr dest;
-};
-
 /*
   persistent store control - update this record on all other nodes
  */
@@ -761,9 +756,6 @@ void ctdb_recv_raw_pkt(void *p, uint8_t *data, uint32_t length);
 
 int ctdb_socket_connect(struct ctdb_context *ctdb);
 
-void ctdb_latency(struct ctdb_db_context *ctdb_db, const char *name, double *latency, struct timeval t);
-void ctdb_reclock_latency(struct ctdb_context *ctdb, const char *name, double *latency, double l);
-
 #define CTDB_BAD_REQID ((uint32_t)-1)
 uint32_t ctdb_reqid_new(struct ctdb_context *ctdb, void *state);
 void *_ctdb_reqid_find(struct ctdb_context *ctdb, uint32_t reqid, const char *type, const char *location);
@@ -884,20 +876,6 @@ struct ctdb_control_list_tunable {
 };
 
 
-/* table that contains a list of all nodes a ctdb knows about and their 
-   status
- */
-struct ctdb_node_and_flags {
-       uint32_t pnn;
-       uint32_t flags;
-       ctdb_sock_addr addr;
-};
-
-struct ctdb_node_map {
-       uint32_t num;
-       struct ctdb_node_and_flags nodes[1];
-};
-
 struct ctdb_node_and_flagsv4 {
        uint32_t pnn;
        uint32_t flags;
@@ -1018,10 +996,6 @@ struct ctdb_public_ipv4 {
        struct sockaddr_in sin;
 };
 
-struct ctdb_public_ip {
-       uint32_t pnn;
-       ctdb_sock_addr addr;
-};
 int ctdb_ctrl_takeover_ip(struct ctdb_context *ctdb, struct timeval timeout, 
                          uint32_t destnode, struct ctdb_public_ip *ip);
 int ctdb_ctrl_release_ip(struct ctdb_context *ctdb, struct timeval timeout, 
@@ -1032,10 +1006,6 @@ struct ctdb_all_public_ipsv4 {
        struct ctdb_public_ipv4 ips[1];
 };
 
-struct ctdb_all_public_ips {
-       uint32_t num;
-       struct ctdb_public_ip ips[1];
-};
 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb, struct ctdb_req_control *c, TDB_DATA *outdata);
 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control *c, TDB_DATA *outdata);
 int ctdb_ctrl_get_public_ips(struct ctdb_context *ctdb, 
@@ -1120,7 +1090,7 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap);
 
 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id, 
                                TDB_DATA indata);
-int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata);
+int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed);
 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata);
 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn);
 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata);
@@ -1312,11 +1282,13 @@ int ctdb_ctrl_report_recd_lock_latency(struct ctdb_context *ctdb, struct timeval
 int32_t ctdb_control_stop_node(struct ctdb_context *ctdb, struct ctdb_req_control *c, bool *async_reply);
 int32_t ctdb_control_continue_node(struct ctdb_context *ctdb);
 
+void ctdb_stop_vacuuming(struct ctdb_context *ctdb);
 int ctdb_vacuum_init(struct ctdb_db_context *ctdb_db);
 
 int32_t ctdb_control_enable_script(struct ctdb_context *ctdb, TDB_DATA indata);
 int32_t ctdb_control_disable_script(struct ctdb_context *ctdb, TDB_DATA indata);
 
+int32_t ctdb_local_node_got_banned(struct ctdb_context *ctdb);
 int32_t ctdb_control_set_ban_state(struct ctdb_context *ctdb, TDB_DATA indata);
 int32_t ctdb_control_get_ban_state(struct ctdb_context *ctdb, TDB_DATA *outdata);
 int32_t ctdb_control_set_db_priority(struct ctdb_context *ctdb, TDB_DATA indata);
@@ -1370,4 +1342,12 @@ int verify_remote_ip_allocation(struct ctdb_context *ctdb,
 int update_ip_assignment_tree(struct ctdb_context *ctdb,
                                struct ctdb_public_ip *ip);
 
+int ctdb_init_tevent_logging(struct ctdb_context *ctdb);
+
+int ctdb_statistics_init(struct ctdb_context *ctdb);
+
+int32_t ctdb_control_get_stat_history(struct ctdb_context *ctdb,
+                                     struct ctdb_req_control *c,
+                                     TDB_DATA *outdata);
+
 #endif