2 * CTDB Performance Metrics Domain Agent (PMDA) for Performance Co-Pilot (PCP)
4 * Copyright (c) 1995,2004 Silicon Graphics, Inc. All Rights Reserved.
5 * Copyright (c) 2011 David Disseldorp
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by the
9 * Free Software Foundation; either version 2 of the License, or (at your
10 * option) any later version.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #include <pcp/pmapi.h>
26 #include "ctdb_private.h"
27 #include "ctdb_protocol.h"
29 #include "common/system.h"
34 * This PMDA connects to the locally running ctdbd daemon and pulls
35 * statistics for export via PCP. The ctdbd Unix domain socket path can be
36 * specified with the CTDB_SOCKET environment variable, otherwise the default path is used.
41 * All metrics supported in this PMDA - one table entry for each.
42 * The 4th field specifies the serial number of the instance domain
43 * for the metric, and must be either PM_INDOM_NULL (denoting a
44 * metric that only ever has a single value), or the serial number
45 * of one of the instance domains declared in an instance domain table.
46 * This PMDA declares no instance domains, so every entry uses PM_INDOM_NULL.
/*
 * Metric table passed to pmdaInit(): one pmdaMetric entry per exported value.
 * Each entry is keyed by PMDA_PMID(cluster, item) and carries the value type,
 * instance domain (always PM_INDOM_NULL here), semantics and units.
 * pmda_ctdb_fetch_cb() switches on the cluster and hands the item number to
 * one of the fill_*() helpers, so the type declared here decides which member
 * of pmAtomValue the helper has to fill.
 * NOTE(review): only some of the per-entry name comments are present in this
 * listing; for unlabelled entries the statistic has to be inferred from the
 * assignment order in the matching fill_*() helper - confirm against the
 * full file.
 */
48 static pmdaMetric metrictab[] = {
/* cluster 0, items 0-2: unitless U32 instantaneous values */
50 { NULL, { PMDA_PMID(0,0), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
51 PMDA_PMUNITS(0,0,0,0,0,0) }, },
53 { NULL, { PMDA_PMID(0,1), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
54 PMDA_PMUNITS(0,0,0,0,0,0) }, },
56 { NULL, { PMDA_PMID(0,2), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
57 PMDA_PMUNITS(0,0,0,0,0,0) }, },
58 /* client_packets_sent */
59 { NULL, { PMDA_PMID(0,3), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
60 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
61 /* client_packets_recv */
62 { NULL, { PMDA_PMID(0,4), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
63 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
64 /* node_packets_sent */
65 { NULL, { PMDA_PMID(0,5), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
66 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
67 /* node_packets_recv */
68 { NULL, { PMDA_PMID(0,6), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
69 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
70 /* keepalive_packets_sent */
71 { NULL, { PMDA_PMID(0,7), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
72 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
73 /* keepalive_packets_recv */
74 { NULL, { PMDA_PMID(0,8), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
75 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
/* cluster 1, items 0-7: U32 counters; presumably the stats->node.* values read by fill_node() - TODO confirm */
77 { NULL, { PMDA_PMID(1,0), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
78 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
80 { NULL, { PMDA_PMID(1,1), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
81 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
83 { NULL, { PMDA_PMID(1,2), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
84 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
86 { NULL, { PMDA_PMID(1,3), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
87 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
89 { NULL, { PMDA_PMID(1,4), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
90 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
92 { NULL, { PMDA_PMID(1,5), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
93 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
95 { NULL, { PMDA_PMID(1,6), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
96 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
98 { NULL, { PMDA_PMID(1,7), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
99 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
/* cluster 2, items 0-2: U32 counters; presumably the stats->client.* values read by fill_client() - TODO confirm */
101 { NULL, { PMDA_PMID(2,0), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
102 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
104 { NULL, { PMDA_PMID(2,1), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
105 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
107 { NULL, { PMDA_PMID(2,2), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
108 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
/*
 * cluster 3, items 0-2: U32 counters; presumably the stats->timeouts.* values
 * read by fill_timeout() - TODO confirm.
 * NOTE(review): the count scale is written as a literal 0 here while the other
 * counters use PM_COUNT_ONE - presumably the same value; confirm.
 */
110 { NULL, { PMDA_PMID(3,0), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
111 PMDA_PMUNITS(0,0,1,0,0,0) }, },
113 { NULL, { PMDA_PMID(3,1), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
114 PMDA_PMUNITS(0,0,1,0,0,0) }, },
116 { NULL, { PMDA_PMID(3,2), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
117 PMDA_PMUNITS(0,0,1,0,0,0) }, },
/* the table returns to cluster 0 from here on (items 9-22) */
119 { NULL, { PMDA_PMID(0,9), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
120 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
122 { NULL, { PMDA_PMID(0,10), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
123 PMDA_PMUNITS(0,0,0,0,0,0) }, },
124 /* locks.num_calls */
125 { NULL, { PMDA_PMID(0,11), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
126 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
127 /* locks.num_pending */
128 { NULL, { PMDA_PMID(0,12), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
129 PMDA_PMUNITS(0,0,0,0,0,0) }, },
130 /* childwrite_calls */
131 { NULL, { PMDA_PMID(0,13), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
132 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
133 /* pending_childwrite_calls */
134 { NULL, { PMDA_PMID(0,14), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
135 PMDA_PMUNITS(0,0,0,0,0,0) }, },
/* item 15: the only entry with a space dimension (bytes) */
137 { NULL, { PMDA_PMID(0,15), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
138 PMDA_PMUNITS(1,0,0,PM_SPACE_BYTE,0,0) }, },
140 { NULL, { PMDA_PMID(0,16), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
141 PMDA_PMUNITS(0,0,0,0,0,0) }, },
/* items 17-21: the only PM_TYPE_DOUBLE entries, all instantaneous times in seconds */
142 /* reclock.ctdbd.max */
143 { NULL, { PMDA_PMID(0,17), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
144 PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
145 /* reclock.recd.max */
146 { NULL, { PMDA_PMID(0,18), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
147 PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
148 /* call_latency.max */
149 { NULL, { PMDA_PMID(0,19), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
150 PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
151 /* locks.latency.max */
152 { NULL, { PMDA_PMID(0,20), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
153 PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
154 /* childwrite_latency.max */
155 { NULL, { PMDA_PMID(0,21), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
156 PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
/* item 22: unitless U32 instantaneous value */
158 { NULL, { PMDA_PMID(0,22), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
159 PMDA_PMUNITS(0,0,0,0,0,0) }, },
/* tevent event context; assigned from tevent_context_init() in pmda_ctdb_daemon_connect() */
162 static struct tevent_context *ev;
/* ctdb client context; assigned from ctdb_init() in pmda_ctdb_daemon_connect() */
163 static struct ctdb_context *ctdb;
/* reply data of the latest CTDB_CONTROL_STATISTICS request; set in pmda_ctdb_fetch(), read by the fill_*() helpers */
164 static struct ctdb_statistics *stats;
/*
 * Read callback for the ctdbd queue; pmda_ctdb_daemon_connect() passes it to
 * ctdb_queue_setup() in place of the default handler.
 *
 * data, cnt, args: forwarded unchanged to ctdb_client_read_cb().
 *
 * NOTE(review): the condition guarding the "ctdbd unreachable" message and
 * the statements between it and the forwarding call are not visible in this
 * listing - presumably an empty read (cnt == 0) is logged and dropped rather
 * than forwarded; confirm against the full file.
 */
167 pmda_ctdb_q_read_cb(uint8_t *data, size_t cnt, void *args)
170 fprintf(stderr, "ctdbd unreachable\n");
171 /* cleanup on request timeout */
175 ctdb_client_read_cb(data, cnt, args);
/*
 * Establish the client connection to ctdbd.
 *
 * Creates the tevent context (ev) and the ctdb context (ctdb), selects the
 * socket path, opens a non-blocking, close-on-exec AF_UNIX stream socket,
 * connects it, attaches a queue that uses pmda_ctdb_q_read_cb() as its read
 * callback and finally asks ctdbd for the local PNN with a 3 second timeout.
 *
 * The socket path is taken from the CTDB_SOCKET environment variable and
 * falls back to the compile-time CTDB_SOCKET value.
 *
 * Every failing step logs a message to stderr.
 * NOTE(review): the return statements and cleanup labels are not visible in
 * this listing; pmda_ctdb_fetch() assigns the result to "ret" - presumably
 * 0 on success and non-zero on failure; confirm against the full file.
 */
180 pmda_ctdb_daemon_connect(void)
182 const char *socket_name;
184 struct sockaddr_un addr;
186 ev = tevent_context_init(NULL);
188 fprintf(stderr, "Failed to init event ctx\n");
192 ctdb = ctdb_init(ev);
194 fprintf(stderr, "Failed to init ctdb\n");
198 socket_name = getenv("CTDB_SOCKET");
199 if (socket_name == NULL) {
200 socket_name = CTDB_SOCKET;
203 ret = ctdb_set_socketname(ctdb, socket_name);
205 fprintf(stderr, "ctdb_set_socketname failed - %s\n",
211 * ctdb_socket_connect() sets a default queue callback handler that
212 * calls exit() if ctdbd is unavailable on recv, use our own wrapper to
216 memset(&addr, 0, sizeof(addr));
217 addr.sun_family = AF_UNIX;
/*
 * strncpy() does not terminate the destination when the source fills it.
 * addr was zeroed by the memset() above, so copying at most size - 1 bytes
 * guarantees that sun_path stays NUL-terminated even for an over-long path.
 */
218 strncpy(addr.sun_path, ctdb->daemon.name, sizeof(addr.sun_path) - 1);
220 ctdb->daemon.sd = socket(AF_UNIX, SOCK_STREAM, 0);
221 if (ctdb->daemon.sd == -1) {
222 fprintf(stderr, "Failed to open client socket\n");
226 set_nonblocking(ctdb->daemon.sd);
227 set_close_on_exec(ctdb->daemon.sd);
229 if (connect(ctdb->daemon.sd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
230 fprintf(stderr, "Failed to connect to ctdb daemon via %s\n",
235 ctdb->daemon.queue = ctdb_queue_setup(ctdb, ctdb, ctdb->daemon.sd,
237 pmda_ctdb_q_read_cb, ctdb,
239 if (ctdb->daemon.queue == NULL) {
240 fprintf(stderr, "Failed to setup queue\n");
244 ctdb->pnn = ctdb_ctrl_getpnn(ctdb, timeval_current_ofs(3, 0),
/* (uint32_t)-1 is the value treated as "no PNN obtained" */
246 if (ctdb->pnn == (uint32_t)-1) {
247 fprintf(stderr, "Failed to get ctdb pnn\n");
/* the socket is closed here; presumably part of the error unwind path - TODO confirm */
253 close(ctdb->daemon.sd);
/*
 * Tear down the connection to ctdbd: call the shutdown method found in
 * ctdb->methods and close the daemon socket when one is open (sd != -1).
 * Called from pmda_ctdb_fetch() after a failed statistics request.
 * NOTE(review): any guard around the shutdown call and any release or reset
 * of the global ev/ctdb contexts are not visible in this listing - confirm
 * against the full file.
 */
263 pmda_ctdb_daemon_disconnect(void)
266 ctdb->methods->shutdown(ctdb);
269 if (ctdb->daemon.sd != -1) {
270 close(ctdb->daemon.sd);
/*
 * Copy one cluster 0 statistic, selected by item, from the cached ctdbd
 * statistics (stats) into *atom.
 *
 * item: item number taken from the metric's PMID.
 * atom: output value. Integer statistics are stored through atom->ul and the
 *       *.max latency statistics through atom->d; the member written has to
 *       agree with the type declared for the metric in metrictab.
 *
 * NOTE(review): the case labels, the default branch and the return
 * statements are not visible in this listing; the assignments appear in
 * item order 0..22 - confirm against the full file.
 */
279 fill_base(unsigned int item, pmAtomValue *atom)
283 atom->ul = stats->num_clients;
286 atom->ul = stats->frozen;
289 atom->ul = stats->recovering;
292 atom->ul = stats->client_packets_sent;
295 atom->ul = stats->client_packets_recv;
298 atom->ul = stats->node_packets_sent;
301 atom->ul = stats->node_packets_recv;
304 atom->ul = stats->keepalive_packets_sent;
307 atom->ul = stats->keepalive_packets_recv;
310 atom->ul = stats->total_calls;
313 atom->ul = stats->pending_calls;
316 atom->ul = stats->locks.num_calls;
319 atom->ul = stats->locks.num_pending;
322 atom->ul = stats->childwrite_calls;
325 atom->ul = stats->pending_childwrite_calls;
328 atom->ul = stats->memory_used;
331 atom->ul = stats->max_hop_count;
334 atom->d = stats->reclock.ctdbd.max;
337 atom->d = stats->reclock.recd.max;
340 atom->d = stats->call_latency.max;
343 atom->d = stats->locks.latency.max;
346 atom->d = stats->childwrite_latency.max;
/*
 * metrictab declares only the five *.max latencies as PM_TYPE_DOUBLE and
 * declares PMID(0,22) as PM_TYPE_U32, so this value must go through
 * atom->ul; storing it through atom->d would not match its descriptor.
 */
349 atom->ul = stats->num_recoveries;
/*
 * Copy one statistic from stats->node, selected by item, into atom->ul.
 *
 * item: item number taken from the metric's PMID.
 * atom: output value; every field handled here is stored through atom->ul.
 *
 * NOTE(review): case labels, default branch and return statements are not
 * visible in this listing; the eight assignments presumably map to items
 * 0..7 in the order shown - confirm against the full file.
 */
359 fill_node(unsigned int item, pmAtomValue *atom)
363 atom->ul = stats->node.req_call;
366 atom->ul = stats->node.reply_call;
369 atom->ul = stats->node.req_dmaster;
372 atom->ul = stats->node.reply_dmaster;
375 atom->ul = stats->node.reply_error;
378 atom->ul = stats->node.req_message;
381 atom->ul = stats->node.req_control;
384 atom->ul = stats->node.reply_control;
/*
 * Copy one statistic from stats->client, selected by item, into atom->ul.
 *
 * item: item number taken from the metric's PMID.
 * atom: output value; every field handled here is stored through atom->ul.
 *
 * NOTE(review): case labels, default branch and return statements are not
 * visible in this listing; the three assignments presumably map to items
 * 0..2 in the order shown - confirm against the full file.
 */
395 fill_client(unsigned int item, pmAtomValue *atom)
399 atom->ul = stats->client.req_call;
402 atom->ul = stats->client.req_message;
405 atom->ul = stats->client.req_control;
/*
 * Copy one statistic from stats->timeouts, selected by item, into atom->ul.
 *
 * item: item number taken from the metric's PMID.
 * atom: output value; every field handled here is stored through atom->ul.
 *
 * NOTE(review): case labels, default branch and return statements are not
 * visible in this listing; the three assignments presumably map to items
 * 0..2 in the order shown - confirm against the full file.
 */
415 fill_timeout(unsigned int item, pmAtomValue *atom)
419 atom->ul = stats->timeouts.call;
422 atom->ul = stats->timeouts.control;
425 atom->ul = stats->timeouts.traverse;
435 * callback provided to pmdaFetch
/*
 * Per-metric value callback, registered with pmdaSetFetchCallBack() in
 * pmda_ctdb_init().
 *
 * mdesc: table entry of the requested metric; its pmid supplies the cluster
 *        and item numbers.
 * inst:  requested instance; values other than PM_IN_NULL take a separate
 *        branch whose body is not visible here (all metrics in metrictab use
 *        PM_INDOM_NULL, so presumably an error is returned - TODO confirm).
 * atom:  output value, filled by the fill_*() helper chosen by cluster.
 *
 * NOTE(review): the condition guarding "stats not available", the case
 * labels and the return statements are not visible in this listing; the
 * helpers presumably correspond to clusters 0..3 in the order shown.
 */
438 pmda_ctdb_fetch_cb(pmdaMetric *mdesc, unsigned int inst, pmAtomValue *atom)
/* view the opaque pmID as its bit-field form to reach cluster and item */
441 __pmID_int *id = (__pmID_int *)&(mdesc->m_desc.pmid);
443 if (inst != PM_IN_NULL) {
448 fprintf(stderr, "stats not available\n");
454 switch (id->cluster) {
456 ret = fill_base(id->item, atom);
462 ret = fill_node(id->item, atom);
468 ret = fill_client(id->item, atom);
474 ret = fill_timeout(id->item, atom);
489 * This routine is called once for each pmFetch(3) operation, so is a
490 * good place to do once-per-fetch functions, such as value caching or
491 * instance domain evaluation.
/*
 * Fetch entry point, installed as dp->version.two.fetch in pmda_ctdb_init().
 *
 * Requests a fresh statistics snapshot from ctdbd with
 * CTDB_CONTROL_STATISTICS, points the global "stats" at the reply data and
 * then calls pmdaFetch() to build the result.
 *
 * numpmid, pmidlist, resp, pmda: forwarded unchanged to pmdaFetch().
 *
 * NOTE(review): the condition that triggers the reconnect attempt, the
 * error return values and the release of the reply buffer are not visible
 * in this listing - confirm against the full file.
 */
494 pmda_ctdb_fetch(int numpmid, pmID pmidlist[], pmResult **resp, pmdaExt *pmda)
499 struct timeval ctdb_timeout;
502 fprintf(stderr, "attempting reconnect to ctdbd\n");
503 ret = pmda_ctdb_daemon_connect();
505 fprintf(stderr, "reconnect failed\n");
/* the statistics request is given a one second deadline */
510 ctdb_timeout = timeval_current_ofs(1, 0);
511 ret = ctdb_control(ctdb, ctdb->pnn, 0,
512 CTDB_CONTROL_STATISTICS, 0, tdb_null,
513 ctdb, &data, &res, &ctdb_timeout, NULL);
/* both the call result and the control status have to be zero */
515 if (ret != 0 || res != 0) {
516 fprintf(stderr, "ctdb control for statistics failed, reconnecting\n");
517 pmda_ctdb_daemon_disconnect();
522 stats = (struct ctdb_statistics *)data.dptr;
/* a reply whose size differs from struct ctdb_statistics is reported here */
524 if (data.dsize != sizeof(struct ctdb_statistics)) {
525 fprintf(stderr, "incorrect statistics size %zu - not %zu\n",
526 data.dsize, sizeof(struct ctdb_statistics));
531 ret = pmdaFetch(numpmid, pmidlist, resp, pmda);
/* prototype for the non-static initialisation routine defined just below */
539 void pmda_ctdb_init(pmdaInterface *dp);
542 * Initialise the agent
/*
 * dp: dispatch structure prepared by the caller (main() uses pmdaDaemon()).
 *
 * Installs pmda_ctdb_fetch() as the fetch handler and pmda_ctdb_fetch_cb()
 * as the per-metric callback, then registers metrictab with pmdaInit().
 * NOTE(review): the body of the dp->status check is not visible in this
 * listing - presumably an early return; confirm.
 */
545 pmda_ctdb_init(pmdaInterface *dp)
547 if (dp->status != 0) {
551 dp->version.two.fetch = pmda_ctdb_fetch;
552 pmdaSetFetchCallBack(dp, pmda_ctdb_fetch_cb);
/* NULL, 0: no instance domain table is supplied; the metric count is derived from the array size */
554 pmdaInit(dp, NULL, 0, metrictab,
555 (sizeof(metrictab) / sizeof(metrictab[0])));
/*
 * Build the path "<PCP_PMDAS_DIR>/ctdb/help" in a static buffer, using the
 * PCP_PMDAS_DIR value obtained from pmGetConfig().
 * NOTE(review): the signature and return statement are not visible in this
 * listing; presumably this is the helpfile() called from main() and it
 * returns buf - confirm. The buffer is static, so its contents are
 * overwritten by each call.
 */
561 static char buf[MAXPATHLEN];
564 snprintf(buf, sizeof(buf), "%s/ctdb/help",
565 pmGetConfig("PCP_PMDAS_DIR"));
/*
 * Print the command line synopsis and option summary to stderr, using
 * pmProgname as the program name.
 * NOTE(review): the signature, the call that emits the option text and
 * whatever follows it (presumably an exit) are not visible in this listing;
 * presumably this is the usage routine invoked from main() - confirm.
 */
573 fprintf(stderr, "Usage: %s [options]\n\n", pmProgname);
575 " -d domain use domain (numeric) for metrics domain of PMDA\n"
576 " -l logfile write log into logfile rather than using default log name\n"
577 "\nExactly one of the following options may appear:\n"
578 " -i port expect PMCD to connect on given inet port (number or name)\n"
579 " -p expect PMCD to supply stdin/stdout (pipe)\n"
580 " -u socket expect PMCD to connect on given unix domain socket\n",
586 * Set up the agent if running as a daemon.
/*
 * Daemon entry point: set the program name, initialise the dispatch
 * structure with pmdaDaemon(), parse the command line with pmdaGetOpt(),
 * open the log, initialise the agent and connect.
 *
 * argc, argv: command line; accepted option letters are d, i, l, p, u and ?.
 *
 * NOTE(review): the handling of unrecognised options and the statements
 * after pmdaConnect() are not visible in this listing - confirm against the
 * full file.
 */
589 main(int argc, char **argv)
/* default log file name handed to pmdaDaemon() */
592 char log_file[] = "pmda_ctdb.log";
593 pmdaInterface dispatch;
595 __pmSetProgname(argv[0]);
597 pmdaDaemon(&dispatch, PMDA_INTERFACE_2, pmProgname, CTDB,
598 log_file, helpfile());
/* a return other than EOF means pmdaGetOpt() stopped on an option it does not handle itself */
600 if (pmdaGetOpt(argc, argv, "d:i:l:pu:?", &dispatch, &err) != EOF) {
608 pmdaOpenLog(&dispatch);
609 pmda_ctdb_init(&dispatch);
610 pmdaConnect(&dispatch);