4 Copyright (C) Andrew Tridgell 2006
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "system/network.h"
22 #include "system/filesys.h"
23 #include "system/wait.h"
27 #include "lib/util/debug.h"
28 #include "lib/util/samba_util.h"
30 #include "ctdb_logging.h"
31 #include "ctdb_private.h"
33 #include "common/reqid.h"
34 #include "common/system.h"
37 return error string for last error
/* Accessor for the error string most recently stored on the context.
 * NOTE(review): only the signature is shown here; the body is not visible,
 * so the returned expression could not be checked. */
39 const char *ctdb_errstr(struct ctdb_context *ctdb)
46 remember an error message
/* Store a printf-style formatted message in ctdb->err_msg and log it at
 * DEBUG_ERR level.
 *   ctdb - context whose err_msg field is replaced
 *   fmt  - printf-style format string, followed by its arguments */
48 void ctdb_set_error(struct ctdb_context *ctdb, const char *fmt, ...)
/* Drop the previously stored message before it is overwritten. */
51 talloc_free(ctdb->err_msg);
/* The new string is allocated with ctdb as its talloc parent.  `ap` is the
 * argument list for fmt; NOTE(review): its declaration and the
 * va_start/va_end pair are not shown here - confirm they bracket this call. */
53 ctdb->err_msg = talloc_vasprintf(ctdb, fmt, ap);
/* NOTE(review): err_msg is passed to "%s" with no NULL check shown; confirm
 * how a failed talloc_vasprintf() should be reported. */
54 DEBUG(DEBUG_ERR,("ctdb error: %s\n", ctdb->err_msg));
59 a fatal internal error occurred - no hope for recovery
/* Report an unrecoverable internal error: msg is logged at DEBUG_ALERT
 * level behind the "ctdb fatal error: " prefix.
 *   ctdb - not referenced by the line shown here
 *   msg  - description of the failure
 * NOTE(review): whatever follows the log call (for example how the process
 * is terminated) is not shown here - confirm before relying on it. */
61 void ctdb_fatal(struct ctdb_context *ctdb, const char *msg)
63 DEBUG(DEBUG_ALERT,("ctdb fatal error: %s\n", msg));
68 like ctdb_fatal() but a core/backtrace would not be useful
/* Report an error that ends the program: msg is logged at DEBUG_ALERT level
 * behind the "ctdb exiting with error: " prefix.
 *   ctdb - not referenced by the line shown here
 *   msg  - description of the failure
 * NOTE(review): the statement that actually ends the process is not shown
 * here - confirm how it differs from ctdb_fatal(). */
70 void ctdb_die(struct ctdb_context *ctdb, const char *msg)
72 DEBUG(DEBUG_ALERT,("ctdb exiting with error: %s\n", msg));
76 /* Set the path of a helper program from envvar, falling back to
77 * dir/file if envvar unset. type is a string to print in log
78 * messages. helper is assumed to point to a statically allocated
79 * array of size bytes, initialised to "". If file is NULL don't fall
80 * back if envvar is unset. If dir is NULL and envvar is unset (but
81 * file is not NULL) then this is an error. Returns true if helper is
82 * set, either previously or this time. */
/* NOTE(review): one parameter line between `size` and `dir` (presumably the
 * environment variable name) and all return statements are not shown here. */
83 bool ctdb_set_helper(const char *type, char *helper, size_t size,
85 const char *dir, const char *file)
/* helper starts out as "" - a non-empty buffer means an earlier call already
 * resolved the path, so there is nothing to do. */
90 if (helper[0] != '\0') {
/* `t` is the candidate path (presumably the environment variable's value -
 * its lookup is not shown).  It must fit in helper including the NUL. */
97 if (strlen(t) >= size) {
99 ("Unable to set %s - path too long\n", type));
/* strncpy() does not always terminate its destination; here it does, as
 * long as the length check above rejected anything with strlen(t) >= size
 * (assumes that branch returns - TODO confirm). */
103 strncpy(helper, t, size);
/* No environment value and no file name: the caller asked for no fallback. */
104 } else if (file == NULL) {
/* A file name without a directory cannot be turned into a path. */
106 } else if (dir == NULL) {
108 ("Unable to set %s - dir is NULL\n", type));
/* Build "dir/file".  snprintf() reports the length it wanted to write, so a
 * result >= size means the path was truncated.
 * NOTE(review): the int result is compared against a size_t, so a negative
 * (error) return is converted to a large unsigned value and is also reported
 * as "path too long". */
111 if (snprintf(helper, size, "%s/%s", dir, file) >= size) {
113 ("Unable to set %s - path too long\n", type));
/* The chosen path has to exist ... */
118 if (stat(helper, &st) != 0) {
120 ("Unable to set %s \"%s\" - %s\n",
121 type, helper, strerror(errno)));
/* ... and have the owner-execute bit set.  Only S_IXUSR is tested; this is
 * not a full "can the current user execute it" check. */
124 if (!(st.st_mode & S_IXUSR)) {
126 ("Unable to set %s \"%s\" - not executable\n",
132 ("Set %s to \"%s\"\n", type, helper));
136 /* Invoke an external program to do some sort of tracing on the CTDB
137 * process. This might block for a little while. The external
138 * program is specified by the environment variable
139 * CTDB_EXTERNAL_TRACE. This program should take one argument: the
140 * pid of the process to trace. Commonly, the program would be a
141 * wrapper script around gcore.
 */
143 void ctdb_external_trace(void)
/* static, so the resolved helper path survives between calls;
 * ctdb_set_helper() leaves a non-empty buffer untouched. */
146 static char external_trace[PATH_MAX+1] = "";
/* dir and file are both NULL: there is no fallback path, the helper can only
 * come from the CTDB_EXTERNAL_TRACE environment variable. */
149 if (!ctdb_set_helper("external trace handler",
150 external_trace, sizeof(external_trace),
151 "CTDB_EXTERNAL_TRACE", NULL, NULL)) {
/* Command line is "<helper> <pid of this process>", allocated on the NULL
 * talloc context.
 * NOTE(review): cmd is used below with no NULL check shown, and the line
 * that actually runs the command and the release of cmd are not shown
 * here - confirm both. */
155 cmd = talloc_asprintf(NULL, "%s %lu", external_trace, (unsigned long) getpid());
156 DEBUG(DEBUG_WARNING,("begin external trace: %s\n", cmd));
160 ("external trace command \"%s\" failed\n", cmd));
162 DEBUG(DEBUG_WARNING,("end external trace: %s\n", cmd));
/* Parse the textual address in str into *address.
 *   mem_ctx - not referenced by the lines shown here
 *   str     - address text handed to parse_ip()
 *   address - output
 * node_map_add() in this file treats a return value of -1 as failure.
 * NOTE(review): the return statements are not shown here. */
169 int ctdb_parse_address(TALLOC_CTX *mem_ctx, const char *str,
170 ctdb_sock_addr *address)
/* Look up the port registered for the "ctdb" TCP service. */
176 se = getservbyname("ctdb", "tcp");
/* s_port is in network byte order; convert it for parse_ip().
 * NOTE(review): the handling of a NULL `se` (service not registered) is not
 * shown here - confirm a fallback port is chosen before this dereference. */
182 port = ntohs(se->s_port);
185 if (! parse_ip(str, NULL, port, address)) {
194 check if two addresses are the same
/* True when a1 and a2 have the same IP address (ctdb_same_ip) and the same
 * port (ctdb_addr_to_port). */
196 bool ctdb_same_address(ctdb_sock_addr *a1, ctdb_sock_addr *a2)
198 return ctdb_same_ip(a1, a2) &&
199 ctdb_addr_to_port(a1) == ctdb_addr_to_port(a2);
204 hash function for mapping data to a VNN - taken from tdb
/* Hash a key by delegating to tdb_jenkins_hash().
 * discard_const() strips the const qualifier from key - presumably because
 * tdb_jenkins_hash() takes a non-const pointer; verify against tdb.h. */
206 uint32_t ctdb_hash(const TDB_DATA *key)
208 return tdb_jenkins_hash(discard_const(key));
/* Number of bytes one marshalled record occupies: the fixed part of
 * struct ctdb_rec_data up to its `data` member, then the key bytes, the data
 * bytes and - only when header is not NULL - one ctdb_ltdb_header.
 * NOTE(review): the parameter line declaring `data` is not shown here. */
212 static uint32_t ctdb_marshall_record_size(TDB_DATA key,
213 struct ctdb_ltdb_header *header,
216 return offsetof(struct ctdb_rec_data, data) + key.dsize +
217 data.dsize + (header ? sizeof(*header) : 0);
/* Fill in a pre-allocated record.  The payload is laid out in rec->data as
 * the key bytes, then the optional header, then the data bytes.
 * NOTE(review): some parameter lines, the initial value of `offset` and the
 * test that selects between the two datalen assignments are not shown here;
 * per the marshalling comment in this file the header may be NULL. */
220 static void ctdb_marshall_record_copy(struct ctdb_rec_data *rec,
223 struct ctdb_ltdb_header *header,
229 rec->length = length;
231 rec->keylen = key.dsize;
/* The key always sits at the start of the payload. */
232 memcpy(&rec->data[0], key.dptr, key.dsize);
/* With a header: it is counted as part of the data portion and is copied in
 * front of the data bytes. */
236 rec->datalen = data.dsize + sizeof(*header);
237 memcpy(&rec->data[offset], header, sizeof(*header));
238 offset += sizeof(*header);
/* Without a header: the data portion is just the data bytes. */
240 rec->datalen = data.dsize;
242 memcpy(&rec->data[offset], data.dptr, data.dsize);
246 form a ctdb_rec_data record from a key/data pair
248 note that header may be NULL. If not NULL then it is included in the data portion
/* Allocate a ctdb_rec_data on mem_ctx sized by ctdb_marshall_record_size()
 * and fill it with ctdb_marshall_record_copy().
 * NOTE(review): some parameter lines, the check of the allocation result and
 * the return statement are not shown here. */
251 struct ctdb_rec_data *ctdb_marshall_record(TALLOC_CTX *mem_ctx, uint32_t reqid,
253 struct ctdb_ltdb_header *header,
257 struct ctdb_rec_data *d;
259 length = ctdb_marshall_record_size(key, header, data);
261 d = (struct ctdb_rec_data *)talloc_size(mem_ctx, length);
266 ctdb_marshall_record_copy(d, reqid, key, header, data, length);
271 /* helper function for marshalling multiple records */
/* Append one record to a marshall buffer, creating or growing the buffer as
 * needed.
 * NOTE(review): some parameter lines, the test that selects between the two
 * allocation paths, the check of m2 and any bookkeeping on the buffer header
 * are not shown here. */
272 struct ctdb_marshall_buffer *ctdb_marshall_add(TALLOC_CTX *mem_ctx,
273 struct ctdb_marshall_buffer *m,
277 struct ctdb_ltdb_header *header,
280 struct ctdb_rec_data *r;
281 struct ctdb_marshall_buffer *m2;
282 uint32_t length, offset;
284 length = ctdb_marshall_record_size(key, header, data);
/* New buffer: the record goes right after the fixed buffer header, and the
 * whole allocation starts out zeroed. */
287 offset = offsetof(struct ctdb_marshall_buffer, data);
288 m2 = talloc_zero_size(mem_ctx, offset + length);
/* Existing buffer: its current talloc size is where the new record starts;
 * grow the allocation by the size of that record.
 * NOTE(review): offset and length are uint32_t, so the talloc size is
 * narrowed on assignment and offset + length is a 32-bit sum - confirm
 * buffers cannot get large enough for this to wrap. */
290 offset = talloc_get_size(m);
291 m2 = talloc_realloc_size(mem_ctx, m, offset + length);
302 r = (struct ctdb_rec_data *)((uint8_t *)m2 + offset);
303 ctdb_marshall_record_copy(r, reqid, key, header, data, length);
309 /* we've finished marshalling, return a data blob with the marshalled records */
/* Describe the marshall buffer as a TDB_DATA blob: dptr points at m itself
 * (nothing is copied) and dsize is the talloc allocation size of m.
 * NOTE(review): the declaration of `data` and the return are not shown here. */
310 TDB_DATA ctdb_marshall_finish(struct ctdb_marshall_buffer *m)
313 data.dptr = (uint8_t *)m;
314 data.dsize = talloc_get_size(m);
319 loop over a marshalling buffer
321 - pass r==NULL to start
322 - loop the number of times indicated by m->count
/* Step to the next record packed in a marshall buffer.
 *   m      - buffer being walked
 *   r      - record returned by the previous call; the walk begins at the
 *            first record in m->data
 *   header - if not NULL, receives a copy of the ltdb header stored at the
 *            front of the record's data portion
 *   key    - receives pointer/length of the record's key
 *   data   - receives pointer/length of the record's data
 * key->dptr and data->dptr point into the buffer; nothing is copied for them.
 * NOTE(review): one parameter line, the tests that guard the assignments
 * below and the return statements are not shown here. */
324 struct ctdb_rec_data *ctdb_marshall_loop_next(struct ctdb_marshall_buffer *m, struct ctdb_rec_data *r,
326 struct ctdb_ltdb_header *header,
327 TDB_DATA *key, TDB_DATA *data)
/* Starting point: the first record in the buffer. */
330 r = (struct ctdb_rec_data *)&m->data[0];
/* Otherwise advance by the previous record's own length field. */
332 r = (struct ctdb_rec_data *)(r->length + (uint8_t *)r);
340 key->dptr = &r->data[0];
341 key->dsize = r->keylen;
/* The data portion follows the key directly. */
344 data->dptr = &r->data[r->keylen];
345 data->dsize = r->datalen;
/* When the caller wants the header separately, skip over it so that data
 * describes only the bytes after it.
 * NOTE(review): this subtraction runs before the datalen check further down,
 * so for a record with datalen < sizeof(*header) data->dsize wraps and
 * *data is left modified - confirm callers ignore *data on failure. */
346 if (header != NULL) {
347 data->dptr += sizeof(*header);
348 data->dsize -= sizeof(*header);
352 if (header != NULL) {
/* A data portion too short to contain a header cannot be unpacked. */
353 if (r->datalen < sizeof(*header)) {
356 memcpy(header, &r->data[r->keylen], sizeof(*header));
363 This is used to canonicalize a ctdb_sock_addr structure.
/* Copy *ip to *cip, rewriting an IPv4-mapped IPv6 address (::ffff:a.b.c.d)
 * as a plain AF_INET address with the same port.  Any other input is copied
 * unchanged.
 *   ip  - address to canonicalize
 *   cip - output */
365 void ctdb_canonicalize_ip(const ctdb_sock_addr *ip, ctdb_sock_addr *cip)
/* First 12 bytes of an IPv4-mapped IPv6 address: ten zero bytes, then ff ff. */
367 char prefix[12] = { 0,0,0,0,0,0,0,0,0,0,0xff,0xff };
/* Default result: an exact copy. */
369 memcpy(cip, ip, sizeof (*cip));
371 if ( (ip->sa.sa_family == AF_INET6)
372 && !memcmp(&ip->ip6.sin6_addr, prefix, 12)) {
/* Start from an all-zero structure so no IPv6-only field is left behind. */
373 memset(cip, 0, sizeof(*cip));
/* NOTE(review): sin_len is set to the size of the whole ctdb_sock_addr, not
 * of the sockaddr_in member - confirm this is intended.  The directive that
 * ends this conditional is not shown here. */
374 #ifdef HAVE_SOCK_SIN_LEN
375 cip->ip.sin_len = sizeof(*cip);
377 cip->ip.sin_family = AF_INET;
378 cip->ip.sin_port = ip->ip6.sin6_port;
/* The IPv4 address is the last 4 of the 16 IPv6 address bytes. */
379 memcpy(&cip->ip.sin_addr, &ip->ip6.sin6_addr.s6_addr[12], 4);
/* Compare the IP addresses (not the ports) of two socket addresses.  Both
 * are passed through ctdb_canonicalize_ip() first, so an IPv4-mapped IPv6
 * address compares against the equivalent plain IPv4 address.
 * NOTE(review): the case labels, the length passed to memcmp() and the
 * returns for the mismatch/unknown-family paths are not shown here. */
383 bool ctdb_same_ip(const ctdb_sock_addr *tip1, const ctdb_sock_addr *tip2)
385 ctdb_sock_addr ip1, ip2;
387 ctdb_canonicalize_ip(tip1, &ip1);
388 ctdb_canonicalize_ip(tip2, &ip2);
/* Addresses of different families are handled before the switch. */
390 if (ip1.sa.sa_family != ip2.sa.sa_family) {
394 switch (ip1.sa.sa_family) {
/* IPv4 member: compare the 32-bit address values. */
396 return ip1.ip.sin_addr.s_addr == ip2.ip.sin_addr.s_addr;
/* IPv6 member: compare the address bytes. */
398 return !memcmp(&ip1.ip6.sin6_addr.s6_addr[0],
399 &ip2.ip6.sin6_addr.s6_addr[0],
/* Any other family is logged as an error. */
402 DEBUG(DEBUG_ERR, (__location__ " CRITICAL Can not compare sockaddr structures of type %u\n", ip1.sa.sa_family));
410 compare two ctdb_sock_addr structures
/* True when both the IP address (ctdb_same_ip) and the port match.
 * NOTE(review): the port is read through the .ip (sin_port) member for both
 * arguments whatever their family; this presumably relies on sin_port and
 * sin6_port sharing an offset - confirm. */
412 bool ctdb_same_sockaddr(const ctdb_sock_addr *ip1, const ctdb_sock_addr *ip2)
414 return ctdb_same_ip(ip1, ip2) && ip1->ip.sin_port == ip2->ip.sin_port;
/* Format the IP address of addr as text with inet_ntop().
 * The text is written into a single static buffer, so every call overwrites
 * the result of the previous one.
 * NOTE(review): the case labels and the return statement are not shown here;
 * the inet_ntop() results are not checked on the lines shown. */
417 char *ctdb_addr_to_str(ctdb_sock_addr *addr)
419 static char cip[128] = "";
421 switch (addr->sa.sa_family) {
/* IPv4 member. */
423 inet_ntop(addr->ip.sin_family, &addr->ip.sin_addr, cip, sizeof(cip));
/* IPv6 member. */
426 inet_ntop(addr->ip6.sin6_family, &addr->ip6.sin6_addr, cip, sizeof(cip));
/* Unknown family: log it and invoke the external trace helper. */
429 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family %u\n", addr->sa.sa_family));
430 ctdb_external_trace();
/* Return the port of addr in host byte order.
 * NOTE(review): the case labels and the value returned for an unknown family
 * are not shown here. */
436 unsigned ctdb_addr_to_port(ctdb_sock_addr *addr)
438 switch (addr->sa.sa_family) {
/* IPv4 member. */
440 return ntohs(addr->ip.sin_port);
/* IPv6 member. */
443 return ntohs(addr->ip6.sin6_port);
/* Unknown family: logged as an error. */
446 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family %u\n", addr->sa.sa_family));
452 /* Add a node to a node map with given address and flags */
/*   mem_ctx  - talloc context passed to ctdb_parse_address() and to the
 *              reallocation
 *   nstr     - node address in text form
 *   flags    - flags for the new entry
 *   node_map - in/out: the map is reallocated one entry larger and the
 *              caller's pointer is updated
 * The new entry's pnn is its index in the array, i.e. the entry count before
 * the append.
 * NOTE(review): the return statements and any lines that store the parsed
 * address and flags or bump the entry count are not shown here. */
453 static bool node_map_add(TALLOC_CTX *mem_ctx,
454 const char *nstr, uint32_t flags,
455 struct ctdb_node_map **node_map)
460 struct ctdb_node_and_flags *n;
462 /* Might as well do this before trying to allocate memory */
463 if (ctdb_parse_address(mem_ctx, nstr, &addr) == -1) {
/* Size of the map with one more entry: fixed header plus num array slots. */
467 num = (*node_map)->num + 1;
468 s = offsetof(struct ctdb_node_map, nodes) +
469 num * sizeof(struct ctdb_node_and_flags);
/* NOTE(review): the realloc result is written straight back to *node_map, so
 * on failure the caller's pointer is replaced by NULL - confirm callers do
 * not need the old map in that case. */
470 *node_map = talloc_realloc_size(mem_ctx, *node_map, s);
471 if (*node_map == NULL) {
472 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
/* The stored count has not changed yet, so it indexes the new last slot. */
476 n = &(*node_map)->nodes[(*node_map)->num];
478 n->pnn = (*node_map)->num;
486 /* Read a nodes file into a node map */
/* Build a node map from a nodes file: each line that remains after trailing
 * empty lines are dropped is trimmed of surrounding spaces/tabs and added
 * with node_map_add(), so a node's position in the file decides its pnn.
 * NOTE(review): the second parameter line (the file name, used below as
 * `nlist`), several declarations, the tests that classify a line and all
 * return statements are not shown here. */
487 struct ctdb_node_map *ctdb_read_nodes_file(TALLOC_CTX *mem_ctx,
493 struct ctdb_node_map *ret;
495 /* Allocate node map header */
496 ret = talloc_zero_size(mem_ctx, offsetof(struct ctdb_node_map, nodes));
498 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
/* Load the file as an array of lines; nlines receives the line count. */
502 lines = file_lines_load(nlist, &nlines, 0, mem_ctx);
504 DEBUG(DEBUG_ERR, ("Failed to read nodes file \"%s\"\n", nlist));
/* Empty lines at the end of the file are not counted as nodes. */
507 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
511 for (i=0; i < nlines; i++) {
517 /* strip leading spaces */
518 while((*node == ' ') || (*node == '\t')) {
525 ((node[len-1] == ' ') || (node[len-1] == '\t')))
/* NOTE(review): the block comment that starts on the next line has no
 * terminator in the text shown here. */
535 /* A "deleted" node is a node that is
536 commented out in the nodes file. This is
537 used instead of removing a line, which
538 would cause subsequent nodes to change
540 flags = NODE_FLAGS_DELETED;
541 node = discard_const("0.0.0.0");
545 if (!node_map_add(mem_ctx, node, flags, &ret)) {
/* Build a ctdb_node_map from an array of node pointers: one entry per node,
 * carrying that node's address, pnn and flags.
 *   nodes     - array of num_nodes node pointers
 *   num_nodes - number of entries to copy
 * The map is allocated zeroed under mem_ctx.
 * NOTE(review): the last parameter line (presumably mem_ctx), some
 * declarations and the return statements are not shown here. */
556 struct ctdb_node_map *
557 ctdb_node_list_to_map(struct ctdb_node **nodes, uint32_t num_nodes,
562 struct ctdb_node_map *node_map;
/* Fixed header plus one ctdb_node_and_flags slot per node. */
564 size = offsetof(struct ctdb_node_map, nodes) +
565 num_nodes * sizeof(struct ctdb_node_and_flags);
566 node_map = (struct ctdb_node_map *)talloc_zero_size(mem_ctx, size);
567 if (node_map == NULL) {
569 (__location__ " Failed to allocate nodemap array\n"));
573 node_map->num = num_nodes;
574 for (i=0; i<num_nodes; i++) {
575 node_map->nodes[i].addr = nodes[i]->address;
576 node_map->nodes[i].pnn = nodes[i]->pnn;
577 node_map->nodes[i].flags = nodes[i]->flags;
/* Table of event script call names.
 * NOTE(review): the entries and the closing of the initializer are not shown
 * here, so the order and indexing of the names could not be checked. */
583 const char *ctdb_eventscript_call_names[] = {
600 /* Runstate handling */
/* Rows pairing each ctdb_runstate value with its printable label.  The table
 * is scanned by runstate_to_string() and runstate_from_string(), which stop
 * at a row whose label is NULL.
 * NOTE(review): the struct header, the label member and the terminating row
 * are not shown here. */
602 enum ctdb_runstate runstate;
605 { CTDB_RUNSTATE_UNKNOWN, "UNKNOWN" },
606 { CTDB_RUNSTATE_INIT, "INIT" },
607 { CTDB_RUNSTATE_SETUP, "SETUP" },
608 { CTDB_RUNSTATE_FIRST_RECOVERY, "FIRST_RECOVERY" },
609 { CTDB_RUNSTATE_STARTUP, "STARTUP" },
610 { CTDB_RUNSTATE_RUNNING, "RUNNING" },
611 { CTDB_RUNSTATE_SHUTDOWN, "SHUTDOWN" },
/* Return the label for a runstate value by scanning runstate_map up to the
 * row whose label is NULL.  The returned string belongs to the table. */
615 const char *runstate_to_string(enum ctdb_runstate runstate)
618 for (i=0; runstate_map[i].label != NULL ; i++) {
619 if (runstate_map[i].runstate == runstate) {
620 return runstate_map[i].label;
/* No row matched: fall back to the label of the first row (the first row
 * shown in the table is "UNKNOWN"). */
624 return runstate_map[0].label;
/* Return the runstate whose label equals `label`, ignoring case, or
 * CTDB_RUNSTATE_UNKNOWN when no row matches.
 * label is passed straight to strcasecmp(), so it must not be NULL. */
627 enum ctdb_runstate runstate_from_string(const char *label)
630 for (i=0; runstate_map[i].label != NULL; i++) {
631 if (strcasecmp(runstate_map[i].label, label) == 0) {
632 return runstate_map[i].runstate;
636 return CTDB_RUNSTATE_UNKNOWN;
/* Move the context to a new runstate and log the change at DEBUG_NOTICE
 * level.
 *   ctdb     - context whose runstate field is updated
 *   runstate - new state; it must compare greater than the current one */
639 void ctdb_set_runstate(struct ctdb_context *ctdb, enum ctdb_runstate runstate)
/* Staying in the same state or going backwards is reported through
 * ctdb_fatal(). */
641 if (runstate <= ctdb->runstate) {
642 ctdb_fatal(ctdb, "runstate must always increase");
645 DEBUG(DEBUG_NOTICE,("Set runstate to %s (%d)\n",
646 runstate_to_string(runstate), runstate));
647 ctdb->runstate = runstate;
650 /* Convert arbitrary data to 4-byte boundary padded uint32 array */
/*   mem_ctx - talloc context for the returned array
 *   key     - bytes to pack
 * NOTE(review): the check of the allocation result, whatever is stored in
 * k[0] and the return statement are not shown here. */
651 uint32_t *ctdb_key_to_idkey(TALLOC_CTX *mem_ctx, TDB_DATA key)
653 uint32_t idkey_size, *k;
/* One leading word, plus key.dsize rounded up to whole uint32_t words. */
655 idkey_size = 1 + (key.dsize + sizeof(uint32_t)-1) / sizeof(uint32_t);
/* Zero-filled array, so the unused tail of the last word stays 0. */
657 k = talloc_zero_array(mem_ctx, uint32_t, idkey_size);
/* The key bytes start at word 1, after the leading word. */
663 memcpy(&k[1], key.dptr, key.dsize);