4 Copyright (C) Andrew Tridgell 2006
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/wait.h"
25 #include "system/shmem.h"
26 #include "../include/ctdb_private.h"
28 int LogLevel = DEBUG_NOTICE;
29 int this_log_level = 0;
32 return error string for last error
/*
  Accessor for the error string associated with a ctdb context.

  NOTE(review): the function body is not present in this extract.  Going by
  ctdb_set_error(), which stores its message in ctdb->err_msg, this
  presumably returns that pointer - confirm against the full file.
*/
34 const char *ctdb_errstr(struct ctdb_context *ctdb)
41 remember an error message
43 void ctdb_set_error(struct ctdb_context *ctdb, const char *fmt, ...)
46 talloc_free(ctdb->err_msg);
48 ctdb->err_msg = talloc_vasprintf(ctdb, fmt, ap);
49 DEBUG(DEBUG_ERR,("ctdb error: %s\n", ctdb->err_msg));
54 a fatal internal error occurred - no hope for recovery
/*
  Report a fatal internal error: msg is logged at DEBUG_ALERT with a
  "ctdb fatal error" prefix.

  NOTE(review): whatever follows the log call (presumably the actual
  termination of the process) is not visible in this extract.
*/
56 void ctdb_fatal(struct ctdb_context *ctdb, const char *msg)
58 DEBUG(DEBUG_ALERT,("ctdb fatal error: %s\n", msg));
63 like ctdb_fatal() but a core/backtrace would not be useful
/*
  Report an error the daemon is exiting on: msg is logged at DEBUG_ALERT
  with a "ctdb exiting with error" prefix.

  NOTE(review): the statement(s) after the log call are not visible in this
  extract; presumably the process exits here - confirm against the full file.
*/
65 void ctdb_die(struct ctdb_context *ctdb, const char *msg)
67 DEBUG(DEBUG_ALERT,("ctdb exiting with error: %s\n", msg));
71 /* Invoke an external program to do some sort of tracing on the CTDB
72 * process. This might block for a little while. The external
73 * program is specified by the environment variable
74 * CTDB_EXTERNAL_TRACE. This program should take one argument: the
75 * pid of the process to trace. Commonly, the program would be a
76 * wrapper script around gcore.
/*
  Build the command line for the external tracing program and log the
  start and end of the trace at DEBUG_WARNING.

  NOTE(review): the check on t, the statement that actually runs cmd and
  any release of cmd are not visible in this extract.
*/
78 void ctdb_external_trace(void)
/* tracing program, taken from the environment */
81 const char * t = getenv("CTDB_EXTERNAL_TRACE");
/* command line is "<program> <pid of this process>", allocated on the
 * NULL talloc context */
88 cmd = talloc_asprintf(NULL, "%s %lu", t, (unsigned long) getpid());
/* NOTE(review): no NULL check on cmd is visible before it is passed to
 * "%s" - confirm whether allocation failure is handled */
89 DEBUG(DEBUG_WARNING,("begin external trace: %s\n", cmd));
91 DEBUG(DEBUG_WARNING,("end external trace: %s\n", cmd));
/*
  Fill in *address from the string str.

  The address string is duplicated onto mem_ctx and the port is taken
  either from the "ctdb"/"tcp" entry of the services database or from
  CTDB_PORT.

  NOTE(review): the return statement and the statements surrounding the
  service lookup are not visible in this extract.
*/
98 int ctdb_parse_address(struct ctdb_context *ctdb,
99 TALLOC_CTX *mem_ctx, const char *str,
100 struct ctdb_address *address)
/* look up the "ctdb" TCP service */
105 se = getservbyname("ctdb", "tcp");
/* keep a private copy of the address string; CTDB_NO_MEMORY() handles
 * the allocation-failure case */
108 address->address = talloc_strdup(mem_ctx, str);
109 CTDB_NO_MEMORY(ctdb, address->address);
/* NOTE(review): the condition choosing between the next two assignments
 * is not visible; presumably CTDB_PORT is the fallback when the service
 * lookup found nothing */
112 address->port = CTDB_PORT;
/* s_port is in network byte order, hence the ntohs() */
114 address->port = ntohs(se->s_port);
121 check if two addresses are the same
123 bool ctdb_same_address(struct ctdb_address *a1, struct ctdb_address *a2)
125 return strcmp(a1->address, a2->address) == 0 && a1->port == a2->port;
130 hash function for mapping data to a VNN - taken from tdb
132 uint32_t ctdb_hash(const TDB_DATA *key)
134 return tdb_jenkins_hash(discard_const(key));
138 a type checking variant of idr_find
140 static void *_idr_find_type(struct idr_context *idp, int id, const char *type, const char *location)
142 void *p = idr_find(idp, id);
143 if (p && talloc_check_name(p, type) == NULL) {
144 DEBUG(DEBUG_ERR,("%s idr_find_type expected type %s but got %s\n",
145 location, type, talloc_get_name(p)));
/*
  Allocate a new request id for state from ctdb->idr.

  NOTE(review): the guard around the "wrap" branch and the tail of the
  function (the return and any update of ctdb->lastid) are not visible in
  this extract.
*/
151 uint32_t ctdb_reqid_new(struct ctdb_context *ctdb, void *state)
/* first ask for an id above the last one recorded in ctdb->lastid */
153 int id = idr_get_new_above(ctdb->idr, state, ctdb->lastid+1, INT_MAX);
/* NOTE(review): presumably reached only when the call above failed; the
 * search then restarts without a lower bound */
155 DEBUG(DEBUG_DEBUG, ("Reqid wrap!\n"));
156 id = idr_get_new(ctdb->idr, state, INT_MAX);
162 void *_ctdb_reqid_find(struct ctdb_context *ctdb, uint32_t reqid, const char *type, const char *location)
166 p = _idr_find_type(ctdb->idr, reqid, type, location);
168 DEBUG(DEBUG_WARNING, ("Could not find idr:%u\n",reqid));
175 void ctdb_reqid_remove(struct ctdb_context *ctdb, uint32_t reqid)
179 ret = idr_remove(ctdb->idr, reqid);
181 DEBUG(DEBUG_ERR, ("Removing idr that does not exist\n"));
187 form a ctdb_rec_data record from a key/data pair
189 note that header may be NULL. If not NULL then it is included in the data portion
/*
  Build a single ctdb_rec_data record, allocated on mem_ctx, out of a
  key/data pair and an optional header.

  Layout of d->data as written below: the key bytes first, then the header
  (only when header is non-NULL), then the data bytes.

  NOTE(review): part of the parameter list, the allocation-failure check,
  the branch conditions and the return are not visible in this extract.
*/
192 struct ctdb_rec_data *ctdb_marshall_record(TALLOC_CTX *mem_ctx, uint32_t reqid,
194 struct ctdb_ltdb_header *header,
198 struct ctdb_rec_data *d;
/* total size: fixed part of the record + key + data + optional header */
200 length = offsetof(struct ctdb_rec_data, data) + key.dsize +
201 data.dsize + (header?sizeof(*header):0);
202 d = (struct ctdb_rec_data *)talloc_size(mem_ctx, length);
/* the key always sits at the start of the payload */
208 d->keylen = key.dsize;
209 memcpy(&d->data[0], key.dptr, key.dsize);
/* with a header: datalen counts header + data, the header is stored
 * right after the key and the data after the header */
211 d->datalen = data.dsize + sizeof(*header);
212 memcpy(&d->data[key.dsize], header, sizeof(*header));
213 memcpy(&d->data[key.dsize+sizeof(*header)], data.dptr, data.dsize);
/* without a header: the data follows the key directly */
215 d->datalen = data.dsize;
216 memcpy(&d->data[key.dsize], data.dptr, data.dsize);
222 /* helper function for marshalling multiple records */
/*
  Append one record to a marshall buffer.

  A record is built with ctdb_marshall_record() and its bytes are copied
  onto the end of m, which is grown with talloc_realloc_size().

  NOTE(review): part of the parameter list, all error checks, the
  condition under which a fresh buffer is allocated and the tail of the
  function are not visible in this extract.
*/
223 struct ctdb_marshall_buffer *ctdb_marshall_add(TALLOC_CTX *mem_ctx,
224 struct ctdb_marshall_buffer *m,
228 struct ctdb_ltdb_header *header,
231 struct ctdb_rec_data *r;
232 size_t m_size, r_size;
233 struct ctdb_marshall_buffer *m2;
/* build the record that is going to be appended */
235 r = ctdb_marshall_record(mem_ctx, reqid, key, header, data);
/* zero-initialised buffer consisting of the fixed part only
 * NOTE(review): presumably only done when no buffer was passed in */
242 m = talloc_zero_size(mem_ctx, offsetof(struct ctdb_marshall_buffer, data));
/* sizes are taken from the talloc chunks themselves */
249 m_size = talloc_get_size(m);
250 r_size = talloc_get_size(r);
/* grow the buffer by exactly the size of the new record */
252 m2 = talloc_realloc_size(mem_ctx, m, m_size + r_size);
/* the record goes where the old buffer ended */
258 memcpy(m_size + (uint8_t *)m2, r, r_size);
267 /* we've finished marshalling, return a data blob with the marshalled records */
268 TDB_DATA ctdb_marshall_finish(struct ctdb_marshall_buffer *m)
271 data.dptr = (uint8_t *)m;
272 data.dsize = talloc_get_size(m);
277 loop over a marshalling buffer
279 - pass r==NULL to start
280 - loop the number of times indicated by m->count
/*
  Step to the next record of a marshall buffer and hand out its parts.

  key and data are set to point into the record itself (no copies are
  made); the header, when requested, is copied out of the start of the
  record's data portion.

  NOTE(review): part of the parameter list, the conditions selecting the
  first/next record, the guards on key and data, and the return statements
  are not visible in this extract.
*/
282 struct ctdb_rec_data *ctdb_marshall_loop_next(struct ctdb_marshall_buffer *m, struct ctdb_rec_data *r,
284 struct ctdb_ltdb_header *header,
285 TDB_DATA *key, TDB_DATA *data)
/* first record: start of the buffer's payload */
288 r = (struct ctdb_rec_data *)&m->data[0];
/* following records: advance by the current record's own length */
290 r = (struct ctdb_rec_data *)(r->length + (uint8_t *)r);
/* the key sits at the start of the record payload */
298 key->dptr = &r->data[0];
299 key->dsize = r->keylen;
/* the data portion follows the key */
302 data->dptr = &r->data[r->keylen];
303 data->dsize = r->datalen;
/* when a header is requested it is skipped in the data handed back
 * NOTE(review): dsize is reduced by sizeof(*header) here, before the
 * r->datalen < sizeof(*header) check further down has run */
304 if (header != NULL) {
305 data->dptr += sizeof(*header);
306 data->dsize -= sizeof(*header);
310 if (header != NULL) {
/* record too short to contain a header */
311 if (r->datalen < sizeof(*header)) {
/* NOTE(review): the header is read through a cast of a byte pointer;
 * alignment of &r->data[r->keylen] is not established here */
314 *header = *(struct ctdb_ltdb_header *)&r->data[r->keylen];
326 #include <procinfo.h>
330 if possible, make this task real time
/*
  Try to switch this task to a real-time scheduling policy.

  Two variants are visible: one built on getthrds64()/thread_setsched()
  requesting SCHED_RR, and one built on sched_setscheduler() requesting
  SCHED_FIFO with priority 1.  Failures are only logged.

  NOTE(review): the preprocessor structure choosing between the variants
  is only partly visible in this extract.
*/
332 void ctdb_set_scheduler(struct ctdb_context *ctdb)
335 #if HAVE_THREAD_SETSCHED
336 struct thrdentry64 te;
/* fetch a single thread entry for this process */
340 if (getthrds64(getpid(), &te, sizeof(te), &ti, 1) != 1) {
341 DEBUG(DEBUG_ERR, ("Unable to get thread information\n"));
/* round-robin policy for the thread found above */
345 if (thread_setsched(te.ti_tid, 0, SCHED_RR) == -1) {
346 DEBUG(DEBUG_ERR, ("Unable to set scheduler to SCHED_RR (%s)\n",
349 DEBUG(DEBUG_NOTICE, ("Set scheduler to SCHED_RR\n"));
353 #if HAVE_SCHED_SETSCHEDULER
354 struct sched_param p;
356 p.sched_priority = 1;
/* pid 0 means the calling process */
358 if (sched_setscheduler(0, SCHED_FIFO, &p) == -1) {
359 DEBUG(DEBUG_CRIT,("Unable to set scheduler to SCHED_FIFO (%s)\n",
362 DEBUG(DEBUG_NOTICE,("Set scheduler to SCHED_FIFO\n"));
369 restore previous scheduler parameters
/*
  Put this task back on the SCHED_OTHER scheduling policy.

  Mirrors ctdb_set_scheduler(): one variant uses getthrds64() and
  thread_setsched(), the other sched_setscheduler() with priority 0.
  Failures are only logged.

  NOTE(review): the preprocessor structure choosing between the variants
  is only partly visible in this extract.
*/
371 void ctdb_restore_scheduler(struct ctdb_context *ctdb)
374 #if HAVE_THREAD_SETSCHED
375 struct thrdentry64 te;
/* fetch a single thread entry for this process */
379 if (getthrds64(getpid(), &te, sizeof(te), &ti, 1) != 1) {
380 DEBUG(DEBUG_ERR, ("Unable to get thread information\n"));
382 if (thread_setsched(te.ti_tid, 0, SCHED_OTHER) == -1) {
383 DEBUG(DEBUG_ERR, ("Unable to set scheduler to SCHED_OTHER\n"));
387 #if HAVE_SCHED_SETSCHEDULER
388 struct sched_param p;
390 p.sched_priority = 0;
/* pid 0 means the calling process */
391 if (sched_setscheduler(0, SCHED_OTHER, &p) == -1) {
392 DEBUG(DEBUG_ERR, ("Unable to set scheduler to SCHED_OTHER\n"));
398 void set_nonblocking(int fd)
402 v = fcntl(fd, F_GETFL, 0);
404 DEBUG(DEBUG_WARNING, ("Failed to get file status flags - %s\n",
408 if (fcntl(fd, F_SETFL, v | O_NONBLOCK) == -1) {
409 DEBUG(DEBUG_WARNING, ("Failed to set non_blocking on fd - %s\n",
414 void set_close_on_exec(int fd)
418 v = fcntl(fd, F_GETFD, 0);
420 DEBUG(DEBUG_WARNING, ("Failed to get file descriptor flags - %s\n",
424 if (fcntl(fd, F_SETFD, v | FD_CLOEXEC) != 0) {
425 DEBUG(DEBUG_WARNING, ("Failed to set close_on_exec on fd - %s\n",
431 bool parse_ipv4(const char *s, unsigned port, struct sockaddr_in *sin)
433 sin->sin_family = AF_INET;
434 sin->sin_port = htons(port);
436 if (inet_pton(AF_INET, s, &sin->sin_addr) != 1) {
437 DEBUG(DEBUG_ERR, (__location__ " Failed to translate %s into sin_addr\n", s));
444 static bool parse_ipv6(const char *s, const char *ifaces, unsigned port, ctdb_sock_addr *saddr)
446 saddr->ip6.sin6_family = AF_INET6;
447 saddr->ip6.sin6_port = htons(port);
448 saddr->ip6.sin6_flowinfo = 0;
449 saddr->ip6.sin6_scope_id = 0;
451 if (inet_pton(AF_INET6, s, &saddr->ip6.sin6_addr) != 1) {
452 DEBUG(DEBUG_ERR, (__location__ " Failed to translate %s into sin6_addr\n", s));
456 if (ifaces && IN6_IS_ADDR_LINKLOCAL(&saddr->ip6.sin6_addr)) {
457 if (strchr(ifaces, ',')) {
458 DEBUG(DEBUG_ERR, (__location__ " Link local address %s "
459 "is specified for multiple ifaces %s\n",
463 saddr->ip6.sin6_scope_id = if_nametoindex(ifaces);
471 bool parse_ip_port(const char *addr, ctdb_sock_addr *saddr)
473 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
479 s = talloc_strdup(tmp_ctx, addr);
481 DEBUG(DEBUG_ERR, (__location__ " Failed strdup()\n"));
482 talloc_free(tmp_ctx);
488 DEBUG(DEBUG_ERR, (__location__ " This addr: %s does not contain a port number\n", s));
489 talloc_free(tmp_ctx);
493 port = strtoul(p+1, &endp, 10);
494 if (endp == NULL || *endp != 0) {
495 /* trailing garbage */
496 DEBUG(DEBUG_ERR, (__location__ " Trailing garbage after the port in %s\n", s));
497 talloc_free(tmp_ctx);
503 /* now is this a ipv4 or ipv6 address ?*/
504 ret = parse_ip(s, NULL, port, saddr);
506 talloc_free(tmp_ctx);
513 bool parse_ip(const char *addr, const char *ifaces, unsigned port, ctdb_sock_addr *saddr)
518 ZERO_STRUCTP(saddr); /* valgrind :-) */
520 /* now is this a ipv4 or ipv6 address ?*/
521 p = index(addr, ':');
523 ret = parse_ipv4(addr, port, &saddr->ip);
525 ret = parse_ipv6(addr, ifaces, port, saddr);
534 bool parse_ip_mask(const char *str, const char *ifaces, ctdb_sock_addr *addr, unsigned *mask)
536 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
542 s = talloc_strdup(tmp_ctx, str);
544 DEBUG(DEBUG_ERR, (__location__ " Failed strdup()\n"));
545 talloc_free(tmp_ctx);
551 DEBUG(DEBUG_ERR, (__location__ " This addr: %s does not contain a mask\n", s));
552 talloc_free(tmp_ctx);
556 *mask = strtoul(p+1, &endp, 10);
557 if (endp == NULL || *endp != 0) {
558 /* trailing garbage */
559 DEBUG(DEBUG_ERR, (__location__ " Trailing garbage after the mask in %s\n", s));
560 talloc_free(tmp_ctx);
566 /* now is this a ipv4 or ipv6 address ?*/
567 ret = parse_ip(s, ifaces, 0, addr);
569 talloc_free(tmp_ctx);
574 This is used to canonicalize a ctdb_sock_addr structure.
576 void ctdb_canonicalize_ip(const ctdb_sock_addr *ip, ctdb_sock_addr *cip)
578 char prefix[12] = { 0,0,0,0,0,0,0,0,0,0,0xff,0xff };
580 memcpy(cip, ip, sizeof (*cip));
582 if ( (ip->sa.sa_family == AF_INET6)
583 && !memcmp(&ip->ip6.sin6_addr, prefix, 12)) {
584 memset(cip, 0, sizeof(*cip));
585 #ifdef HAVE_SOCK_SIN_LEN
586 cip->ip.sin_len = sizeof(*cip);
588 cip->ip.sin_family = AF_INET;
589 cip->ip.sin_port = ip->ip6.sin6_port;
590 memcpy(&cip->ip.sin_addr, &ip->ip6.sin6_addr.s6_addr[12], 4);
594 bool ctdb_same_ip(const ctdb_sock_addr *tip1, const ctdb_sock_addr *tip2)
596 ctdb_sock_addr ip1, ip2;
598 ctdb_canonicalize_ip(tip1, &ip1);
599 ctdb_canonicalize_ip(tip2, &ip2);
601 if (ip1.sa.sa_family != ip2.sa.sa_family) {
605 switch (ip1.sa.sa_family) {
607 return ip1.ip.sin_addr.s_addr == ip2.ip.sin_addr.s_addr;
609 return !memcmp(&ip1.ip6.sin6_addr.s6_addr[0],
610 &ip2.ip6.sin6_addr.s6_addr[0],
613 DEBUG(DEBUG_ERR, (__location__ " CRITICAL Can not compare sockaddr structures of type %u\n", ip1.sa.sa_family));
621 compare two ctdb_sock_addr structures
623 bool ctdb_same_sockaddr(const ctdb_sock_addr *ip1, const ctdb_sock_addr *ip2)
625 return ctdb_same_ip(ip1, ip2) && ip1->ip.sin_port == ip2->ip.sin_port;
628 char *ctdb_addr_to_str(ctdb_sock_addr *addr)
630 static char cip[128] = "";
632 switch (addr->sa.sa_family) {
634 inet_ntop(addr->ip.sin_family, &addr->ip.sin_addr, cip, sizeof(cip));
637 inet_ntop(addr->ip6.sin6_family, &addr->ip6.sin6_addr, cip, sizeof(cip));
640 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family %u\n", addr->sa.sa_family));
641 ctdb_external_trace();
647 unsigned ctdb_addr_to_port(ctdb_sock_addr *addr)
649 switch (addr->sa.sa_family) {
651 return ntohs(addr->ip.sin_port);
654 return ntohs(addr->ip6.sin6_port);
657 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family %u\n", addr->sa.sa_family));
663 void ctdb_block_signal(int signum)
667 sigaddset(&set,signum);
668 sigprocmask(SIG_BLOCK,&set,NULL);
671 void ctdb_unblock_signal(int signum)
675 sigaddset(&set,signum);
676 sigprocmask(SIG_UNBLOCK,&set,NULL);
/*
  Table mapping numeric debug levels to their textual names; it is
  searched by get_debug_by_level() and get_debug_by_desc().

  NOTE(review): this extract shows no entry for DEBUG_ERR and no
  terminating entry, although the lookup loops stop on a NULL description -
  confirm against the full file.
*/
679 struct debug_levels debug_levels[] = {
680 {DEBUG_EMERG, "EMERG"},
681 {DEBUG_ALERT, "ALERT"},
682 {DEBUG_CRIT, "CRIT"},
684 {DEBUG_WARNING, "WARNING"},
685 {DEBUG_NOTICE, "NOTICE"},
686 {DEBUG_INFO, "INFO"},
687 {DEBUG_DEBUG, "DEBUG"},
/*
  Translate a numeric debug level into its name from debug_levels[].

  NOTE(review): the value returned when no entry matches is not visible in
  this extract.
*/
691 const char *get_debug_by_level(int32_t level)
/* linear scan; the table ends at the first NULL description */
695 for (i=0; debug_levels[i].description != NULL; i++) {
696 if (debug_levels[i].level == level) {
697 return debug_levels[i].description;
/*
  Translate a debug level name into its numeric value using
  debug_levels[].  The comparison ignores case.

  NOTE(review): the value returned when no entry matches is not visible in
  this extract.
*/
703 int32_t get_debug_by_desc(const char *desc)
/* linear scan; the table ends at the first NULL description */
707 for (i=0; debug_levels[i].description != NULL; i++) {
708 if (!strcasecmp(debug_levels[i].description, desc)) {
709 return debug_levels[i].level;
716 /* we don't lock future pages here; it would increase the chance that
717 * we'd fail to mmap later on. */
/*
  Lock the pages currently mapped by the process into memory with
  mlockall(MCL_CURRENT).  A failure is logged at DEBUG_WARNING.

  The body is only compiled when HAVE_MLOCKALL is defined and _AIX_ is not.

  NOTE(review): the declaration of dummy, the bodies of the early-out
  checks and the condition for the "local daemons" case are not visible in
  this extract.
*/
718 void ctdb_lockdown_memory(struct ctdb_context *ctdb)
720 #if defined(HAVE_MLOCKALL) && !defined(_AIX_)
721 /* Extra stack, please! */
723 memset(dummy, 0, sizeof(dummy));
/* NOTE(review): branch body not visible; presumably locking is skipped
 * when running under valgrind */
725 if (ctdb->valgrinding) {
729 /* Ignore when running in local daemons mode */
734 /* Avoid compiler optimizing out dummy. */
735 mlock(dummy, sizeof(dummy));
736 if (mlockall(MCL_CURRENT) != 0) {
/* NOTE(review): the format string has a stray apostrophe after %s */
737 DEBUG(DEBUG_WARNING,("Failed to lockdown memory: %s'\n",
/*
  Table of event script call names.

  NOTE(review): the initialiser entries are not visible in this extract;
  presumably the order has to match an enumeration of event script calls
  declared elsewhere - confirm against the headers.
*/
743 const char *ctdb_eventscript_call_names[] = {
760 /* Runstate handling */
/*
  Lookup table pairing each run state with its printable label; it is
  used by runstate_to_string() and runstate_from_string().  Entry 0
  (UNKNOWN) doubles as the fallback label in runstate_to_string().

  NOTE(review): the struct header, the label member and the terminating
  entry (the lookup loops stop on a NULL label) are not visible in this
  extract.
*/
762 enum ctdb_runstate runstate;
765 { CTDB_RUNSTATE_UNKNOWN, "UNKNOWN" },
766 { CTDB_RUNSTATE_INIT, "INIT" },
767 { CTDB_RUNSTATE_SETUP, "SETUP" },
768 { CTDB_RUNSTATE_FIRST_RECOVERY, "FIRST_RECOVERY" },
769 { CTDB_RUNSTATE_STARTUP, "STARTUP" },
770 { CTDB_RUNSTATE_RUNNING, "RUNNING" },
771 { CTDB_RUNSTATE_SHUTDOWN, "SHUTDOWN" },
775 const char *runstate_to_string(enum ctdb_runstate runstate)
778 for (i=0; runstate_map[i].label != NULL ; i++) {
779 if (runstate_map[i].runstate == runstate) {
780 return runstate_map[i].label;
784 return runstate_map[0].label;
787 enum ctdb_runstate runstate_from_string(const char *label)
790 for (i=0; runstate_map[i].label != NULL; i++) {
791 if (strcasecmp(runstate_map[i].label, label) == 0) {
792 return runstate_map[i].runstate;
796 return CTDB_RUNSTATE_UNKNOWN;
799 void ctdb_set_runstate(struct ctdb_context *ctdb, enum ctdb_runstate runstate)
801 if (runstate <= ctdb->runstate) {
802 ctdb_fatal(ctdb, "runstate must always increase");
805 DEBUG(DEBUG_NOTICE,("Set runstate to %s (%d)\n",
806 runstate_to_string(runstate), runstate));
807 ctdb->runstate = runstate;
810 void ctdb_mkdir_p_or_die(struct ctdb_context *ctdb, const char *dir, int mode)
814 ret = mkdir_p(dir, mode);
817 ("ctdb exiting with error: "
818 "failed to create directory \"%s\" (%s)\n",
819 dir, strerror(errno)));