Don't set next_interval to 0.
[metze/ctdb/wip.git] / server / ctdbd.c
index 218c6582907fca7e196f8bf4e3670bf32cec3c31..89b9af179bc4c78c3c4baf52b84e45040ed19a53 100644 (file)
 */
 
 #include "includes.h"
-#include "lib/events/events.h"
+#include "lib/tevent/tevent.h"
 #include "system/filesys.h"
 #include "popt.h"
+#include "system/time.h"
 #include "system/wait.h"
+#include "system/network.h"
 #include "cmdline.h"
 #include "../include/ctdb_private.h"
 
-static void block_signal(int signum)
-{
-       struct sigaction act;
-
-       memset(&act, 0, sizeof(act));
-
-       act.sa_handler = SIG_IGN;
-       sigemptyset(&act.sa_mask);
-       sigaddset(&act.sa_mask, signum);
-       sigaction(signum, &act, NULL);
-}
-
 static struct {
        const char *nlist;
        const char *transport;
        const char *myaddress;
        const char *public_address_list;
        const char *event_script_dir;
+       const char *notification_script;
        const char *logfile;
        const char *recovery_lock_file;
        const char *db_dir;
-       int         no_setsched;
+       const char *db_dir_persistent;
+       const char *db_dir_state;
+       const char *public_interface;
+       const char *single_public_ip;
+       const char *node_ip;
+       int         valgrinding;
+       int         use_syslog;
+       int         start_as_disabled;
+       int         start_as_stopped;
+       int         no_lmaster;
+       int         no_recmaster;
+       int         lvs;
+       int         script_log_level;
+       int         no_publicipcheck;
+       int         max_persistent_check_errors;
 } options = {
        .nlist = ETCDIR "/ctdb/nodes",
        .transport = "tcp",
        .event_script_dir = ETCDIR "/ctdb/events.d",
-       .logfile = VARDIR "/log/log.ctdb",
+       .logfile = LOGDIR "/log.ctdb",
        .db_dir = VARDIR "/ctdb",
+       .db_dir_persistent = VARDIR "/ctdb/persistent",
+       .db_dir_state = VARDIR "/ctdb/state",
+       .script_log_level = DEBUG_ERR,
 };
 
+int script_log_level;
+bool fast_start;
 
 /*
   called by the transport layer when a packet comes in
@@ -77,7 +87,16 @@ static void ctdb_recv_pkt(struct ctdb_context *ctdb, uint8_t *data, uint32_t len
        ctdb_input_pkt(ctdb, hdr);
 }
 
+void ctdb_load_nodes_file(struct ctdb_context *ctdb)
+{
+       int ret;
 
+       ret = ctdb_set_nlist(ctdb, options.nlist);
+       if (ret == -1) {
+               DEBUG(DEBUG_ALERT,("ctdb_set_nlist failed - %s\n", ctdb_errstr(ctdb)));
+               exit(1);
+       }
+}
 
 static const struct ctdb_upcalls ctdb_upcalls = {
        .recv_pkt       = ctdb_recv_pkt,
@@ -100,14 +119,33 @@ int main(int argc, const char *argv[])
                POPT_CTDB_CMDLINE
                { "interactive", 'i', POPT_ARG_NONE, &interactive, 0, "don't fork", NULL },
                { "public-addresses", 0, POPT_ARG_STRING, &options.public_address_list, 0, "public address list file", "filename" },
+               { "public-interface", 0, POPT_ARG_STRING, &options.public_interface, 0, "public interface", "interface"},
+               { "single-public-ip", 0, POPT_ARG_STRING, &options.single_public_ip, 0, "single public ip", "ip-address"},
                { "event-script-dir", 0, POPT_ARG_STRING, &options.event_script_dir, 0, "event script directory", "dirname" },
                { "logfile", 0, POPT_ARG_STRING, &options.logfile, 0, "log file location", "filename" },
                { "nlist", 0, POPT_ARG_STRING, &options.nlist, 0, "node list file", "filename" },
+               { "node-ip", 0, POPT_ARG_STRING, &options.node_ip, 0, "node ip", "ip-address"},
+               { "notification-script", 0, POPT_ARG_STRING, &options.notification_script, 0, "notification script", "filename" },
                { "listen", 0, POPT_ARG_STRING, &options.myaddress, 0, "address to listen on", "address" },
                { "transport", 0, POPT_ARG_STRING, &options.transport, 0, "protocol transport", NULL },
                { "dbdir", 0, POPT_ARG_STRING, &options.db_dir, 0, "directory for the tdb files", NULL },
+               { "dbdir-persistent", 0, POPT_ARG_STRING, &options.db_dir_persistent, 0, "directory for persistent tdb files", NULL },
+               { "dbdir-state", 0, POPT_ARG_STRING, &options.db_dir_state, 0, "directory for internal state tdb files", NULL },
                { "reclock", 0, POPT_ARG_STRING, &options.recovery_lock_file, 0, "location of recovery lock file", "filename" },
-               { "nosetsched", 0, POPT_ARG_NONE, &options.no_setsched, 0, "disable setscheduler SCHED_FIFO call", NULL },
+               { "valgrinding", 0, POPT_ARG_NONE, &options.valgrinding, 0, "make valgrind more effective", NULL },
+               { "syslog", 0, POPT_ARG_NONE, &options.use_syslog, 0, "log messages to syslog", NULL },
+               { "start-as-disabled", 0, POPT_ARG_NONE, &options.start_as_disabled, 0, "Node starts in disabled state", NULL },
+               { "start-as-stopped", 0, POPT_ARG_NONE, &options.start_as_stopped, 0, "Node starts in stopped state", NULL },
+               { "no-lmaster", 0, POPT_ARG_NONE, &options.no_lmaster, 0, "disable lmaster role on this node", NULL },
+               { "no-recmaster", 0, POPT_ARG_NONE, &options.no_recmaster, 0, "disable recmaster role on this node", NULL },
+               { "lvs", 0, POPT_ARG_NONE, &options.lvs, 0, "lvs is enabled on this node", NULL },
+               { "script-log-level", 0, POPT_ARG_INT, &options.script_log_level, DEBUG_ERR, "log level of event script output", NULL },
+               { "nopublicipcheck", 0, POPT_ARG_NONE, &options.no_publicipcheck, 0, "don't check we have/don't have the correct public ip addresses", NULL },
+               { "max-persistent-check-errors", 0, POPT_ARG_INT,
+                 &options.max_persistent_check_errors, 0,
+                 "max allowed persistent check errors (default 0)", NULL },
+               { "log-ringbuf-size", 0, POPT_ARG_INT, &log_ringbuf_size, DEBUG_ERR, "Number of log messages we can store in the memory ringbuffer", NULL },
+               { "sloppy-start", 0, POPT_ARG_NONE, &fast_start, 0, "Do not perform full recovery on start", NULL },
                POPT_TABLEEND
        };
        int opt, ret;
@@ -134,43 +172,50 @@ int main(int argc, const char *argv[])
                while (extra_argv[extra_argc]) extra_argc++;
        }
 
-       if (!options.recovery_lock_file) {
-               DEBUG(0,("You must specifiy the location of a recovery lock file with --reclock\n"));
-               exit(1);
-       }
+       talloc_enable_null_tracking();
 
-       block_signal(SIGPIPE);
+       ctdb_block_signal(SIGPIPE);
+       fault_setup("ctdbd");
 
        ev = event_context_init(NULL);
+       tevent_loop_allow_nesting(ev);
 
        ctdb = ctdb_cmdline_init(ev);
 
-       ret = ctdb_set_logfile(ctdb, options.logfile);
+       ctdb->start_as_disabled = options.start_as_disabled;
+       ctdb->start_as_stopped  = options.start_as_stopped;
+
+       script_log_level = options.script_log_level;
+
+       ret = ctdb_set_logfile(ctdb, options.logfile, options.use_syslog);
        if (ret == -1) {
-               printf("ctdb_set_logfile to %s failed - %s\n", options.logfile, ctdb_errstr(ctdb));
+               printf("ctdb_set_logfile to %s failed - %s\n", 
+                      options.use_syslog?"syslog":options.logfile, ctdb_errstr(ctdb));
                exit(1);
        }
 
-       DEBUG(0,("Starting CTDB daemon\n"));
-
+       DEBUG(DEBUG_NOTICE,("Starting CTDB daemon\n"));
+       gettimeofday(&ctdb->ctdbd_start_time, NULL);
+       gettimeofday(&ctdb->last_recovery_started, NULL);
+       gettimeofday(&ctdb->last_recovery_finished, NULL);
        ctdb->recovery_mode    = CTDB_RECOVERY_NORMAL;
        ctdb->recovery_master  = (uint32_t)-1;
        ctdb->upcalls          = &ctdb_upcalls;
        ctdb->idr              = idr_init(ctdb);
        ctdb->recovery_lock_fd = -1;
-       ctdb->monitoring_mode  = CTDB_MONITORING_ACTIVE;
 
        ctdb_tunables_set_defaults(ctdb);
 
+
        ret = ctdb_set_recovery_lock_file(ctdb, options.recovery_lock_file);
        if (ret == -1) {
-               DEBUG(0,("ctdb_set_recovery_lock_file failed - %s\n", ctdb_errstr(ctdb)));
+               DEBUG(DEBUG_ALERT,("ctdb_set_recovery_lock_file failed - %s\n", ctdb_errstr(ctdb)));
                exit(1);
        }
 
        ret = ctdb_set_transport(ctdb, options.transport);
        if (ret == -1) {
-               DEBUG(0,("ctdb_set_transport failed - %s\n", ctdb_errstr(ctdb)));
+               DEBUG(DEBUG_ALERT,("ctdb_set_transport failed - %s\n", ctdb_errstr(ctdb)));
                exit(1);
        }
 
@@ -178,50 +223,119 @@ int main(int argc, const char *argv[])
        if (options.myaddress) {
                ret = ctdb_set_address(ctdb, options.myaddress);
                if (ret == -1) {
-                       DEBUG(0,("ctdb_set_address failed - %s\n", ctdb_errstr(ctdb)));
+                       DEBUG(DEBUG_ALERT,("ctdb_set_address failed - %s\n", ctdb_errstr(ctdb)));
                        exit(1);
                }
        }
 
+       /* set ctdbd capabilities */
+       ctdb->capabilities = 0;
+       if (options.no_lmaster == 0) {
+               ctdb->capabilities |= CTDB_CAP_LMASTER;
+       }
+       if (options.no_recmaster == 0) {
+               ctdb->capabilities |= CTDB_CAP_RECMASTER;
+       }
+       if (options.lvs != 0) {
+               ctdb->capabilities |= CTDB_CAP_LVS;
+       }
+
        /* tell ctdb what nodes are available */
-       ret = ctdb_set_nlist(ctdb, options.nlist);
-       if (ret == -1) {
-               DEBUG(0,("ctdb_set_nlist failed - %s\n", ctdb_errstr(ctdb)));
-               exit(1);
+       ctdb_load_nodes_file(ctdb);
+
+       /* if a node-ip was specified, verify that it exists in the
+          nodes file
+       */
+       if (options.node_ip != NULL) {
+               DEBUG(DEBUG_NOTICE,("IP for this node is %s\n", options.node_ip));
+               ret = ctdb_ip_to_nodeid(ctdb, options.node_ip);
+               if (ret == -1) {
+                       DEBUG(DEBUG_ALERT,("The specified node-ip:%s is not a valid node address. Exiting.\n", options.node_ip));
+                       exit(1);
+               }
+               ctdb->node_ip = options.node_ip;
+               DEBUG(DEBUG_NOTICE,("This is node %d\n", ret));
        }
 
        if (options.db_dir) {
                ret = ctdb_set_tdb_dir(ctdb, options.db_dir);
                if (ret == -1) {
-                       DEBUG(0,("ctdb_set_tdb_dir failed - %s\n", ctdb_errstr(ctdb)));
+                       DEBUG(DEBUG_ALERT,("ctdb_set_tdb_dir failed - %s\n", ctdb_errstr(ctdb)));
+                       exit(1);
+               }
+       }
+       if (options.db_dir_persistent) {
+               ret = ctdb_set_tdb_dir_persistent(ctdb, options.db_dir_persistent);
+               if (ret == -1) {
+                       DEBUG(DEBUG_ALERT,("ctdb_set_tdb_dir_persistent failed - %s\n", ctdb_errstr(ctdb)));
+                       exit(1);
+               }
+       }
+       if (options.db_dir_state) {
+               ret = ctdb_set_tdb_dir_state(ctdb, options.db_dir_state);
+               if (ret == -1) {
+                       DEBUG(DEBUG_ALERT,("ctdb_set_tdb_dir_state failed - %s\n", ctdb_errstr(ctdb)));
                        exit(1);
                }
        }
 
+       if (options.public_interface) {
+               ctdb->default_public_interface = talloc_strdup(ctdb, options.public_interface);
+               CTDB_NO_MEMORY(ctdb, ctdb->default_public_interface);
+       }
+
+       if (options.single_public_ip) {
+               if (options.public_interface == NULL) {
+                       DEBUG(DEBUG_ALERT,("--single_public_ip used but --public_interface is not specified. You must specify the public interface when using single public ip. Exiting\n"));
+                       exit(10);
+               }
+
+               ret = ctdb_set_single_public_ip(ctdb, options.public_interface,
+                                               options.single_public_ip);
+               if (ret != 0) {
+                       DEBUG(DEBUG_ALERT,("Invalid --single-public-ip argument : %s . This is not a valid ip address. Exiting.\n", options.single_public_ip));
+                       exit(10);
+               }
+       }
+
        if (options.public_address_list) {
                ret = ctdb_set_public_addresses(ctdb, options.public_address_list);
                if (ret == -1) {
-                       DEBUG(0,("Unable to setup public address list\n"));
+                       DEBUG(DEBUG_ALERT,("Unable to setup public address list\n"));
                        exit(1);
                }
        }
 
        ret = ctdb_set_event_script_dir(ctdb, options.event_script_dir);
        if (ret == -1) {
-               DEBUG(0,("Unable to setup event script directory\n"));
+               DEBUG(DEBUG_ALERT,("Unable to setup event script directory\n"));
                exit(1);
        }
 
-       /* useful default logfile */
-       if (ctdb->logfile == NULL) {
-               char *name = talloc_asprintf(ctdb, "%s/log.ctdb.pnn%u", 
-                                            VARDIR, ctdb->pnn);
-               ctdb_set_logfile(ctdb, name);
-               talloc_free(name);
+       if (options.notification_script != NULL) {
+               ret = ctdb_set_notification_script(ctdb, options.notification_script);
+               if (ret == -1) {
+                       DEBUG(DEBUG_ALERT,("Unable to setup notification script\n"));
+                       exit(1);
+               }
+       }
+
+       ctdb->valgrinding = options.valgrinding;
+
+       ctdb->do_checkpublicip = !options.no_publicipcheck;
+
+       if (options.max_persistent_check_errors < 0) {
+               ctdb->max_persistent_check_errors = 0xFFFFFFFFFFFFFFFFLL;
+       } else {
+               ctdb->max_persistent_check_errors = (uint64_t)options.max_persistent_check_errors;
        }
 
-       ctdb->do_setsched = !options.no_setsched;
+       if (getenv("CTDB_BASE") == NULL) {
+               /* setup a environment variable for the event scripts to use
+                  to find the installation directory */
+               setenv("CTDB_BASE", ETCDIR "/ctdb", 1);
+       }
 
        /* start the protocol running (as a child) */
-       return ctdb_start_daemon(ctdb, interactive?False:True);
+       return ctdb_start_daemon(ctdb, interactive?False:True, options.use_syslog);
 }