Add 60.ganesha to what gets installed by make install as well as by the RPM

[sahlberg/ctdb.git] / doc / ctdbd.1.xml
diff --git a/doc/ctdbd.1.xml b/doc/ctdbd.1.xml

index d69d3dcbc85b2867548446e1a90507893be58a6b..91e2f9fe81a282d1974e0901cd85702b025fb37e 100644 (file)
--- a/doc/ctdbd.1.xml
+++ b/doc/ctdbd.1.xml
@@ -5,6 +5,8 @@
  <refmeta>
         <refentrytitle>ctdbd</refentrytitle>
         <manvolnum>1</manvolnum>
+       <refmiscinfo class="source">ctdb</refmiscinfo>
+       <refmiscinfo class="manual">CTDB - clustered TDB database</refmiscinfo>
  </refmeta>
  
  
@@ -33,13 +35,16 @@
                 <arg choice="opt">--no-lmaster</arg>
                 <arg choice="opt">--no-recmaster</arg>
                 <arg choice="opt">--nosetsched</arg>
+               <arg choice="req">--notification-script=&lt;filename&gt;</arg>
                 <arg choice="opt">--public-addresses=&lt;filename&gt;</arg>
                 <arg choice="opt">--public-interface=&lt;interface&gt;</arg>
                 <arg choice="req">--reclock=&lt;filename&gt;</arg>
                 <arg choice="opt">--single-public-ip=&lt;address&gt;</arg>
                 <arg choice="opt">--socket=&lt;filename&gt;</arg>
                 <arg choice="opt">--start-as-disabled</arg>
+               <arg choice="opt">--start-as-stopped</arg>
                 <arg choice="opt">--syslog</arg>
+               <arg choice="opt">--log-ringbuf-size=&lt;num-entries&gt;</arg>
                 <arg choice="opt">--torture</arg>
                 <arg choice="opt">--transport=&lt;STRING&gt;</arg>
                 <arg choice="opt">--usage</arg>
@@ -212,12 +217,28 @@
             This option is used to tell ctdbd to NOT run as a real-time process
             and instead run ctdbd as a normal userspace process.
             This is useful for debugging and when you want to run ctdbd under
-           valgrind or gdb. (You dont want to attach valgrind or gdb to a
+           valgrind or gdb. (You don't want to attach valgrind or gdb to a
             real-time process.)
            </para>
          </listitem>
        </varlistentry>
  
+      <varlistentry><term>--notification-script=&lt;filename&gt;</term>
+        <listitem>
+          <para>
+           This specifies a script which will be invoked by ctdb when certain
+           state changes occur in ctdbd, so that you can trigger your own
+           actions in response to them.
+          </para>
+          <para>
+            This file is usually /etc/ctdb/notify.sh .
+          </para>
+          <para>
+           See the NOTIFICATION SCRIPT section below for more information.
+          </para>
+        </listitem>
+      </varlistentry>
+
        <varlistentry><term>--public_addresses=&lt;filename&gt;</term>
          <listitem>
            <para>
@@ -237,7 +258,7 @@
             </para>
             <para>
             This is only required when using public ip addresses and only when
-           you dont specify the interface explicitly in /etc/ctdb/public_addresses or when you are using --single-public-ip.
+           you don't specify the interface explicitly in /etc/ctdb/public_addresses or when you are using --single-public-ip.
            </para>
            <para>
           If you omit this argument when using public addresses or single public ip, ctdb will not be able to send out Gratious ARPs correctly or be able to kill tcp connections correctly which will lead to application failures. 
@@ -248,59 +269,15 @@
        <varlistentry><term>--reclock=&lt;filename&gt;</term>
          <listitem>
            <para>
-            This is the name of the lock file stored of the shared cluster filesystem that ctdbd uses to arbitrate which node has the role of recovery-master.
+            This is the name of the lock file stored on the shared cluster filesystem that ctdbd uses to prevent split brains from occurring.
              This file must be stored on shared storage.
            </para>
-        </listitem>
-      </varlistentry>
-
-      <varlistentry><term>--single-public-ip=&lt;address&gt;</term>
-        <listitem>
            <para>
-            This option is used to activate the "ipmux" or the "lvs"
-           functionality of ctdb where the cluster provides a single 
-           public ip address for the entire cluster. When using this option
-           you must also use the --public-interface option.
-           </para>
-           <para>
-            In this mode, all nodes of the cluster will expose a single
-            ip address from all nodes with all incoming traffic to the cluster
-            being passed through the current recmaster. This functionality
-            is similar to using a load-balancing switch.
-          </para>
-          <para>
-            All incoming packets are sent to the recmaster which will multiplex
-            the clients across all available nodes and pass the packets on to
-            a different node in the cluster to manage the connection based
-            on the clients ip address. Outgoing packets however are sent
-            directly from the node that was choosen back to the client.
-            Since all incoming packets are sent through the recmaster this will
-            have a throughput and performance impact when used. This impact
-            in performance primarily affects write-performance while 
-           read-performance should be mainly unaffected.
-            Only use this feature if your environment is mostly-read 
-            (i.e. most traffic is from the nodes back to the clients) or
-            if it is not important to get maximum write-performance to the
-           cluster.
+           It is possible to run CTDB without a reclock file, but then there 
+           will be no protection against split brain if the network becomes
+           partitioned. Using CTDB without a reclock file is strongly
+           discouraged.
            </para>
-         <para>
-           This feature is completely controlled from the eventscripts and 
-           does not require any CTDBD involvement. However, the CTDBD daemon
-           does need to know that the "single public ip" exists so that the 
-           CTDBD daemon will allow clients to set up killtcp to work on this
-           ip address.
-           </para>
-           <para>
-           CTDBD only allows clients to use killtcp to kill off (RST) tcp
-           connections to/from an ip address that is either a normal public
-           address or to/from the ip address specified by --single-public-ip.
-           No other tcp connections are allowed to be specified with killtcp.
-         </para>
-         <para>
-         Please note that ipmux is obsolete. Use LVS, not ipmux.
-         Please see the LVS section in this manpage for instructions on
-         how to configure and use CTDB with LVS.
-         </para>
          </listitem>
        </varlistentry>
  
@@ -338,6 +315,24 @@
          </listitem>
        </varlistentry>
  
+      <varlistentry><term>--start-as-stopped</term>
+        <listitem>
+          <para>
+         This makes the ctdb daemon start up in the STOPPED state.
+          </para>
+          <para>
+         A node that is STOPPED does not host any public addresses. It is not part of the VNNMAP so it does not act as an LMASTER. It also has all databases locked in recovery mode until restarted.
+          </para>
+          <para>
+         To restart and activate a STOPPED node, the command "ctdb continue" is used.
+          </para>
+          <para>
+         A node that is STOPPED will not host any services and will not be
+         reachable/used by any clients.
+          </para>
+        </listitem>
+      </varlistentry>
+
        <varlistentry><term>--syslog</term>
          <listitem>
            <para>
@@ -346,6 +341,33 @@
          </listitem>
        </varlistentry>
  
+      <varlistentry><term>--log-ringbuf-size=&lt;num-entries&gt;</term>
+        <listitem>
+          <para>
+           In addition to the normal logging to a log file,
+           CTDBD also keeps an in-memory ringbuffer containing the most recent
+           log entries for all log levels (except DEBUG).
+          </para>
+          <para>
+           This is useful since it allows for keeping continuous logs to a file
+           at a reasonable non-verbose level, but shortly after an incident has
+           occurred, a much more detailed log can be pulled from memory. This
+           can allow you to avoid having to reproduce an issue due to the
+           on-disk logs being of insufficient detail.
+          </para>
+          <para>
+           This in-memory ringbuffer contains a fixed number of the most recent
+           entries. This is settable at startup either through the
+           --log-ringbuf-size argument, or preferably by using
+           CTDB_LOG_RINGBUF_SIZE in the sysconfig file.
+          </para>
+          <para>
+           Use the "ctdb getlog" command to access this log.
+          </para>
+        </listitem>
+      </varlistentry>
+
+
        <varlistentry><term>--torture</term>
          <listitem>
            <para>
@@ -541,6 +563,14 @@
        does not perticipate in the CTDB cluster but can still be communicated 
        with. I.e. ctdb commands can be sent to it.
      </para>
+
+    <para>
+      STOPPED - A node that is stopped does not host any public ip addresses,
+      nor is it part of the VNNMAP. A stopped node can not become LVSMASTER,
+      RECMASTER or NATGW.
+      This node does not participate in the CTDB cluster but can still be
+      communicated with. I.e. ctdb commands can be sent to it.
+    </para>
    </refsect1>
  
    <refsect1>
@@ -589,7 +619,7 @@
      eventually become banned from the cluster.
      This controls how long the culprit node will be banned from the cluster
      before it is allowed to try to join the cluster again.
-    Dont set to small. A node gets banned for a reason and it is usually due
+    Don't set to small. A node gets banned for a reason and it is usually due
      to real problems with the node.
      </para>
      </refsect2>
@@ -608,7 +638,7 @@
      <refsect2><title>EnableBans</title>
      <para>Default: 1</para>
      <para>
-    When set to 0, this disables BANNING completely in the cluster and thus nodes can not get banned, even it they break. Dont set to 0.
+    When set to 0, this disables BANNING completely in the cluster and thus nodes can not get banned, even it they break. Don't set to 0.
      </para>
      </refsect2>
      <refsect2><title>DeterministicIPs</title>
@@ -660,7 +690,7 @@ ctdb will try to reassign public IP addresses onto the new node as a way to dist
      all client access to the LVS address across this set of nodes. This set of nodes are all LVS capable nodes that are HEALTHY, or if no HEALTHY nodes exists
      all LVS capable nodes regardless of health status.
      LVS will however never loadbalance traffic to nodes that are BANNED,
-    DISABLED or DISCONNECTED. The "ctdb lvs" command is used to show
+    STOPPED, DISABLED or DISCONNECTED. The "ctdb lvs" command is used to show
      which nodes are currently load-balanced across.
      </para>
  
@@ -780,6 +810,300 @@ CTDB_CAPABILITY_RECMASTER=no
    </refsect1>
  
  
+  <refsect1><title>NAT-GW</title>
+    <para>
+      Sometimes it is desirable to run services on the CTDB node which will
+      need to originate outgoing traffic to external servers. This might
+      be contacting NIS servers, LDAP servers etc. etc.
+    </para>
+    <para>
+      This can sometimes be problematic since there are situations when a
+      node does not have any public ip addresses assigned. This could
+      be due to the node just being started up and no addresses have been
+      assigned yet or it could be that the node is UNHEALTHY in which
+      case all public addresses have been migrated off.
+    </para>
+    <para>
+      If then the service status of CTDB depends on such services always
+      being able to originate traffic to external resources
+      this becomes extra troublesome. The node might be UNHEALTHY because
+      the service can not be reached, and the service can not be reached
+      because the node is UNHEALTHY.
+    </para>
+    <para>
+      There are two ways to solve this problem. The first is by assigning a
+      static ip address for one public interface on every node which will allow
+      every node to be able to route traffic to the public network even
+      if there are no public addresses assigned to the node.
+      This is the simplest way but it uses up a lot of ip addresses since you
+      have to assign both static and also public addresses to each node.
+    </para>
+    <refsect2><title>NAT-GW</title>
+    <para>
+      A second way is to use the built in NAT-GW feature in CTDB.
+      With NAT-GW you assign one public NATGW address for each natgw group.
+      Each NATGW group is a set of nodes in the cluster that shares the same
+      NATGW address to talk to the outside world. Normally there would only be
+      one NATGW group spanning the entire cluster, but in situations where one
+      ctdb cluster spans multiple physical sites it is useful to have one
+      NATGW group for each of the two sites.
+    </para>
+    <para>
+      There can be multiple NATGW groups in one cluster but each node can only
+      be member of one NATGW group.
+    </para>
+    <para>
+      In each NATGW group, one of the nodes is designated the NAT Gateway
+      through which all traffic that is originated by nodes in this group
+      will be routed if public addresses are not available.
+    </para>
+    </refsect2>
+
+    <refsect2><title>Configuration</title>
+    <para>
+      NAT-GW is configured in /etc/sysconfig/ctdb by setting the following
+      variables:
+    </para>
+    <screen format="linespecific">
+# NAT-GW configuration
+# Some services running on the CTDB node may need to originate traffic to
+# remote servers before the node is assigned any IP addresses.
+# This is problematic since before the node has public addresses the node might
+# not be able to route traffic to the public networks.
+# One solution is to have static public addresses assigned with routing
+# in addition to the public address interfaces, thus guaranteeing that
+# a node always can route traffic to the external network.
+# This is the most simple solution but it uses up a large number of 
+# additional ip addresses.
+#
+# A more complex solution is NAT-GW.
+# In this mode we only need one additional ip address for the cluster from
+# the external public network.
+# One of the nodes in the cluster is elected to be hosting this ip address
+# so it can reach the external services. This node is also configured
+# to use NAT MASQUERADING for all traffic from the internal private network
+# to the external network. This node is the NAT-GW node.
+#
+# All other nodes are set up with a default route with a metric of 10 to point
+# to the nat-gw node.
+# 
+# The effect of this is that only when a node does not have a public address
+# and thus no proper routes to the external world it will instead
+# route all packets through the nat-gw node.
+#
+# CTDB_NATGW_NODES is the list of nodes that belong to this natgw group.
+# You can have multiple natgw groups in one cluster but each node
+# can only belong to one single natgw group.
+#
+# CTDB_NATGW_PUBLIC_IP=10.0.0.227/24
+# CTDB_NATGW_PUBLIC_IFACE=eth0
+# CTDB_NATGW_DEFAULT_GATEWAY=10.0.0.1
+# CTDB_NATGW_PRIVATE_NETWORK=10.1.1.0/24
+# CTDB_NATGW_NODES=/etc/ctdb/natgw_nodes
+#
+# Normally any node in the natgw group can act as the natgw master.
+# In some configurations you may have special nodes that are part of the
+# cluster/natgw group, but where the node lacks connectivity to the 
+# public network.
+# For these cases, set this variable to make these nodes not able to
+# become natgw master.
+#
+# CTDB_NATGW_SLAVE_ONLY=yes
+    </screen>
+    </refsect2>
+
+    <refsect2><title>CTDB_NATGW_PUBLIC_IP</title>
+    <para>
+      This is an ip address in the public network that is used for all outgoing
+      traffic when the public addresses are not assigned.
+      This address will be assigned to one of the nodes in the cluster which
+      will masquerade all traffic for the other nodes.
+    </para>
+    <para>
+      Format of this parameter is IPADDRESS/NETMASK
+    </para>
+    </refsect2>
+
+    <refsect2><title>CTDB_NATGW_PUBLIC_IFACE</title>
+    <para>
+      This is the physical interface where the CTDB_NATGW_PUBLIC_IP will be
+      assigned to. This should be an interface connected to the public network.
+    </para>
+    <para>
+      Format of this parameter is INTERFACE
+    </para>
+    </refsect2>
+
+    <refsect2><title>CTDB_NATGW_DEFAULT_GATEWAY</title>
+    <para>
+      This is the default gateway to use on the node that is elected to host
+      the CTDB_NATGW_PUBLIC_IP. This is the default gateway on the public network.
+    </para>
+    <para>
+      Format of this parameter is IPADDRESS
+    </para>
+    </refsect2>
+
+    <refsect2><title>CTDB_NATGW_PRIVATE_NETWORK</title>
+    <para>
+      This is the network/netmask used for the internal private network.
+    </para>
+    <para>
+      Format of this parameter is IPADDRESS/NETMASK
+    </para>
+    </refsect2>
+
+    <refsect2><title>CTDB_NATGW_NODES</title>
+    <para>
+      This is the list of all nodes that belong to the same NATGW group
+      as this node. The default is /etc/ctdb/natgw_nodes.
+    </para>
+    </refsect2>
+
+    <refsect2><title>Operation</title>
+    <para>
+      When the NAT-GW functionality is used, one of the nodes is elected
+      to act as a NAT router for all the other nodes in the group when
+      they need to originate traffic to the external public network.
+    </para>
+    <para>
+      The NAT-GW node is assigned the CTDB_NATGW_PUBLIC_IP to the designated
+      interface and the provided default route. The NAT-GW is configured
+      to act as a router and to masquerade all traffic it receives from the
+      internal private network and which is destined to the external network(s).
+    </para>
+    <para>
+      All other nodes in the group are configured with a default route of
+      metric 10 pointing to the designated NAT GW node.
+    </para>
+    <para>
+      This is implemented in the 11.natgw eventscript. Please see the
+      eventscript for further information.
+    </para>
+
+    </refsect2>
+
+    <refsect2><title>Removing/Changing NATGW at runtime</title>
+    <para>
+      The following are the procedures to change/remove a NATGW configuration 
+      at runtime, without having to restart ctdbd.
+    </para>
+
+    <para>
+      If you want to remove NATGW completely from a node, use these steps:
+    </para>
+    <screen format="linespecific">
+1, Run 'CTDB_BASE=/etc/ctdb /etc/ctdb/events.d/11.natgw removenatgw'
+2, Then remove the configuration from /etc/sysconfig/ctdb
+    </screen>
+
+    <para>
+      If you want to change the NATGW configuration on a node :
+    </para>
+    <screen format="linespecific">
+1, Run 'CTDB_BASE=/etc/ctdb /etc/ctdb/events.d/11.natgw removenatgw'
+2, Then change the configuration in /etc/sysconfig/ctdb
+3, Run 'CTDB_BASE=/etc/ctdb /etc/ctdb/events.d/11.natgw updatenatgw'
+    </screen>
+
+    </refsect2>
+
+  </refsect1>
+
+  <refsect1><title>NOTIFICATION SCRIPT</title>
+    <para>
+      Notification scripts are used with ctdb to have a call-out from ctdb
+      to a user-specified script when certain state changes occur in ctdb.
+      This is commonly used to send SNMP traps or emails
+      when a node becomes unhealthy, or on similar events.
+    </para>
+    <para>
+      This is activated by setting CTDB_NOTIFY_SCRIPT=&lt;your script&gt; in the
+       sysconfig file, or by adding --notification-script=&lt;your script&gt;.
+    </para>
+    <para>
+      See /etc/ctdb/notify.sh for an example script.
+    </para>
+    <para>
+      CTDB currently generates notifications on these state changes:
+    </para>
+
+    <refsect2><title>unhealthy</title>
+    <para>
+      This call-out is triggered when the node changes to UNHEALTHY state.
+    </para>
+    </refsect2>
+
+    <refsect2><title>healthy</title>
+    <para>
+      This call-out is triggered when the node changes to HEALTHY state.
+    </para>
+    </refsect2>
+
+    <refsect2><title>startup</title>
+    <para>
+      This call-out is triggered when ctdb has started up and all managed services are up and running.
+    </para>
+    </refsect2>
+
+  </refsect1>
+
+
+<refsect1><title>ClamAV Daemon</title>
+<para>
+CTDB has support to manage the popular anti-virus daemon ClamAV.
+This support is implemented through the
+eventscript : /etc/ctdb/events.d/31.clamd.
+</para>
+      
+<refsect2><title>Configuration</title>
+<para>
+Start by configuring CLAMAV normally and test that it works. Once this is
+done, copy the configuration files over to all the nodes so that all nodes
+share identical CLAMAV configurations.
+Once this is done you can proceed with the instructions below to activate
+CTDB support for CLAMAV.
+</para>
+
+<para>
+First, to activate CLAMAV support in CTDB, edit /etc/sysconfig/ctdb and add the two lines :
+</para>
+<screen format="linespecific">
+CTDB_MANAGES_CLAMD=yes
+CTDB_CLAMD_SOCKET="/path/to/clamd.socket"
+</screen>
+
+<para>
+Second, activate the eventscript
+</para>
+<screen format="linespecific">
+ctdb enablescript 31.clamd
+</screen>
+
+<para>
+Third, CTDB will now be starting and stopping this service accordingly,
+so make sure that the system is not configured to start/stop this service
+automatically.
+On RedHat systems you can disable the system starting/stopping CLAMAV automatically by running :
+<screen format="linespecific">
+chkconfig clamd off
+</screen>
+</para>
+
+
+<para>
+Once you have restarted CTDBD, use
+<screen format="linespecific">
+ctdb scriptstatus
+</screen>
+and verify that the 31.clamd eventscript is listed and that it was executed successfully.
+</para>
+
+</refsect2>
+</refsect1>
+
+
+
  
    <refsect1><title>SEE ALSO</title>
      <para>