- added monitoring of rpc ports for nfs, and of Samba ports and directories
author Andrew Tridgell <tridge@samba.org>
Wed, 6 Jun 2007 02:08:42 +0000 (12:08 +1000)
committer Andrew Tridgell <tridge@samba.org>
Wed, 6 Jun 2007 02:08:42 +0000 (12:08 +1000)
- added monitoring of the ethernet link state

When monitoring detects an error, the node loses its public IP address

common/ctdb_tunables.c
config/events.d/10.interface
config/events.d/50.samba
config/events.d/59.nfslock
config/events.d/60.nfs
config/functions

index d417d6c2763988268b054451638b3f1af32fdabc..c799ce4ec4c40f2703d8358b54d38114567b01a8 100644 (file)
@@ -36,7 +36,7 @@ static const struct {
        { "RecoverInterval",   1,  offsetof(struct ctdb_tunable, recover_interval) },
        { "ElectionTimeout",   3,  offsetof(struct ctdb_tunable, election_timeout) },
        { "TakeoverTimeout",   5,  offsetof(struct ctdb_tunable, takeover_timeout) },
-       { "MonitorInterval",  60,  offsetof(struct ctdb_tunable, monitor_interval) },
+       { "MonitorInterval",  15,  offsetof(struct ctdb_tunable, monitor_interval) },
 };
 
 /*
index ea28eb70451b1062410234499f10f1d1fb00779a..a6fcbba9b4e102b07fa830c98a9f2a5493031da1 100755 (executable)
@@ -6,6 +6,9 @@
 # public interface
 
 . /etc/ctdb/functions
+loadconfig ctdb
+
+[ -z "$CTDB_PUBLIC_INTERFACE" ] && exit 0
 
 cmd="$1"
 shift
@@ -73,6 +76,15 @@ case $cmd in
      shutdown)
        ;;
 
+     monitor)
+       [ -x /usr/sbin/ethtool ] && {
+           /usr/sbin/ethtool $CTDB_PUBLIC_INTERFACE | grep 'Link detected: yes' > /dev/null || {
+               echo "`date` ERROR: No link on network interface $CTDB_PUBLIC_INTERFACE"
+               exit 1
+           }
+       }
+       ;;
+
 esac
 
 exit 0
index affd964c7defcb7056c45bcf6f55f3457fd760e8..75342f5f0db36aeb730ea121d7787102d63d0c30 100755 (executable)
@@ -50,6 +50,15 @@ case $cmd in
        service smb stop
        service winbind stop
        ;;
+
+     monitor)
+       smb_dirs=`testparm -st 2> /dev/null | egrep '^\s*path = '  | cut -d= -f2`
+       ctdb_check_directories "Samba" $smb_dirs        
+
+       smb_ports=`testparm -stv 2> /dev/null | egrep '\s*smb ports =' | cut -d= -f2`
+       ctdb_check_tcp_ports "Samba" $smb_ports
+       ;;
+
 esac
 
 # ignore unknown commands
index 4bdf51f778581e94f93e5ae4191641d675aa1e3c..1dba335824d0636fb8d2c37bddadd4bef49e617a 100755 (executable)
@@ -51,6 +51,14 @@ case $cmd in
 
        /bin/rm -f /etc/ctdb/state/statd/restart
        ;;
+
+      monitor)
+       # check that statd and lockd respond to rpc requests
+       ctdb_check_rpc "statd" 100024 1
+       ctdb_check_rpc "lockd" 100021 1
+       ctdb_check_directories "statd" $STATD_SHARED_DIRECTORY
+       ;;
+
 esac
 
 exit 0
index 258a2309d0cebb66c6ba74253eea08c4e4b441df..549d87cc233268ff4947c3208d6ba4a724497301 100755 (executable)
@@ -9,6 +9,8 @@ loadconfig nfs
 cmd="$1"
 shift
 
+PATH=/usr/bin:/bin:/usr/sbin:/sbin:$PATH
+
 case $cmd in 
      startup)
        mkdir -p /etc/ctdb/state/nfs
@@ -45,6 +47,16 @@ case $cmd in
        /bin/rm -f /etc/ctdb/state/nfs/restart
        ;;
 
+      monitor)
+       # check that NFS responds to rpc requests
+       ctdb_check_rpc "NFS" 100003 3
+       ctdb_check_rpc "mount" 100005 1
+
+       # and that its directories are available
+       nfs_dirs=`grep -v '^#' < /etc/exports | cut -d' ' -f1`
+       ctdb_check_directories "nfs" $nfs_dirs
+       ;;
+
 esac
 
 exit 0
index f557d62910719bba52741be0848d9c2691f5d498..4219f2238388796b8e7eda895e75cb2b08bfcaa7 100644 (file)
@@ -28,7 +28,7 @@ service() {
 
 ######################################################
 # wait for a set of tcp ports
-# usage: ctdb_wait_tcp_ports SERICE_NAME <ports...>
+# usage: ctdb_wait_tcp_ports SERVICE_NAME <ports...>
 ######################################################
 ctdb_wait_tcp_ports() {
   service_name="$1"
@@ -59,9 +59,10 @@ ctdb_wait_tcp_ports() {
 }
 
 
+
 ######################################################
 # wait for a set of directories
-# usage: ctdb_wait_directories SERICE_NAME <directories...>
+# usage: ctdb_wait_directories SERVICE_NAME <directories...>
 ######################################################
 ctdb_wait_directories() {
   service_name="$1"
@@ -84,3 +85,58 @@ ctdb_wait_directories() {
   echo "`/bin/date` Local directories for $service_name are available"
 }
 
+
+######################################################
+# check that a rpc server is registered with portmap
+# and responding to requests
+# usage: ctdb_check_rpc SERVICE_NAME PROGNUM VERSION
+######################################################
+ctdb_check_rpc() {
+    service_name="$1"
+    prognum="$2"
+    version="$3"
+    rpcinfo -u localhost $prognum $version > /dev/null || {
+           echo "`date` ERROR: $service_name not responding to rpc requests"
+           exit 1
+    }
+}
+
+######################################################
+# check a set of directories is available, else log and exit 1
+# usage: ctdb_check_directories SERVICE_NAME <directories...>
+######################################################
+ctdb_check_directories() {
+  service_name="$1"
+  shift
+  wait_dirs="$*"
+  [ -z "$wait_dirs" ] && return;
+  for d in $wait_dirs; do
+      # $d is quoted because a glob in the list can expand to a name with spaces
+      [ -d "$d" ] && continue
+      echo "`date` ERROR: $service_name directory $d not available"
+      exit 1
+  done
+}
+
+######################################################
+# check a set of tcp ports accepts connections on
+# 127.0.0.1; log an error and exit 1 on the first failure
+# usage: ctdb_check_tcp_ports SERVICE_NAME <ports...>
+######################################################
+ctdb_check_tcp_ports() {
+  service_name="$1"
+  shift
+  wait_ports="$*"
+  if [ -z "$wait_ports" ]; then return; fi
+  for p in $wait_ports; do
+      all_ok=1    # stays 1 when neither netcat nor nc is installed
+      if [ -x /usr/bin/netcat ]; then
+          /usr/bin/netcat -z 127.0.0.1 $p || all_ok=0
+      elif [ -x /usr/bin/nc ]; then
+          /usr/bin/nc -z 127.0.0.1 $p || all_ok=0
+      fi
+      [ $all_ok -eq 1 ] && continue
+      echo "`date` ERROR: $service_name tcp port $p is not responding"
+      exit 1
+  done
+}