config/functions

   1 # Hey Emacs, this is a -*- shell-script -*- !!!
   2
   3 # utility functions for ctdb event scripts
   4
   5 PATH=/bin:/usr/bin:/usr/sbin:/sbin:$PATH
   6
   7 [ -z "$CTDB_VARDIR" ] && {
   8     export CTDB_VARDIR="/var/ctdb"
   9 }
  10 [ -z "$CTDB_ETCDIR" ] && {
  11     export CTDB_ETCDIR="/etc"
  12 }
  13
  14 #######################################
  15 # pull in a system config file, if any
  16 _loadconfig() {
  17
  18     if [ -z "$1" ] ; then
  19         foo="${service_config:-${service_name}}"
  20         if [ -n "$foo" ] ; then
  21             loadconfig "$foo"
  22         fi
  23     elif [ "$1" != "ctdb" ] ; then
  24         loadconfig "ctdb"
  25     fi
  26
  27     if [ -f $CTDB_ETCDIR/sysconfig/$1 ]; then
  28         . $CTDB_ETCDIR/sysconfig/$1
  29     elif [ -f $CTDB_ETCDIR/default/$1 ]; then
  30         . $CTDB_ETCDIR/default/$1
  31     elif [ -f $CTDB_BASE/sysconfig/$1 ]; then
  32         . $CTDB_BASE/sysconfig/$1
  33     fi
  34 }
  35
  36 loadconfig () {
  37     _loadconfig "$@"
  38 }
  39
  40 ##############################################################
  41 # make sure CTDB_CURRENT_DEBUGLEVEL is set to the desired debug level
  42 # (integer)
  43 #
  44 # If it is already set then do nothing, since it might have been set
  45 # via a file in rc.local.d/.  If it is not set then set it by sourcing
  46 # /var/ctdb/eventscript_debuglevel. If this file does not exist then
  47 # create it using output from "ctdb getdebug".  If the option 1st arg
  48 # is "create" then don't source an existing file but create a new one
  49 # instead - this is useful for creating the file just once in each
  50 # event run in 00.ctdb.  If there's a problem getting the debug level
  51 # from ctdb then it is silently set to 0 - no use spamming logs if our
  52 # debug code is broken...
  53 ctdb_set_current_debuglevel ()
  54 {
  55     [ -z "$CTDB_CURRENT_DEBUGLEVEL" ] || return 0
  56
  57     _f="$CTDB_VARDIR/eventscript_debuglevel"
  58
  59     if [ "$1" = "create" -o ! -r "$_f" ] ; then
  60         _t=$(ctdb getdebug -Y 2>/dev/null)
  61         # get last field of output
  62         _t="${_t%:}"
  63         _t="${_t##*:}"
  64         # Defaults to 0
  65         echo "export CTDB_CURRENT_DEBUGLEVEL=\"${_t:-0}\"" >"$_f"
  66     fi
  67
  68     . "$_f"
  69 }
  70
  71 debug ()
  72 {
  73     if [ $CTDB_CURRENT_DEBUGLEVEL -ge 4 ] ; then
  74         # If there are arguments then echo them.  Otherwise expect to
  75         # use stdin, which allows us to pass lots of debug using a
  76         # here document.
  77         if [ -n "$1" ] ; then
  78             echo "DEBUG: $*"
  79         elif ! tty -s ; then
  80             sed -e 's@^@DEBUG: @'
  81         fi
  82     fi
  83 }
  84
  85 ##############################################################
  86 # determine on what type of system (init style) we are running
  87 detect_init_style() {
  88     # only do detection if not already set:
  89     test "x$CTDB_INIT_STYLE" != "x" && return
  90
  91     if [ -x /sbin/startproc ]; then
  92         CTDB_INIT_STYLE="suse"
  93     elif [ -x /sbin/start-stop-daemon ]; then
  94         CTDB_INIT_STYLE="debian"
  95     else
  96         CTDB_INIT_STYLE="redhat"
  97     fi
  98 }
  99
 100 ######################################################
 101 # simulate /sbin/service on platforms that don't have it
 102 # _service() makes it easier to hook the service() function for
 103 # testing.
 104 _service ()
 105 {
 106   _service_name="$1"
 107   _op="$2"
 108
 109   # do nothing, when no service was specified
 110   [ -z "$_service_name" ] && return
 111
 112   if [ -x /sbin/service ]; then
 113       $_nice /sbin/service "$_service_name" "$_op"
 114   elif [ -x $CTDB_ETCDIR/init.d/$_service_name ]; then
 115       $_nice $CTDB_ETCDIR/init.d/$_service_name "$_op"
 116   elif [ -x $CTDB_ETCDIR/rc.d/init.d/$_service_name ]; then
 117       $_nice $CTDB_ETCDIR/rc.d/init.d/$_service_name "$_op"
 118   fi
 119 }
 120
 121 service()
 122 {
 123     _nice=""
 124     _service "$@"
 125 }
 126
 127 ######################################################
 128 # simulate /sbin/service (niced) on platforms that don't have it
 129 nice_service()
 130 {
 131     _nice="nice"
 132     _service "$@"
 133 }
 134
 135 ######################################################
 136 # wrapper around /proc/ settings to allow them to be hooked
 137 # for testing
 138 # 1st arg is relative path under /proc/, 2nd arg is value to set
 139 set_proc ()
 140 {
 141     echo "$2" >"/proc/$1"
 142 }
 143
 144 ######################################################
 145 # wrapper around getting file contents from /proc/ to allow
 146 # this to be hooked for testing
 147 # 1st arg is relative path under /proc/
 148 get_proc ()
 149 {
 150     cat "/proc/$1"
 151 }
 152
 153 ######################################################
 154 # Check that an RPC service is healthy -
 155 # this includes allowing a certain number of failures
 156 # before marking the NFS service unhealthy.
 157 #
 158 # usage: nfs_check_rpc_service SERVICE_NAME [ triple ...]
 159 #
 160 # each triple is a set of 3 arguments: an operator, a
 161 # fail count limit and an action string.
 162 #
 163 # For example:
 164 #
 165 #       nfs_check_rpc_service "lockd" \
 166 #           -ge 15 "verbose restart unhealthy" \
 167 #           -eq 10 "restart:bs"
 168 #
 169 # says that if lockd is down for 15 iterations then do
 170 # a verbose restart of lockd and mark the node unhealthy.
 171 # Before this, after 10 iterations of failure, the
 172 # service is restarted silently in the background.
 173 # Order is important: the number of failures need to be
 174 # specified in reverse order because processing stops
 175 # after the first condition that is true.
 176 ######################################################
 177 nfs_check_rpc_service ()
 178 {
 179     _prog_name="$1" ; shift
 180
 181     _version=1
 182     _rpc_prog="$_prog_name"
 183     _restart=""
 184     _opts=""
 185     case "$_prog_name" in
 186         knfsd)
 187             _rpc_prog=nfs
 188             _version=3
 189             _restart="echo 'Trying to restart NFS service'"
 190             _restart="${_restart}; startstop_nfs restart"
 191             ;;
 192         mountd)
 193             _opts="${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
 194             ;;
 195         rquotad)
 196             _opts="${RQUOTAD_PORT:+ -p }${RQUOTAD_PORT}"
 197             ;;
 198         lockd)
 199             _rpc_prog=nlockmgr
 200             _version=4
 201             _restart="echo 'Trying to restart lock manager service'"
 202             _restart="${_restart}; startstop_nfslock restart"
 203             ;;
 204         statd)
 205             _rpc_prog=status
 206             _opts="${STATD_HOSTNAME:+ -n }${STATD_HOSTNAME}"
 207             _opts="${_opts}${STATD_PORT:+ -p }${STATD_PORT}"
 208             _opts="${_opts}${STATD_OUTGOING_PORT:+ -o }${STATD_OUTGOING_PORT}"
 209             ;;
 210         *)
 211             echo "Internal error: unknown RPC program \"$_prog_name\"."
 212             exit 1
 213     esac
 214
 215     _service_name="nfs_${_prog_name}"
 216
 217     if ctdb_check_rpc "$_rpc_prog" $_version >/dev/null ; then
 218         ctdb_counter_init "$_service_name"
 219         return 0
 220     fi
 221
 222     ctdb_counter_incr "$_service_name"
 223
 224     while [ -n "$3" ] ; do
 225         ctdb_check_counter "quiet" "$1" "$2" "$_service_name" || {
 226             for _action in $3 ; do
 227                 case "$_action" in
 228                     verbose)
 229                         echo "$ctdb_check_rpc_out"
 230                         ;;
 231                     restart|restart:*)
 232                         # No explicit command specified, construct rpc command.
 233                         if [ -z "$_restart" ] ; then
 234                             _p="rpc.${_prog_name}"
 235                             _restart="echo 'Trying to restart $_prog_name [${_p}${_opts}]'"
 236                             _restart="${_restart}; killall -q -9 $_p"
 237                             _restart="${_restart}; $_p $_opts"
 238                         fi
 239
 240                         # Process restart flags...
 241                         _flags="${_action#restart:}"
 242                         # There may not have been a colon...
 243                         [ "$_flags" != "$_action" ] || _flags=""
 244                         # q=quiet - everything to /dev/null
 245                         if [ "${_flags#*q}" != "$_flags" ] ; then
 246                             _restart="{ ${_restart} ; } >/dev/null 2>&1"
 247                         fi
 248                         # s=stealthy - last command to /dev/null
 249                         if [ "${_flags#*s}" != "$_flags" ] ; then
 250                             _restart="${_restart} >/dev/null 2>&1"
 251                         fi
 252                         # b=background - the whole thing, easy and reliable
 253                         if [ "${_flags#*b}" != "$_flags" ] ; then
 254                             _restart="{ ${_restart} ; } &"
 255                         fi
 256
 257                         # Do it!
 258                         eval "${_restart}"
 259                         ;;
 260                     unhealthy)
 261                         exit 1
 262                         ;;
 263                     *)
 264                         echo "Internal error: unknown action \"$_action\"."
 265                         exit 1
 266                 esac
 267             done
 268
 269             # Only process the first action group.
 270             break
 271         }
 272         shift 3
 273     done
 274 }
 275
 276 ######################################################
 277 # check that a rpc server is registered with portmap
 278 # and responding to requests
 279 # usage: ctdb_check_rpc SERVICE_NAME VERSION
 280 ######################################################
 281 ctdb_check_rpc ()
 282 {
 283     progname="$1"
 284     version="$2"
 285
 286     if ! ctdb_check_rpc_out=$(rpcinfo -u localhost $progname $version 2>&1) ; then
 287         ctdb_check_rpc_out="ERROR: $progname failed RPC check:
 288 $ctdb_check_rpc_out"
 289         echo "$ctdb_check_rpc_out"
 290         return 1
 291     fi
 292 }
 293
 294 ######################################################
 295 # check a set of directories is available
 296 # return 1 on a missing directory
 297 # usage: ctdb_check_directories_probe SERVICE_NAME <directories...>
 298 ######################################################
 299 ctdb_check_directories_probe() {
 300     while IFS="" read d ; do
 301         case "$d" in
 302             *%*)
 303                 continue
 304                 ;;
 305             *)
 306                 [ -d "${d}/." ] || return 1
 307         esac
 308     done
 309 }
 310
 311 ######################################################
 312 # check a set of directories is available
 313 # usage: ctdb_check_directories SERVICE_NAME <directories...>
 314 ######################################################
 315 ctdb_check_directories() {
 316     n="${1:-${service_name}}"
 317     ctdb_check_directories_probe || {
 318         echo "ERROR: $n directory \"$d\" not available"
 319         exit 1
 320     }
 321 }
 322
 323 ######################################################
 324 # check a set of tcp ports
 325 # usage: ctdb_check_tcp_ports <ports...>
 326 ######################################################
 327
 328 # This flag file is created when a service is initially started.  It
 329 # is deleted the first time TCP port checks for that service succeed.
 330 # Until then ctdb_check_tcp_ports() prints a more subtle "error"
 331 # message if a port check fails.
 332 _ctdb_check_tcp_common ()
 333 {
 334     _ctdb_service_started_file="$ctdb_fail_dir/$service_name.started"
 335 }
 336
 337 ctdb_check_tcp_init ()
 338 {
 339     _ctdb_check_tcp_common
 340     mkdir -p "${_ctdb_service_started_file%/*}" # dirname
 341     touch "$_ctdb_service_started_file"
 342 }
 343
 344 ctdb_check_tcp_ports()
 345 {
 346     _ctdb_check_tcp_common
 347
 348     _cmd='netstat -l -t -n'
 349     _ns=$($_cmd)
 350     for _p ; do  # process each function argument (port)
 351         for _a in '0\.0\.0\.0' '::' ; do
 352             _pat="[[:space:]]${_a}:${_p}[[:space:]]+[^[:space:]]+[[:space:]]+LISTEN"
 353             if echo "$_ns" | grep -E -q "$_pat" ; then
 354                 # We matched the port, so process next port
 355                 continue 2
 356             fi
 357         done
 358
 359         # We didn't match the port, so flag an error, print some debug
 360         if [ ! -f "$_ctdb_service_started_file" ] ; then
 361             echo "ERROR: $service_name tcp port $_p is not responding"
 362 debug <<EOF
 363 $_cmd shows this output:
 364 $_ns
 365 EOF
 366         else
 367             echo "INFO: $service_name tcp port $_p is not responding"
 368         fi
 369
 370         return 1
 371     done
 372
 373     rm -f "$_ctdb_service_started_file"
 374
 375     return 0
 376 }
 377
 378 ######################################################
 379 # check a unix socket
 380 # usage: ctdb_check_unix_socket SERVICE_NAME <socket_path>
 381 ######################################################
 382 ctdb_check_unix_socket() {
 383     socket_path="$1"
 384     [ -z "$socket_path" ] && return
 385
 386     if ! netstat --unix -a -n | grep -q "^unix.*LISTEN.*${socket_path}$"; then
 387         echo "ERROR: $service_name socket $socket_path not found"
 388         return 1
 389     fi
 390 }
 391
 392 ######################################################
 393 # check a command returns zero status
 394 # usage: ctdb_check_command SERVICE_NAME <command>
 395 ######################################################
 396 ctdb_check_command() {
 397   service_name="$1"
 398   wait_cmd="$2"
 399   [ -z "$wait_cmd" ] && return;
 400   $wait_cmd > /dev/null 2>&1 || {
 401       echo "ERROR: $service_name - $wait_cmd returned error"
 402       exit 1
 403   }
 404 }
 405
 406 ################################################
 407 # kill off any TCP connections with the given IP
 408 ################################################
 409 kill_tcp_connections() {
 410     _IP="$1"
 411     _failed=0
 412
 413     _killcount=0
 414     connfile="$CTDB_VARDIR/state/connections.$_IP"
 415     netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
 416     netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
 417
 418     while read dest src; do
 419         srcip=`echo $src | sed -e "s/:[^:]*$//"`
 420         srcport=`echo $src | sed -e "s/^.*://"`
 421         destip=`echo $dest | sed -e "s/:[^:]*$//"`
 422         destport=`echo $dest | sed -e "s/^.*://"`
 423         echo "Killing TCP connection $srcip:$srcport $destip:$destport"
 424         ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
 425         case $destport in
 426           # we only do one-way killtcp for CIFS
 427           139|445) : ;;
 428           # for all others we do 2-way
 429           *)
 430                 ctdb killtcp $destip:$destport $srcip:$srcport >/dev/null 2>&1 || _failed=1
 431                 ;;
 432         esac
 433         _killcount=`expr $_killcount + 1`
 434      done < $connfile
 435     rm -f $connfile
 436
 437     [ $_failed = 0 ] || {
 438         echo "Failed to send killtcp control"
 439         return;
 440     }
 441     [ $_killcount -gt 0 ] || {
 442         return;
 443     }
 444     _count=0
 445     while netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" > /dev/null; do
 446         sleep 1
 447         _count=`expr $_count + 1`
 448         [ $_count -gt 3 ] && {
 449             echo "Timed out killing tcp connections for IP $_IP"
 450             return;
 451         }
 452     done
 453     echo "killed $_killcount TCP connections to released IP $_IP"
 454 }
 455
 456 ##################################################################
 457 # kill off the local end for any TCP connections with the given IP
 458 ##################################################################
 459 kill_tcp_connections_local_only() {
 460     _IP="$1"
 461     _failed=0
 462
 463     _killcount=0
 464     connfile="$CTDB_VARDIR/state/connections.$_IP"
 465     netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
 466     netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
 467
 468     while read dest src; do
 469         srcip=`echo $src | sed -e "s/:[^:]*$//"`
 470         srcport=`echo $src | sed -e "s/^.*://"`
 471         destip=`echo $dest | sed -e "s/:[^:]*$//"`
 472         destport=`echo $dest | sed -e "s/^.*://"`
 473         echo "Killing TCP connection $srcip:$srcport $destip:$destport"
 474         ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
 475         _killcount=`expr $_killcount + 1`
 476      done < $connfile
 477     rm -f $connfile
 478
 479     [ $_failed = 0 ] || {
 480         echo "Failed to send killtcp control"
 481         return;
 482     }
 483     [ $_killcount -gt 0 ] || {
 484         return;
 485     }
 486     _count=0
 487     while netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" > /dev/null; do
 488         sleep 1
 489         _count=`expr $_count + 1`
 490         [ $_count -gt 3 ] && {
 491             echo "Timed out killing tcp connections for IP $_IP"
 492             return;
 493         }
 494     done
 495     echo "killed $_killcount TCP connections to released IP $_IP"
 496 }
 497
 498 ##################################################################
 499 # tickle any TCP connections with the given IP
 500 ##################################################################
 501 tickle_tcp_connections() {
 502     _IP="$1"
 503     _failed=0
 504
 505     _killcount=0
 506     connfile="$CTDB_VARDIR/state/connections.$_IP"
 507     netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
 508     netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
 509
 510     while read dest src; do
 511         srcip=`echo $src | sed -e "s/:[^:]*$//"`
 512         srcport=`echo $src | sed -e "s/^.*://"`
 513         destip=`echo $dest | sed -e "s/:[^:]*$//"`
 514         destport=`echo $dest | sed -e "s/^.*://"`
 515         echo "Tickle TCP connection $srcip:$srcport $destip:$destport"
 516         ctdb tickle $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
 517         echo "Tickle TCP connection $destip:$destport $srcip:$srcport"
 518         ctdb tickle $destip:$destport $srcip:$srcport >/dev/null 2>&1 || _failed=1
 519      done < $connfile
 520     rm -f $connfile
 521
 522     [ $_failed = 0 ] || {
 523         echo "Failed to send tickle control"
 524         return;
 525     }
 526 }
 527
 528 ########################################################
 529 # start/stop the nfs service on different platforms
 530 ########################################################
 531 startstop_nfs() {
 532         PLATFORM="unknown"
 533         [ -x $CTDB_ETCDIR/init.d/nfsserver ] && {
 534                 PLATFORM="sles"
 535         }
 536         [ -x $CTDB_ETCDIR/init.d/nfslock ] && {
 537                 PLATFORM="rhel"
 538         }
 539
 540         case $PLATFORM in
 541         sles)
 542                 case $1 in
 543                 start)
 544                         service nfsserver start
 545                         ;;
 546                 stop)
 547                         service nfsserver stop > /dev/null 2>&1
 548                         ;;
 549                 restart)
 550                         set_proc "fs/nfsd/threads" 0
 551                         service nfsserver stop > /dev/null 2>&1
 552                         pkill -9 nfsd
 553                         service nfsserver start
 554                         ;;
 555                 esac
 556                 ;;
 557         rhel)
 558                 case $1 in
 559                 start)
 560                         service nfslock start
 561                         service nfs start
 562                         ;;
 563                 stop)
 564                         service nfs stop
 565                         service nfslock stop
 566                         ;;
 567                 restart)
 568                         set_proc "fs/nfsd/threads" 0
 569                         service nfs stop > /dev/null 2>&1
 570                         service nfslock stop > /dev/null 2>&1
 571                         pkill -9 nfsd
 572                         service nfslock start
 573                         service nfs start
 574                         ;;
 575                 esac
 576                 ;;
 577         *)
 578                 echo "Unknown platform. NFS is not supported with ctdb"
 579                 exit 1
 580                 ;;
 581         esac
 582 }
 583
 584 ########################################################
 585 # start/stop the nfs lockmanager service on different platforms
 586 ########################################################
 587 startstop_nfslock() {
 588         PLATFORM="unknown"
 589         [ -x $CTDB_ETCDIR/init.d/nfsserver ] && {
 590                 PLATFORM="sles"
 591         }
 592         [ -x $CTDB_ETCDIR/init.d/nfslock ] && {
 593                 PLATFORM="rhel"
 594         }
 595
 596         case $PLATFORM in
 597         sles)
 598                 # for sles there is no service for lockmanager
 599                 # so we instead just shutdown/restart nfs
 600                 case $1 in
 601                 start)
 602                         service nfsserver start
 603                         ;;
 604                 stop)
 605                         service nfsserver stop > /dev/null 2>&1
 606                         ;;
 607                 restart)
 608                         service nfsserver stop
 609                         service nfsserver start
 610                         ;;
 611                 esac
 612                 ;;
 613         rhel)
 614                 case $1 in
 615                 start)
 616                         service nfslock start
 617                         ;;
 618                 stop)
 619                         service nfslock stop > /dev/null 2>&1
 620                         ;;
 621                 restart)
 622                         service nfslock stop
 623                         service nfslock start
 624                         ;;
 625                 esac
 626                 ;;
 627         *)
 628                 echo "Unknown platform. NFS locking is not supported with ctdb"
 629                 exit 1
 630                 ;;
 631         esac
 632 }
 633
 634 add_ip_to_iface()
 635 {
 636         local _iface=$1
 637         local _ip=$2
 638         local _maskbits=$3
 639         local _state_dir="$CTDB_VARDIR/state/interface_modify"
 640         local _lockfile="$_state_dir/$_iface.flock"
 641         local _readd_base="$_state_dir/$_iface.readd.d"
 642
 643         mkdir -p $_state_dir || {
 644                 ret=$?
 645                 echo "Failed to mkdir -p $_state_dir - $ret"
 646                 return $ret
 647         }
 648
 649         test -f $_lockfile || {
 650                 touch $_lockfile
 651         }
 652
 653         flock --timeout 30 $_lockfile $CTDB_BASE/interface_modify.sh add "$_iface" "$_ip" "$_maskbits" "$_readd_base"
 654         return $?
 655 }
 656
 657 delete_ip_from_iface()
 658 {
 659         local _iface=$1
 660         local _ip=$2
 661         local _maskbits=$3
 662         local _state_dir="$CTDB_VARDIR/state/interface_modify"
 663         local _lockfile="$_state_dir/$_iface.flock"
 664         local _readd_base="$_state_dir/$_iface.readd.d"
 665
 666         mkdir -p $_state_dir || {
 667                 ret=$?
 668                 echo "Failed to mkdir -p $_state_dir - $ret"
 669                 return $ret
 670         }
 671
 672         test -f $_lockfile || {
 673                 touch $_lockfile
 674         }
 675
 676         flock --timeout 30 $_lockfile $CTDB_BASE/interface_modify.sh delete "$_iface" "$_ip" "$_maskbits" "$_readd_base"
 677         return $?
 678 }
 679
 680 setup_iface_ip_readd_script()
 681 {
 682         local _iface=$1
 683         local _ip=$2
 684         local _maskbits=$3
 685         local _readd_script=$4
 686         local _state_dir="$CTDB_VARDIR/state/interface_modify"
 687         local _lockfile="$_state_dir/$_iface.flock"
 688         local _readd_base="$_state_dir/$_iface.readd.d"
 689
 690         mkdir -p $_state_dir || {
 691                 ret=$?
 692                 echo "Failed to mkdir -p $_state_dir - $ret"
 693                 return $ret
 694         }
 695
 696         test -f $_lockfile || {
 697                 touch $_lockfile
 698         }
 699
 700         flock --timeout 30 $_lockfile $CTDB_BASE/interface_modify.sh readd_script "$_iface" "$_ip" "$_maskbits" "$_readd_base" "$_readd_script"
 701         return $?
 702 }
 703
########################################################
# some simple logic for counting events - per eventscript
# usage: ctdb_counter_init [SERVICE_NAME]
#        ctdb_counter_incr [SERVICE_NAME]
#        ctdb_check_counter_limit [<limit> [<quiet>]]
# ctdb_check_counter_limit prints an error and exits with status 1
# when count >= <limit>
########################################################
 711 _ctdb_counter_common () {
 712     _service_name="${1:-${service_name}}"
 713     _counter_file="$ctdb_fail_dir/$_service_name"
 714     mkdir -p "${_counter_file%/*}" # dirname
 715 }
 716 ctdb_counter_init () {
 717     _ctdb_counter_common "$1"
 718
 719     >"$_counter_file"
 720 }
 721 ctdb_counter_incr () {
 722     _ctdb_counter_common "$1"
 723
 724     # unary counting!
 725     echo -n 1 >> "$_counter_file"
 726 }
 727 ctdb_check_counter_limit () {
 728     _ctdb_counter_common
 729
 730     _limit="${1:-${service_fail_limit}}"
 731     _quiet="$2"
 732
 733     # unary counting!
 734     _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
 735     if [ $_size -ge $_limit ] ; then
 736         echo "ERROR: more than $_limit consecutive failures for $service_name, marking cluster unhealthy"
 737         exit 1
 738     elif [ $_size -gt 0 -a -z "$_quiet" ] ; then
 739         echo "WARNING: less than $_limit consecutive failures ($_size) for $service_name, not unhealthy yet"
 740     fi
 741 }
 742 ctdb_check_counter_equal () {
 743     _ctdb_counter_common
 744
 745     _limit=$1
 746
 747     # unary counting!
 748     _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
 749     if [ $_size -eq $_limit ] ; then
 750         return 1
 751     fi
 752     return 0
 753 }
 754 ctdb_check_counter () {
 755     _msg="${1:-error}"  # "error"  - anything else is silent on fail
 756     _op="${2:--ge}"  # an integer operator supported by test
 757     _limit="${3:-${service_fail_limit}}"
 758     shift 3
 759     _ctdb_counter_common "$1"
 760
 761     # unary counting!
 762     _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
 763     if [ $_size $_op $_limit ] ; then
 764         if [ "$_msg" = "error" ] ; then
 765             echo "ERROR: $_limit consecutive failures for $_service_name, marking node unhealthy"
 766             exit 1
 767         else
 768             return 1
 769         fi
 770     fi
 771 }
 772
 773 ########################################################
 774
 775 ctdb_status_dir="$CTDB_VARDIR/status"
 776 ctdb_fail_dir="$CTDB_VARDIR/failcount"
 777
 778 ctdb_setup_service_state_dir ()
 779 {
 780     service_state_dir="$CTDB_VARDIR/state/${1:-${service_name}}"
 781     mkdir -p "$service_state_dir" || {
 782         echo "Error creating state dir \"$service_state_dir\""
 783         exit 1
 784     }
 785 }
 786
 787 ########################################################
 788 # Managed status history, for auto-start/stop
 789
 790 ctdb_managed_dir="$CTDB_VARDIR/managed_history"
 791
 792 _ctdb_managed_common ()
 793 {
 794     _service_name="${1:-${service_name}}"
 795     _ctdb_managed_file="$ctdb_managed_dir/$_service_name"
 796 }
 797
 798 ctdb_service_managed ()
 799 {
 800     _ctdb_managed_common "$@"
 801     mkdir -p "$ctdb_managed_dir"
 802     touch "$_ctdb_managed_file"
 803 }
 804
 805 ctdb_service_unmanaged ()
 806 {
 807     _ctdb_managed_common "$@"
 808     rm -f "$_ctdb_managed_file"
 809 }
 810
 811 is_ctdb_previously_managed_service ()
 812 {
 813     _ctdb_managed_common "$@"
 814     [ -f "$_ctdb_managed_file" ]
 815 }
 816
 817 ########################################################
 818 # Check and set status
 819
 820 log_status_cat ()
 821 {
 822     echo "node is \"$1\", \"${script_name}\" reports problem: $(cat $2)"
 823 }
 824
 825 ctdb_checkstatus ()
 826 {
 827     if [ -r "$ctdb_status_dir/$script_name/unhealthy" ] ; then
 828         log_status_cat "unhealthy" "$ctdb_status_dir/$script_name/unhealthy"
 829         return 1
 830     elif [ -r "$ctdb_status_dir/$script_name/banned" ] ; then
 831         log_status_cat "banned" "$ctdb_status_dir/$script_name/banned"
 832         return 2
 833     else
 834         return 0
 835     fi
 836 }
 837
 838 ctdb_setstatus ()
 839 {
 840     d="$ctdb_status_dir/$script_name"
 841     case "$1" in
 842         unhealthy|banned)
 843             mkdir -p "$d"
 844             cat "$2" >"$d/$1"
 845             ;;
 846         *)
 847             for i in "banned" "unhealthy" ; do
 848                 rm -f "$d/$i"
 849             done
 850             ;;
 851     esac
 852 }
 853
 854 ##################################################################
 855 # Reconfigure a service on demand
 856
 857 _ctdb_service_reconfigure_common ()
 858 {
 859     _d="$ctdb_status_dir/${1:-${service_name}}"
 860     mkdir -p "$_d"
 861     _ctdb_service_reconfigure_flag="$_d/reconfigure"
 862 }
 863
 864 ctdb_service_needs_reconfigure ()
 865 {
 866     _ctdb_service_reconfigure_common "$@"
 867     [ -e "$_ctdb_service_reconfigure_flag" ]
 868 }
 869
 870 ctdb_service_set_reconfigure ()
 871 {
 872     _ctdb_service_reconfigure_common "$@"
 873     >"$_ctdb_service_reconfigure_flag"
 874 }
 875
 876 ctdb_service_unset_reconfigure ()
 877 {
 878     _ctdb_service_reconfigure_common "$@"
 879     rm -f "$_ctdb_service_reconfigure_flag"
 880 }
 881
 882 ctdb_service_reconfigure ()
 883 {
 884     echo "Reconfiguring service \"$service_name\"..."
 885     ctdb_service_unset_reconfigure "$@"
 886     service_reconfigure "$@" || return $?
 887     ctdb_counter_init "$@"
 888 }
 889
 890 # Default service_reconfigure() function.
 891 service_reconfigure ()
 892 {
 893     service "${1:-$service_name}" restart
 894 }
 895
 896 ctdb_service_check_reconfigure ()
 897 {
 898     # Only do this for certain events.
 899     case "$event_name" in
 900         monitor|ipreallocated) : ;;
 901         *) return 0
 902     esac
 903
 904     if ctdb_service_needs_reconfigure "$@" ; then
 905         ctdb_service_reconfigure "$@"
 906
 907         # Fall through to non-monitor events.
 908         [ "$event_name" = "monitor" ] || return 0
 909
 910         # We don't want to proceed with the rest of the monitor event
 911         # here, so we exit.  However, if we exit 0 then, if the
 912         # service was previously broken, we might return a false
 913         # positive.  So we simply retrieve the status of this script
 914         # from the previous monitor loop and exit with that status.
 915         ctdb scriptstatus | \
 916             grep -q -E "^${script_name}[[:space:]]+Status:OK[[:space:]]"
 917         exit $?
 918     fi
 919 }
 920
 921 ##################################################################
 922 # Does CTDB manage this service? - and associated auto-start/stop
 923
 924 ctdb_compat_managed_service ()
 925 {
 926     if [ "$1" = "yes" -a "$2" = "$_service_name" ] ; then
 927         CTDB_MANAGED_SERVICES="$CTDB_MANAGED_SERVICES $2"
 928     fi
 929 }
 930
 931 is_ctdb_managed_service ()
 932 {
 933     _service_name="${1:-${service_name}}"
 934
 935     # $t is used just for readability and to allow better accurate
 936     # matching via leading/trailing spaces
 937     t=" $CTDB_MANAGED_SERVICES "
 938
 939     # Return 0 if "<space>$_service_name<space>" appears in $t
 940     if [ "${t#* ${_service_name} }" != "${t}" ] ; then
 941         return 0
 942     fi
 943
 944     # If above didn't match then update $CTDB_MANAGED_SERVICES for
 945     # backward compatibility and try again.
 946     ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD"   "vsftpd"
 947     ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA"    "samba"
 948     ctdb_compat_managed_service "$CTDB_MANAGES_SCP"      "scp"
 949     ctdb_compat_managed_service "$CTDB_MANAGES_WINBIND"  "winbind"
 950     ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"    "httpd"
 951     ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI"    "iscsi"
 952     ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD"    "clamd"
 953     ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs"
 954     ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs-ganesha-gpfs"
 955
 956     t=" $CTDB_MANAGED_SERVICES "
 957
 958     # Return 0 if "<space>$_service_name<space>" appears in $t
 959     [ "${t#* ${_service_name} }" != "${t}" ]
 960 }
 961
 962 ctdb_start_stop_service ()
 963 {
 964     _service_name="${1:-${service_name}}"
 965
 966     [ "$event_name" = "monitor" ] || return 0
 967
 968     if is_ctdb_managed_service "$_service_name" ; then
 969         if ! is_ctdb_previously_managed_service "$_service_name" ; then
 970             echo "Starting service \"$_service_name\" - now managed"
 971             ctdb_service_start "$_service_name"
 972             exit $?
 973         fi
 974     else
 975         if is_ctdb_previously_managed_service "$_service_name" ; then
 976             echo "Stopping service \"$_service_name\" - no longer managed"
 977             ctdb_service_stop "$_service_name"
 978             exit $?
 979         fi
 980     fi
 981 }
 982
 983 ctdb_service_start ()
 984 {
 985     # The service is marked managed if we've ever tried to start it.
 986     ctdb_service_managed "$@"
 987
 988     # Here we only want $1.  If no argument is passed then
 989     # service_start needs to know.
 990     service_start "$@" || return $?
 991
 992     ctdb_counter_init "$@"
 993     ctdb_check_tcp_init
 994 }
 995
 996 ctdb_service_stop ()
 997 {
 998     ctdb_service_unmanaged "$@"
 999     service_stop "$@"
1000 }
1001
1002 # Default service_start() and service_stop() functions.
1003
1004 # These may be overridden in an eventscript.  When overriding, the
1005 # following convention must be followed.  If these functions are
1006 # called with no arguments then they may use internal logic to
1007 # determine whether the service is managed and, therefore, whether
1008 # they should take any action.  However, if the service name is
1009 # specified as an argument then an attempt must be made to start or
1010 # stop the service.  This is because the auto-start/stop code calls
1011 # them with the service name as an argument.
service_start ()
{
    # An explicit service name wins; otherwise use $service_name.
    if [ -n "$1" ] ; then
        service "$1" start
    else
        service "${service_name}" start
    fi
}
1016
service_stop ()
{
    # An explicit service name wins; otherwise use $service_name.
    if [ -n "$1" ] ; then
        service "$1" stop
    else
        service "${service_name}" stop
    fi
}
1021
1022 ##################################################################
1023
# Handle the events that are dealt with the same way in every script.
#
# $1:  event name
# $2+: for "setstatus", the arguments passed on to ctdb_setstatus
#
# "status" and "setstatus" are handled here and the script then exits
# with the status of the handler.  For any other event this function
# simply returns.
ctdb_standard_event_handler ()
{
    if [ "$1" = "status" ] ; then
        ctdb_checkstatus
        exit
    elif [ "$1" = "setstatus" ] ; then
        shift
        ctdb_setstatus "$@"
        exit
    fi
}
1038
# Print the network address of an IPv4 host address.
#
# $1: host address as a dotted quad, e.g. 192.168.1.77
# $2: number of mask bits, 0-32
#
# Output: the network address as a dotted quad, e.g. 192.168.1.0
#
# The address is expected to be well formed (ipv4_is_valid_addr can be
# used to check that first).  Only parameter expansion and POSIX
# arithmetic operators are used, so no subprocesses are spawned and
# the non-portable "**" operator is not needed.  The arithmetic needs
# more than 32 bits, as the original implementation did.
ipv4_host_addr_to_net_addr()
{
        local host="$1"
        local maskbits="$2"
        local o1 o2 o3 o4 rest host_num mask_num net_num

        # Peel the octets off the front of the address one at a time.
        o1="${host%%.*}" ; rest="${host#*.}"
        o2="${rest%%.*}" ; rest="${rest#*.}"
        o3="${rest%%.*}" ; rest="${rest#*.}"
        o4="${rest%%.*}"

        host_num=$(( (o1 << 24) + (o2 << 16) + (o3 << 8) + o4 ))

        # Shift the all-ones mask left to clear the host bits, then
        # trim anything that spilled above bit 31.
        mask_num=$(( (0xffffffff << (32 - maskbits)) & 0xffffffff ))

        net_num=$(( host_num & mask_num ))

        echo "$(( (net_num >> 24) & 255 )).$(( (net_num >> 16) & 255 )).$(( (net_num >> 8) & 255 )).$(( net_num & 255 ))"
}
1062
# Print the dotted-quad netmask for a given number of mask bits.
#
# $1: number of mask bits, 0-32
#
# Output: the netmask, e.g. "24" gives 255.255.255.0
#
# Uses only POSIX arithmetic operators (shifts rather than the
# non-portable "**").  The arithmetic needs more than 32 bits, as the
# original implementation did.
ipv4_maskbits_to_net_mask()
{
        local maskbits="$1"
        local mask_num

        # Shift the all-ones mask left to clear the host bits, then
        # trim anything that spilled above bit 31.
        mask_num=$(( (0xffffffff << (32 - maskbits)) & 0xffffffff ))

        echo "$(( (mask_num >> 24) & 255 )).$(( (mask_num >> 16) & 255 )).$(( (mask_num >> 8) & 255 )).$(( mask_num & 255 ))"
}
1076
# Check that a string is a valid dotted-quad IPv4 address.
#
# $1: the address to check
#
# Returns 0 if $1 consists of exactly four dot-separated decimal
# numbers, each in the range 0-255 (leading zeros are tolerated), and
# 1 otherwise.  In particular an empty string, non-numeric fields and
# absurdly long digit strings are all rejected, and nothing is ever
# printed.  Surrounding whitespace is not tolerated.
ipv4_is_valid_addr()
{
        local addr="$1"
        local rest octet

        # Check the overall shape first.
        case "$addr" in
            *[!0-9.]*)            return 1 ;;  # illegal character
            "" | .* | *. | *..*)  return 1 ;;  # empty address or field
            *.*.*.*.*)            return 1 ;;  # more than 4 fields
            *.*.*.*)              : ;;         # exactly 4 fields
            *)                    return 1 ;;  # fewer than 4 fields
        esac

        # Now range-check each field.  A trailing dot is appended so
        # that every field, including the last, ends with one.
        rest="${addr}."
        while [ -n "$rest" ] ; do
                octet="${rest%%.*}"
                rest="${rest#*.}"

                # Strip leading zeros so that the length check below
                # only counts significant digits.  This stops huge
                # numbers from ever reaching the integer comparison,
                # where they would cause an error rather than a "no".
                octet="${octet#"${octet%%[!0]*}"}"

                if [ "${#octet}" -gt 3 ] || [ "${octet:-0}" -gt 255 ] ; then
                        return 1
                fi
        done

        return 0
}
1102
# iptables doesn't like being re-entered, so flock-wrap it.
#
# Runs /sbin/iptables with the given arguments while holding a lock on
# $CTDB_VARDIR/iptables-ctdb.flock; flock is told to wait at most 30
# seconds for the lock.  The lock path is quoted so that a
# $CTDB_VARDIR containing whitespace is passed as a single argument.
iptables()
{
        flock -w 30 "$CTDB_VARDIR/iptables-ctdb.flock" /sbin/iptables "$@"
}
1108
1109 ########################################################
1110 # tickle handling
1111 ########################################################
1112
# Directory holding the temporary files used by update_tickles; it is
# created as soon as this file is sourced.
tickledir="${CTDB_VARDIR}/state/tickles"
mkdir -p "${tickledir}"
1116
# Bring CTDB's tickle list for a port into line with the TCP
# connections currently established to this node's public IPs.
#
# $1: TCP port number
#
# Tickles are added for connections that don't have one yet and
# deleted for connections that no longer exist.  Two temporary files
# under $tickledir are used and removed again afterwards.
update_tickles ()
{
        _port="$1"

        # Normally created when this file is sourced, but make sure.
        mkdir -p "$tickledir"

        # Our own node number: "ctdb pnn" output minus its "PNN:" prefix.
        _pnn=$(ctdb pnn)
        _pnn=${_pnn#PNN:}

        # The public IPs that are currently assigned to this node.
        _ips=$(ctdb -Y ip | awk -F: -v pnn=$_pnn '$3 == pnn {print $2}')

        # Those IPs again, as a regexp alternation: "(ip1|ip2|...)".
        # The dots are escaped with doubled backslashes because awk
        # processes escape sequences in -v assignments.
        _ipschoice="($(echo $_ips | sed -e 's/ /|/g' -e 's/\./\\\\./g'))"

        # Snapshot 1: established connections to our public IPs on
        # this port, one "source destination" pair per line, sorted.
        _my_connections="${tickledir}/${_port}.connections"
        rm -f "$_my_connections"
        netstat -tn \
            | awk -v destpat="^${_ipschoice}:${_port}\$" \
                '$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ destpat {print $5, $4}' \
            | sort >"$_my_connections"

        # Snapshot 2: the tickles CTDB currently knows about for those
        # IPs, in the same format and order.
        _my_tickles="${tickledir}/${_port}.tickles"
        rm -f "$_my_tickles"
        for _i in $_ips ; do
                ctdb -Y gettickles $_i $_port \
                    | awk -F: 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }'
        done \
            | sort >"$_my_tickles"

        # Lines only in the connections snapshot: tickle is missing.
        comm -23 "$_my_connections" "$_my_tickles" \
            | while read _src _dst ; do
                ctdb addtickle $_src $_dst
        done

        # Lines only in the tickles snapshot: connection has gone.
        comm -13 "$_my_connections" "$_my_tickles" \
            | while read _src _dst ; do
                ctdb deltickle $_src $_dst
        done

        rm -f "$_my_connections" "$_my_tickles"
}
1163
1164 ########################################################
1165 # load a site local config file
1166 ########################################################
1167
# Three places are tried, in this order; a file is only sourced if it
# is executable.  All paths are quoted so that a $CTDB_BASE containing
# whitespace still works.

# 1. The file named by $CTDB_RC_LOCAL, if that is set.
if [ -n "$CTDB_RC_LOCAL" ] && [ -x "$CTDB_RC_LOCAL" ] ; then
        . "$CTDB_RC_LOCAL"
fi

# 2. $CTDB_BASE/rc.local
if [ -x "$CTDB_BASE/rc.local" ] ; then
        . "$CTDB_BASE/rc.local"
fi

# 3. Every executable file in $CTDB_BASE/rc.local.d/, in glob order.
if [ -d "$CTDB_BASE/rc.local.d" ] ; then
        for i in "$CTDB_BASE"/rc.local.d/* ; do
                [ -x "$i" ] && . "$i"
        done
fi
1181
# Make sure $CTDB_CURRENT_DEBUGLEVEL is set.  This is done only after
# the site-local files above have been sourced so that a file in
# $CTDB_BASE/rc.local.d/ gets the chance to override the debug level.
ctdb_set_current_debuglevel

script_name=${0##*/}            # basename of the running script
service_name=$script_name       # default is just the script name
service_fail_limit=1
event_name=$1