#!/bin/bash
#
# cman - Cluster Manager init script
#
# chkconfig: - 21 79
# description: Starts and stops cman
#
#
### BEGIN INIT INFO
# Provides:		cman
# Required-Start:	$network
# Required-Stop:	$network
# Default-Start:
# Default-Stop:
# Short-Description:	Starts and stops cman
# Description:		Starts and stops the Cluster Manager set of daemons
### END INIT INFO

. /etc/init.d/functions

if [ -f /etc/sysconfig/cman ]; then
    . /etc/sysconfig/cman
    echo "/etc/sysconfig/cman has been deprecated in favour of /etc/sysconfig/cluster"
    echo "Please consider switching to the new file"
fi

[ -f /etc/sysconfig/cluster ] && . /etc/sysconfig/cluster

[ -z "$CCSD_OPTS" ] && CCSD_OPTS=

# CMAN_CLUSTER_TIMEOUT -- amount of time to wait for joinging a cluster
#     before giving up.  If CMAN_CLUSTER_TIMEOUT is positive, then we will
#     wait CMAN_CLUSTER_TIMEOUT seconds before giving up and failing when
#     a cluster is not joined.  If CMAN_CLUSTER_TIMEOUT is zero, then
#     wait indefinately for a cluster join.  If CMAN_CLUSTER_TIMEOUT is
#     negative, do not check to see that the cluster has been joined
[ -z "$CMAN_CLUSTER_TIMEOUT" ] && CMAN_CLUSTER_TIMEOUT=120

# CMAN_QUORUM_TIMEOUT -- amount of time to wait for a quorate cluster on 
#     startup quorum is needed by many other applications, so we may as 
#     well wait here.  If CMAN_QUORUM_TIMEOUT is less than 1, quorum will 
#     be ignored.
[ -z "$CMAN_QUORUM_TIMEOUT" ] && CMAN_QUORUM_TIMEOUT=0

# CMAN_SHUTDOWN_TIMEOUT -- amount of time to wait for cman to become a 
#     cluster member before calling cman_tool leave during shutdown.  
#     The default is 60 seconds
[ -z "$CMAN_SHUTDOWN_TIMEOUT" ] && CMAN_SHUTDOWN_TIMEOUT=60

# FENCED_START_TIMEOUT -- amount of time to wait for starting fenced
#     before giving up.  If FENCED_START_TIMEOUT is positive, then we will
#     wait FENCED_START_TIMEOUT seconds before giving up and failing when
#     fenced does not start.  If FENCED_START_TIMEOUT is zero, then
#     wait indefinately for fenced to start.
[ -z "$FENCED_START_TIMEOUT" ] && FENCED_START_TIMEOUT=300

# NET_RMEM_DEFAULT -- minimum value for rmem_default. If this is set
# higher elsewhere it will not be reduced here.
# These two values are only really needed for the DLM when using sctp
# but do no harm.
[ -z "$NET_RMEM_DEFAULT" ] && NET_RMEM_DEFAULT=4194304

# NET_RMEM_MAX -- minimum value for rmem_max. If this is set
# higher elsewhere it will not be reduced here.
[ -z "$NET_RMEM_MAX" ] && NET_RMEM_MAX=4194304

# FENCE_JOIN -- boolean value used to control whether or not this node
#     should join the fence domain. If FENCE_JOIN is set to "no", then
#     the script will not attempt to the fence domain. If FENCE_JOIN is
#     set to "yes", then the script will attempt to join the fence domain.
#     If FENCE_JOIN is set to any other value, the default behavior is
#     to join the fence domain (equivalent to "yes").
[ -z "$FENCE_JOIN" ] && FENCE_JOIN="yes"

[ -z "$LOCK_FILE" ] && LOCK_FILE="/var/lock/subsys/cman"

[ -n "$CLUSTERNAME" ] && cman_join_opts="-c $CLUSTERNAME"

[ -n "$NODENAME" ] && cman_join_opts+=" -n $NODENAME"

load_modules()
{
    errmsg=$( /sbin/modprobe configfs 2>&1 ) || return 1
    errmsg=$( /sbin/modprobe dlm 2>&1 ) || return 1
    errmsg=$( /sbin/modprobe lock_dlm 2>&1 ) || return 1
    return 0
}

start_configfs()
{
    # configfs
    awk '{ print $2 }' /etc/mtab | grep "/sys/kernel/config" &> /dev/null \
    && awk '{ print $3 }' /etc/mtab | grep "configfs" &> /dev/null
    if [ $? -ne 0 ]
    then
	errmsg=$( /bin/mount -t configfs none /sys/kernel/config 2>&1 )
	return $?
    fi
    return 0
}

start_ccsd()
{
    # ccsd
    status ccsd &> /dev/null
    if [ $? -ne 0 ]
    then
	errmsg=$(/sbin/ccsd $CCSD_OPTS 2>&1)
	rtrn=$?
	return $rtrn
    fi
    return 0
}

start_cman()
{
    # cman
    /sbin/cman_tool status &> /dev/null
    if [ $? -ne 0 ]
    then
	errmsg=$( /sbin/cman_tool -t $CMAN_CLUSTER_TIMEOUT -w join \
	    $cman_join_opts 2>&1 ) || return 1

	if [ $CMAN_QUORUM_TIMEOUT -gt 0 ]
	then
	    errmsg=$( /sbin/cman_tool -t $CMAN_QUORUM_TIMEOUT \
		    -q wait 2>&1 ) || return 1
	fi
    fi
    return 0
}


start_qdiskd()
{
    declare current_runlevel

    current_runlevel=$(/sbin/runlevel 2>/dev/null | awk '{ print $2 }' 2>/dev/null)
    #
    # Start qdiskd before fenced to resolve bug #436381.  This only
    # happens if qdiskd is configured to run in the runlevel we are in
    #
    /sbin/chkconfig --levels "$current_runlevel" qdiskd 2>/dev/null
    if [ $? -ne 0 ]; then
        # qdiskd doesn't start at this runlevel.
        return 0
    fi

    echo -n "   Starting qdiskd... "
    /sbin/service qdiskd start &> /dev/null
    if [ $? -eq 0 ] 
    then
	echo "done"
    else
	echo "failed"
	return 1
    fi
    return 0
}


start_daemons()
{
    status groupd &> /dev/null
    if [ $? -ne 0 ]; then
	errmsg=$( /sbin/groupd 2>&1 ) || return 1
    fi
    status fenced &> /dev/null
    if [ $? -ne 0 ]; then
	errmsg=$( /sbin/fenced 2>&1 ) || return 1
    fi
    status dlm_controld &> /dev/null
    if [ $? -ne 0 ]; then
	errmsg=$( /sbin/dlm_controld 2>&1 ) || return 1
    fi
    status gfs_controld &> /dev/null
    if [ $? -ne 0 ]; then
	errmsg=$( /sbin/gfs_controld 2>&1 ) || return 1
    fi
    return 0
}

start_fence()
{
    errmsg=$( /sbin/fence_tool -w -t $FENCED_START_TIMEOUT join \
	     > /dev/null 2>&1 ) || return 1
    return 0
}

start_fence_xvmd()
{
    status fence_xvmd &> /dev/null
    if [ $? -ne 0 ]; then
	errmsg=$( /sbin/fence_xvmd $FENCE_XVMD_OPTS 2>&1 ) || return 1
    fi
    return 0
}

xend_bridged_net_enabled() {
	# Not a xen kernel
	test -d /proc/xen || return 1

    current_runlevel=$(/sbin/runlevel 2>/dev/null | awk '{ print $2 }' 2>/dev/null)
    if [ -z "$current_runlevel" ]; then
        errmsg='Unable to determine the current runlevel'
        return 1
    fi

    /sbin/chkconfig --levels "$current_runlevel" xend 2>/dev/null
    if [ $? -ne 0 ]; then
        # xend doesn't start at this runlevel.
        return 1
    fi

    if [ ! -f /etc/xen/xend-config.sxp ]; then
        # xend isn't configured to use bridged networking.
        return 1
    fi

    egrep "^[[:blank:]]*\([[:blank:]]*network-script[[:blank:]]+(')?[[:blank:]]*network-bridge([[:blank:]]*\)|[[:blank:]]+)" /etc/xen/xend-config.sxp >&/dev/null
    if [ $? -ne 0 ]; then
        # xend isn't configured to use bridged networking.
        return 1
    fi
    return 0
}

xend_bridged_net_start() {
    if [ ! -x /etc/xen/scripts/network-bridge ]; then
        if [ -f /etc/xen/scripts/network-bridge ]; then
            errmsg='The xend bridged network script cannot be run'
        else
            errmsg='The xend bridged network script is missing'
        fi
        return 1
    fi

    /sbin/modprobe netbk >& /dev/null
    /sbin/modprobe netloop >& /dev/null
    bridge_parms=`egrep -m 1 "^[[:blank:]]*\([[:blank:]]*network-script[[:blank:]]+(')?[[:blank:]]*network-bridge([[:blank:]]*\)|[[:blank:]]+)" /etc/xen/xend-config.sxp| sed -r "s/^[[:blank:]]*\([[:blank:]]*network-script[[:blank:]]+'?[[:blank:]]*network-bridge[[:blank:]]*//; s/'?[[:blank:]]*\).*//"`
    errmsg=$(/etc/xen/scripts/network-bridge start $bridge_parms 2>&1) || return 1
    return 0
}

fence_xvmd_enabled()
{
    #
    # Check the value of FENCE_JOIN.
    # If FENCE_JOIN is set to "no", then we should disable fence_xvm.
    #
    if [ "$FENCE_JOIN" == "no" ]; then
	return 1
    fi

    #
    # Check for the 'xm' binary.  If it's not here, we are not
    # running on a machine capable of running xvmd.
    #
    which xm &> /dev/null || return 1

    #
    # Check for presence of /cluster/fence_xvmd in cluster.conf
    # (If -X is specified, it doesn't matter if it's in cluster.conf;
    #  we'll start it anyway since ccsd is not required)
    #
    /sbin/cman_tool status &> /dev/null
    if [ $? -eq 0 ]
    then
	if [ "$FENCE_XVMD_OPTS" = "${FENCE_XVMD_OPTS/-X/}" ]; then
		ccs_tool query /cluster/fence_xvmd || return 1
	fi
    fi

    return 0
}

set_networking_params()
{
    if [ ! -f  /proc/sys/net/core/rmem_default ]
    then
	return 0;
    fi
   
    value="$(cat /proc/sys/net/core/rmem_default)"
    if [ $value -le $NET_RMEM_DEFAULT ]
    then
	echo $NET_RMEM_DEFAULT > /proc/sys/net/core/rmem_default
    fi
    
    value="$(cat /proc/sys/net/core/rmem_max)"
    if [ $value -le $NET_RMEM_MAX ]
        then
        echo $NET_RMEM_MAX > /proc/sys/net/core/rmem_max
    fi
}

fence_join_enabled()
{
    #
    # Check the value of FENCE_JOIN.
    # If FENCE_JOIN is set to "no", we will not attempt to join
    # the fence domain. If FENCE_JOIN is set to any other value,
    # we will attempt to join the fence domain (default).
    #
    if [ "$FENCE_JOIN" == "no" ]; then
	return 1
    else
	return 0
    fi
}

start()
{
    echo "Starting cluster: "

    xend_bridged_net_enabled
    if [ $? -eq 0 ]
    then
        echo -n "   Enabling workaround for Xend bridged networking... "
        xend_bridged_net_start
        if [ $? -eq 0 ] 
        then
            echo "done"
        else
            echo "failed: $errmsg"
			return 1
        fi
    fi

    echo -n "   Loading modules... "
    ulimit -c unlimited
    load_modules
    if [ $? -eq 0 ] 
    then
	echo "done"
    else
	echo "failed"
	return 1
    fi

    echo -n "   Mounting configfs... "
    start_configfs
    if [ $? -eq 0 ] 
    then
	echo "done"
    else
	echo "failed"
	return 1
    fi	

    echo -n "   Setting network parameters... "
    set_networking_params
    if [ $? -eq 0 ] 
    then
	echo "done"
    else
	echo "failed"
	return 1
    fi

    echo -n "   Starting ccsd... "
    start_ccsd
    if [ $? -eq 0 ] 
    then
	echo "done"
    else
	echo "failed"
	return 1
    fi

    echo -n "   Starting cman... "
    start_cman
    if [ $? -eq 0 ] 
    then
	echo "done"
    else
	echo "failed"
	return 1
    fi

    start_qdiskd

    echo -n "   Starting daemons... "
    start_daemons
    if [ $? -eq 0 ] 
    then
	echo "done"
    else
	echo "failed"
	return 1
    fi

    if fence_join_enabled; then
	echo -n "   Starting fencing... "
	start_fence
	if [ $? -eq 0 ] 
	    then
	    echo "done"
	else
	    echo "failed"
	    return 1
	fi
    fi

    if fence_xvmd_enabled; then
	echo -n "   Starting virtual machine fencing host... "
	start_fence_xvmd
	if [ $? -eq 0 ]
	then
	    echo "done"
	else
	    echo "failed"
	return 1
	fi
    fi
    
    return 0
}

stop_configfs()
{
    awk '{ print $2 }' /etc/mtab | grep "/sys/kernel/config" &> /dev/null\
    && awk '{ print $3 }' /etc/mtab | grep "configfs" &> /dev/null
    if [ $? -eq 0 ]
    then
	errmsg=$( /bin/umount /sys/kernel/config 2>&1 )
	if [ $? -ne 0 ]
	then
	    echo -n $errmsg " "
	fi
    fi
    return 0
}

stop_ccsd()
{
    for sec in $(seq 1 10)
    do
	if /sbin/pidof ccsd &> /dev/null
	then
	    # get the pid of ccsd from /var/run/cluster/ccsd.pid
	    # and break if the file is not there
	    [ -r /var/run/cluster/ccsd.pid ] || break

	    pid=$(cat /var/run/cluster/ccsd.pid)
	    /usr/bin/kill $pid &> /dev/null || break
	    
	    sleep 1
	else
	    return 0
	fi
    done
    return 1
}

stop_cman()
{
    /sbin/cman_tool status &> /dev/null
    if [ $? -eq 0 ]
    then
    errmsg=$( /sbin/cman_tool -t $CMAN_SHUTDOWN_TIMEOUT \
	    -w leave $1 2>&1 ) || return 1
    fi
    return 0 # all ok
}

stop_daemons()
{
    if pid=$(/sbin/pidof gfs_controld 2>&1); then
	errmsg=$(/usr/bin/kill $pid 2>&1) || return 1
    fi
    if pid=$(/sbin/pidof dlm_controld 2>&1); then
	errmsg=$(/usr/bin/kill $pid 2>&1) || return 1
    fi
    if pid=$(/sbin/pidof fenced 2>&1); then
	errmsg=$(/usr/bin/kill $pid 2>&1) || return 1
    fi
    if pid=$(/sbin/pidof groupd 2>&1); then
	errmsg=$(/usr/bin/kill $pid 2>&1) || return 1
    fi
    return 0 # all ok
}

stop_fence()
{
    if /sbin/pidof fenced &> /dev/null
    then
	/sbin/fence_tool -w leave > /dev/null 2>&1
	rtrn=$?
	sleep 1 # A bit of time for fenced to exit
	return $rtrn
    fi
    return 0 # all ok
}

stop_fence_xvmd()
{
    if /sbin/pidof fence_xvmd &> /dev/null
    then
    	pkill -TERM fence_xvmd
	sleep 1 # A bit of time for fenced to exit
    fi
    
    [ -z "`pidof fence_xvmd`" ]
    return $?
}

stop()
{
    echo "Stopping cluster: "

    if fence_xvmd_enabled; then
	echo -n "   Stopping virtual machine fencing host... "
	stop_fence_xvmd
	if [ $? -eq 0 ]
	then
	    echo "done"
	else
	    echo "failed"
	    return 1
	fi
    fi

    if fence_join_enabled; then
	echo -n "   Stopping fencing... "
	stop_fence
	if [ $? -eq 0 ]
	    then
	    echo "done"
	else
	    echo "failed"
	    return 1
	fi
    fi

    echo -n "   Stopping cman... "
    if [ $1 ]; then
	stop_cman $1
    else
	stop_cman
    fi
    if [ $? -eq 0 ]
    then
	echo "done"
    else
	echo "failed"
	return 1
    fi

#   stop_daemons
#   [ $? -ne 0 ] && return 1

    echo -n "   Stopping ccsd... "
    stop_ccsd
    if [ $? -eq 0 ]
    then
	echo "done"
    else
	echo "failed"
	return 1
    fi

    echo -n "   Unmounting configfs... "
    stop_configfs
    if [ $? -eq 0 ]
    then
	echo "done"
    else
	echo "failed"
	return 1
    fi

    return 0
}

cmanstatus()
{
	errmsg=$( status ccsd 2>&1) || return 1
	errmsg=$( status groupd 2>&1) || return 1
	errmsg=$( status fenced 2>&1) || return 1
	errmsg=$( status dlm_controld 2>&1) || return 1
	errmsg=$( status gfs_controld 2>&1) || return 1
	
	fence_xvmd_enabled || return 0
	errmsg=$( status fence_xvmd 2>&1) || return 1

	return 0
}

rtrn=1

# See how we were called.
case "$1" in
    start)
	start
	rtrn=$?
	[ $rtrn = 0 ] && touch $LOCK_FILE
	if [ $rtrn -ne 0 ] 
	then
	    echo $errmsg
	    failure "failed to start cman"
	    echo
	else
	    success "start"
	    echo
	fi
	;;
    stop)
	if [ $2 ]; then
	    stop
	else
	    stop remove
	fi
	rtrn=$?
	[ $rtrn = 0 ] && rm -f $LOCK_FILE
	if [ $rtrn -ne 0 ] 
	then
	    echo $errmsg
	    failure "failed to stop cman"
	    echo
	else
	    success "shutdown"
	    echo
	fi
	;;

    restart|reload)
	$0 stop restart
	$0 start
	rtrn=$?
	;;

    status)
	cmanstatus
	rtrn=$?
	if [ $rtrn -ne 0 ] ; then
	    echo $errmsg
	else
	    echo "cman is running."
	fi
	;;

    *)
	    echo $"Usage: $0 {start|stop|reload|restart|status}"
	    ;;
esac

exit $rtrn
