#!/bin/bash
#
# lock_gulmd	Startup script for the GULM lock daemon
#
# chkconfig: 345 22 78
# description: start/stop the gulm lock daemon
#
#
### BEGIN INIT INFO
# Provides: lock_gulmd
### END INIT INFO

# GULM_QUORUM_TIMEOUT -- amount of time to wait for there to be a master
#     before giving up.  If GULM_QUORUM_TIMEOUT is positive, then we will
#     wait GULM_QUORUM_TIMEOUT seconds before giving up and failing when
#     a master server is not found.  If GULM_QUORUM_TIMEOUT is zero, then
#     wait indefinitely for a master server.  If GULM_QUORUM_TIMEOUT is
#     negative, just start lock_gulmd and not worry about whether it is
#     quorate.
GULM_QUORUM_TIMEOUT=300

# Pull in the distribution's init-script helpers (success/failure/warning/
# status used below) and the optional cluster configuration, which may
# override the defaults above (e.g. GULM_QUORUM_TIMEOUT, GULM_OPTS,
# CLUSTER_NAME).
. /etc/init.d/functions
[ -f /etc/sysconfig/cluster ] && . /etc/sysconfig/cluster


# GULM_OPTS -- command-line arguments for lock_gulmd.  When this parameter
#     is not already set (e.g. by /etc/sysconfig/cluster) it is built here:
#     "-n $CLUSTER_NAME --use_ccs" if CLUSTER_NAME is defined, otherwise
#     just "--use_ccs".
if [ -z "$GULM_OPTS" ]
then
	[ -n "$CLUSTER_NAME" ] && GULM_OPTS="$GULM_OPTS -n $CLUSTER_NAME"
	GULM_OPTS="$GULM_OPTS --use_ccs"
fi


gulm_shutdown()
{
	# Ask the local lock_gulmd to shut down, then poll for up to ten
	# seconds until the daemon stops answering shutdown requests.
	# Returns 0 once the daemon is gone, 1 if it never responded or
	# is still answering after the polling window.
	rtrn=1
	gulm_tool shutdown localhost &> /dev/null || return $rtrn
	for sec in 1 2 3 4 5 6 7 8 9 10
	do
		sleep 1
		if ! gulm_tool shutdown localhost &> /dev/null
		then
			# Daemon no longer answers: it has exited.
			rtrn=0
			break
		fi
	done
	return $rtrn
}

# Query the local daemon's configuration for the configured lock-server
# names and store them, space-separated, in the global $serverlist.
# Empty when the daemon cannot be reached.
get_serverlist()
{
	serverlist=$( gulm_tool config localhost 2>/dev/null |
		sed -n '/servernames/{s/^.*= //;s/,/ /g;p}' )
}

# get the nodelist names that have logged into the cluster
# and stuff them into $nodelist (one name per line; empty when the
# daemon cannot be reached).
# stderr is discarded for consistency with get_serverlist() so that a
# dead daemon does not spew errors onto the console.
get_nodelist()
{
	nodelist=$( gulm_tool nodelist localhost 2>/dev/null | 
		grep Name | sed "s/^.*Name: //" )
}

# find the master server and store the value in $gulm_master
# return:
#  0 - success
#  1 - no master found
#  2 - gulm_tool error
#  3 - we can not log in because we are expired
find_master()
{
	gulm_master=""
	# Ask the local daemon who the master is.  The awk program prints
	# either "I_am = Master" or the "Master = <name>" line, and exits
	# non-zero when no "I_am" line was seen at all -- i.e. gulm_tool
	# produced no usable output.
	line=$(gulm_tool getstats localhost 2>/dev/null |
		awk 'BEGIN{xit=1}
			($1 == "I_am"){xit=0}
			($0 ~ /^(I_am = Master|Master =)/) {print}
			END{exit xit}')
	[ $? -ne 0 ] && return 2 # gulm_tool error

	case $line in
		I_am\ =\ Master)
			# This node is the master itself.
			gulm_master=$(hostname)
			;;

		Master\ =*)
			# Another node claims mastership; double-check by asking
			# that node directly before trusting the answer.
			server=${line#*= }

			if gulm_tool getstats $server 2>/dev/null |
				grep -q "I_am = Master"
			then
				gulm_master=$server
			fi
			;;

		*)	# This might mean we are in the expired state
			[ -z "$serverlist" ] && get_serverlist

			# Ask each configured server whether it considers this
			# node expired (i.e. waiting to be fenced).
			for server in $serverlist
			do
				if gulm_tool nodeinfo $server $(uname -n) 2>/dev/null |
					grep -iq "^ *state = expired"
				then
					return 3 # fence error
				fi
			done
			;;
	esac

	if [ -n "$gulm_master" ] 
	then
		return 0 # master found
	else
		return 1 # master not found
	fi
}

# Poll find_master() until a master is located or the quorum timeout
# expires.
#   GULM_QUORUM_TIMEOUT > 0 : give up after that many seconds
#   GULM_QUORUM_TIMEOUT = 0 : wait indefinitely
#   GULM_QUORUM_TIMEOUT < 0 : skip the wait entirely (returns 0)
# Returns find_master()'s last status: 0 master found, 1 master not
# found before the timeout, 2 gulm_tool error, 3 this node is expired.
wait_for_master()
{
	rtrn=0
	stoptime=$(($SECONDS + $GULM_QUORUM_TIMEOUT))
	while [ "$GULM_QUORUM_TIMEOUT" -eq 0 ] || [ "$SECONDS" -lt "$stoptime" ]
	do
		find_master
		rtrn=$?
		# Keep polling only while the master simply is not there yet
		# (status 1); success (0), hard errors (2) and the expired
		# state (3) all end the wait immediately.
		[ $rtrn -ne 1 ] && break

		sleep 5
	done

	return $rtrn
}


start()
{
	# Start lock_gulmd and wait until it is usable: the ltpx proxy is
	# answering and a master server has been found (quorum).
	# Returns 0 on success (or when already running), 1 on failure.
	echo -n "Starting lock_gulmd:"

	# If gulm is using ccs, then make sure that there is a gulm
	# section in /etc/cluster/cluster.conf, otherwise abort.
	# FIXME -- Should this be silent?  I think users should get some
	#          feedback, but others might not want added verbosity to
	#          the boot process.  Oh well... it's only one line :)
	if echo "$GULM_OPTS" | grep -qE "(--use_ccs|[\t ]-[VhCed]*c)" 
	then
		if ! [ -r /etc/cluster/cluster.conf ]
		then
			failure "/etc/cluster/cluster.conf was not detected"
			echo
			return 1
		elif ! grep -qE "<[[:space:]]*gulm([[:space:]]|[>]|$)" /etc/cluster/cluster.conf 
		then
			warning "no <gulm> section detected in /etc/cluster/cluster.conf"
			echo
			return 0
		fi
	fi

	sts=1

	# Already running?  Then there is nothing to do.
	if gulm_tool getstats localhost &>/dev/null 
	then
		success "startup"
		echo
		return 0
	fi

	# start lock_gulmd and wait for the ltpx process to fork and connect
	# before continuing
	if lock_gulmd $GULM_OPTS &> /dev/null
	then
		for i in $(seq 1 10)
		do
			sleep 1
			if gulm_tool getstats localhost:ltpx &> /dev/null
			then
				sts=0
				break
			fi
		done
	fi

	# Wait for gulm to be quorate before continuing.  If quorum is not 
	# achieved in a set period of time (see GULM_QUORUM_TIMEOUT), the
	# daemon is shut back down and startup fails.
	if [ $sts -eq 0 ]
	then
		wait_for_master
		rtrn=$?
		if [ $rtrn = 0 ]
		then
			success "startup"
		elif [ $rtrn = 3 ]
		then
			# This node is expired: it must be fenced before it can
			# log back in, so do not leave the daemon running.
			echo -n " waiting to be fenced "
			gulm_shutdown
			failure "startup"
			sts=1
		else
			echo -n " failed to login to master "
			gulm_shutdown
			failure "startup"
			sts=1
		fi
	else
		echo -n " failed to start ltpx "
		gulm_shutdown
		failure "startup"
	fi
	echo
	return $sts
}

stop()
{
	# Stop the local lock_gulmd daemon.
	#   $1 == "force" : shut down even while services are registered,
	#                   after a 5 second grace period.
	# Returns 0 if the daemon was stopped (or was not running), 1
	# otherwise.
	if [ "$1" = "force" ] ; then force=0 ; else force=1 ; fi

	echo -n "Stopping lock_gulmd:"

	do_shutdown=0
	if gulm_tool servicelist localhost &> /dev/null
	then
		# ignore LTPX and LT000-LT999  and Magma services -- anything
		# left in the list means a real client still depends on us.
		if gulm_tool servicelist localhost | 
			grep -vE "^(LTPX|LT[0-9][0-9][0-9]|Magma::[0-9]*)\$"
		then
			if [ "$force" -ne 0 ]
			then
				echo "lock_gulmd in use.  failing to stop"
				return 1
			else
				echo "lock_gulmd in use.  force shutdown in 5 seconds. " \
					"Ctrl-C to abort..."
				sleep 5
				do_shutdown=1
			fi
		fi
	else
		# The daemon did not answer.  A live process means it is
		# wedged; no process means there is nothing left to stop.
		if pidof lock_gulmd &> /dev/null
		then
			failure "unable to communicate to lock_gulmd"
			echo
			return 1
		else
			success "shutdown"
			echo
			return 0
		fi
	fi

	# Our rank is >= 0 only when this node is one of the lock servers.
	# Default to -1 (pure client) if the daemon gave no answer, so the
	# integer comparison below cannot fail on an empty string.
	rank=$( gulm_tool getstats localhost | grep rank | sed "s/^ *rank = //")
	rank=${rank:--1}

	if [ "$rank" -ge 0 ] && [ "$force" -ne 0 ]
	then
		# we are in the servers list
		myname=$(uname -n)
		my_gid=$(gulm_tool getstats localhost 2>/dev/null | grep GenerationID)
		get_serverlist
		warn_msg=" waiting for clients to logout "

		while [ $do_shutdown -eq 0 ]
		do
			get_nodelist

			# remove servers from nodelist
			for server in $serverlist
			do
				nodelist=$( echo "$nodelist" | grep -v "$server" )
			done

			# check to see if there are any clients logged in.  If there
			# are not, we shutdown.  A client belongs to this cluster
			# generation when its GenerationID matches ours.
			cli_logged_in=0
			for client in $nodelist
			do
				node_gid=$( gulm_tool getstats "$client" 2>/dev/null | 
					grep GenerationID )
				if [ "$node_gid" = "$my_gid" ] 
				then
					cli_logged_in=1
					break
				fi
			done
			if [ $cli_logged_in -eq 0 ]
			then
				do_shutdown=1
				break
			fi

			# check to see if there are other servers logged in.  If 
			# there are, they can serve the remaining clients, so it
			# is OK to shut down
			srv_logged_in=0
			for server in $serverlist
			do
				[ "$myname" = "$server" ] && continue

				node_gid=$( gulm_tool getstats "$server" 2>/dev/null | grep GenerationID )

				if [ "$node_gid" = "$my_gid" ]
				then
					srv_logged_in=1
					break
				fi
			done
			if [ $srv_logged_in -eq 1 ]
			then
				do_shutdown=1
				break
			fi

			# Print the "waiting" notice only once.
			if [ -n "$warn_msg" ] 
			then
				echo -n "$warn_msg"
				warn_msg=""
			fi

			sleep 1
		done
	else
		do_shutdown=1
	fi

	if [ $do_shutdown -eq 1 ] && gulm_shutdown
	then
		success "shutdown"
		echo
		sts=0
	else
		failure "shutdown"
		echo
		sts=1
	fi
	return $sts
}

rtrn=1

# See how we were called.
case "$1" in
  start)
	#> # Make sure that ccsd is running 
	#> FIXME -- ccs_read is no longer supported
	#> ccs_read list &>/dev/null || exit 0

	start
	rtrn=$?
	# The subsys lock file tells the rc system this service is running.
	[ $rtrn -eq 0 ] && touch /var/lock/subsys/lock_gulmd
	;;

  stop)
	stop
	rtrn=$?
	[ $rtrn -eq 0 ] && rm -f /var/lock/subsys/lock_gulmd
	;;

  forcestop)
	# Like "stop", but shuts down even with services still registered.
	stop force
	rtrn=$?
	[ $rtrn -eq 0 ] && rm -f /var/lock/subsys/lock_gulmd
	;;

  restart)
	# Re-invoke ourselves so the subsys lock file is handled too.
	$0 stop
	$0 start
	rtrn=$?
	;;

  status)
	if status lock_gulmd
	then
		if find_master
		then
			echo "gulm_master: $gulm_master is the master"
		else
			echo "gulm_master: gulm master not found"
		fi
	
		if gulm_tool servicelist localhost &> /dev/null
		then
			echo  "Services:"
			gulm_tool servicelist localhost 
		fi
	fi

	rtrn=0
	;;

  *)
	echo $"Usage: $0 {start|stop|restart|status|forcestop}"
	;;
esac

exit $rtrn

