#!/bin/bash
#
# Cluster NFS failover test script.
#

# Tunables.  Each may be preset in the environment; a value that is unset
# or empty falls back to the default given here.
: "${MP:=/mnt/tmp}"						# Mount point for our tests
: "${NFSOPS:=nfsvers=3,noac}"					# Any default NFS options
: "${NODE1:=magenta}"						# First node in cluster
: "${NODE2:=yellow}"						# Second node in cluster
: "${SERVICE:=nfs_service}"					# Name of NFS service
: "${EXPORT:=192.168.1.30:/mnt/nfs_test}"			# NFS export
: "${TESTS:=t_mount t_basic t_restart t_relo t_failover}"	# Tests to run


#
# I/O loop
#
# When the script is re-invoked with "iochild" as its first argument (see
# child_spawn), it acts as the I/O worker instead of the test driver:
#   - repeatedly writes 512 x 1k blocks of /dev/urandom to $MP/big-file,
#     logging dd's output to /tmp/$0-child.log;
#   - on a dd failure, sends SIGUSR2 to the parent and exits 1;
#   - after each successful write, sends SIGUSR1 to the parent and then
#     stops itself until something sends it SIGCONT.
#
if [ "$1" = "iochild" ]; then
	while : ; do
		# Expansions are quoted so a mount point or script path that
		# contains whitespace is not word-split.
		if ! dd if=/dev/urandom of="$MP/big-file" bs=1k count=512 &> "/tmp/$0-child.log"; then
			echo "Child I/O Failure"
			kill -USR2 "$PPID"
			exit 1
		fi

		#
		# Notify parent we're still okay.
		#
		kill -USR1 "$PPID"

		#
		# Wait for OK to continue from parent
		# suspend -f doesn't work. Heh.
		# If we spawn another dd, the umount will get
		# EBUSY ;(
		#
		kill -STOP $$
	done

	# The loop above never terminates on its own; getting here is an error.
	exit 1
fi


#
# Abort the test run.
#
# Args - message to print (all arguments are joined with spaces)
#
# Prints the message, kills all background children, unmounts $MP, dumps
# the command log /tmp/$0.log and exits with status 1.
#
# NOTE(review): the log path embeds $0 verbatim, so running the script via
# a path that contains directories makes it /tmp/<dirs>/<name>.log -- confirm
# that this is intended.
#
die()
{
	# Quoted so the message is printed verbatim: no glob expansion of
	# characters such as '*' and no collapsing of repeated whitespace.
	printf '%s\n' "$*"

	child_kill
	umount "$MP"
	echo "Output:"
	cat "/tmp/$0.log"
	exit 1
}


#
# SIGUSR2 handler: the I/O child sends SIGUSR2 when its dd fails.
# Prints a notice followed by the child's log (/tmp/$0-child.log) and
# exits with status 1.
#
killed_by_child()
{
	printf '%s\n' "Killed by child!" "Output from child:"
	cat /tmp/$0-child.log
	exit 1
}


#
# SIGUSR1 handler installed by child_wait: just reports that the child
# signalled.
#
child_woke()
{
	printf '%s\n' "Received wakeup signal from child"
}


#
# SIGUSR1 handler installed by child_spawn: sends SIGCONT to every job of
# this shell whose "jobs -l" line mentions "iochild", so a child that
# stopped itself after a write resumes its I/O loop.
#
child_stopped()
{
	local pids
	local pid

	printf '%s\n' "Sending continue to child..."

	# Second column of "jobs -l" output is the PID.
	pids=$(jobs -l 2>/dev/null | grep iochild | awk '{print $2}')
	for pid in $pids; do
		kill -CONT $pid &> /dev/null
	done
}


#
# Enable a cluster service on a node.
#
# Arg 1 - node to ssh to
# Arg 2 - service name handed to "clusvcadm -e"
#
# Output of the remote command is captured in /tmp/$0.log.  On failure
# die() is called, which prints that log and exits.
#
enable_service()
{
	echo -n "  Enabling $2 on $1..."
	# The failure message names the operation (enable), the service and the
	# node; it previously read "Couldn't disable <node>".
	ssh "$1" "clusvcadm -e $2" &> "/tmp/$0.log" || die "Couldn't enable $2 on $1"
	echo "Done"
}


#
# Disable a cluster service.
#
# Arg 1 - node to ssh to
# Arg 2 - service name handed to "clusvcadm -d"
#
# Output of the remote command is captured in /tmp/$0.log.  On failure
# die() is called, which prints that log and exits.
#
disable_service()
{
	echo -n "  Disabling $2..."
	# Report the service that could not be disabled (and the node used),
	# rather than presenting the node name as the thing being disabled.
	ssh "$1" "clusvcadm -d $2" &> "/tmp/$0.log" || die "Couldn't disable $2 on $1"
	echo "Done"
}


#
# Handler for SIGINT/SIGTERM/SIGQUIT: announces the signal, kills all
# background children and exits with status 1.
#
sigint_handler()
{
	# Two blank lines first, then the notice.
	printf '\n\n%s\n' "Term signal received!"
	child_kill
	exit 1
}

#
# Mount $EXPORT on $MP.
#
# Arg 1 - (optional) extra mount options, appended to $NFSOPS with a comma
#
# No "-o" is passed at all when both $NFSOPS and Arg 1 are empty.
# Calls die() if mount fails.
#
mount_mp()
{
	local optlist="$NFSOPS"
	local opts=""

	# Append the per-call options, inserting a comma only when there are
	# default options to separate them from.
	if [ -n "$1" ]; then
		optlist="${optlist:+$optlist,}$1"
	fi

	if [ -n "$optlist" ]; then
		opts="-o $optlist"
	fi

	printf '%s' "  Mounting $opts..."
	# $opts is intentionally unquoted: it must split into "-o" and its value.
	if ! mount $opts $EXPORT $MP; then
		die "Failed to mount."
	fi
	printf '%s\n' "Done"
}


#
# Unmount $MP; calls die() if umount fails.
#
umount_mp()
{
	printf '%s' "  Umounting..."
	if ! umount $MP; then
		die "Failed to umount $MP"
	fi
	printf '%s\n' "Done"
}


#
# Start the I/O worker in the background.
#
# The worker is this same script re-run with the "iochild" argument and
# its output discarded.  Before it is started, child_stopped is installed
# as the SIGUSR1 handler, so each time the worker signals the parent (and
# then stops itself) it is sent SIGCONT.
#
child_spawn()
{
	printf '%s' "  Spawning I/O child task..."
	# Install the handler first so an early SIGUSR1 is not missed.
	trap child_stopped SIGUSR1
	$0 iochild &> /dev/null &
	printf '%s\n' "Done"
}

#
# Block until the I/O child reports progress.
#
# Replaces the SIGUSR1 handler with child_woke (which only prints a
# message, so the child is no longer auto-continued) and then waits on the
# background jobs.  A trapped signal interrupts bash's "wait" builtin, so
# the child's next SIGUSR1 is expected to end the wait.
#
child_wait()
{
	trap child_woke SIGUSR1
	printf '%s' "  Waiting for child task respond: "
	wait
}


#
# Kill background jobs of this shell.
#
# Arg 1 - (optional) child name: a pattern matched with grep against the
#         lines of "jobs -l".  When omitted, every job is killed.
#
child_kill()
{
	local jobspec
	local pid
	# With no argument, "." makes grep match every non-empty line.
	local pattern="${1:-.}"

	if [ -z "$1" ]; then
		echo -n "  Killing all children: "
	else
		echo -n "  Killing $1 children: "
	fi

	#
	# Kill the child
	#
	while read jobspec pid; do
		# Reduce a job spec such as "[1]+" to the bare job number.
		jobspec=${jobspec##*\[}
		jobspec=${jobspec%%]*}

		# Disown the job, then send SIGKILL followed by SIGCONT to it.
		disown %$jobspec &> /dev/null
		kill -KILL $pid &> /dev/null
		kill -CONT $pid &> /dev/null
	done < <(jobs -l 2>/dev/null | grep $pattern | awk '{print $1,$2}')
	echo "Done"
}


#
# Move $SERVICE back to $NODE1: disable it via $NODE2, then enable it on
# $NODE1.  Each remote command's output goes to /tmp/$0.log; die() is
# called if either step fails.
#
send_service_home()
{
	printf '%s' "  Sending $SERVICE to $NODE1..."
	if ! ssh $NODE2 "clusvcadm -d $SERVICE" &> /tmp/$0.log; then
		die "Couldn't disable $SERVICE"
	fi
	if ! ssh $NODE1 "clusvcadm -e $SERVICE" &> /tmp/$0.log; then
		die "Couldn't enable $SERVICE on $NODE1"
	fi
	printf '%s\n' "Done"
}


#
# Forcibly reboot a cluster node.
#
# Arg 1 - node to reboot
#
# Runs "reboot -fn" on the node over ssh in the background, waits one
# second, then kills the background ssh job via child_kill.
# NOTE(review): presumably the ssh session cannot be relied on to return
# once the node goes down, hence the explicit kill -- confirm.
#
reboot_node()
{
	echo "  Warning: Rebooting $1!"
	ssh "$1" "reboot -fn" &
	sleep 1

	child_kill ssh
}



#
# Test 1 -- Mount/unmount
#
# Mounts and unmounts the export once with the "tcp" option and once with
# the "udp" option.
#
t_mount()
{
	printf '%s\n' "Test: Basic NFS mount/umount test"
	for proto in tcp udp; do
		mount_mp $proto
		umount_mp
	done
	# Success line followed by one blank separator line.
	printf '%s\n\n' "Test: Success"
}


#
# Test 2 -- NFS mount + io (e.g. ls)
#
# For each of tcp and udp: mount, start the I/O child, wait for it to
# report, kill it and unmount.
#
t_basic()
{
	printf '%s\n' "Test: Basic NFS mount/io/umount test"
	for proto in tcp udp; do
		mount_mp $proto

		child_spawn
		child_wait
		child_kill iochild

		umount_mp
	done
	# Success line followed by one blank separator line.
	printf '%s\n\n' "Test: Success"
}
	

#
# Test 3 -- Restart NFS service during I/O
#
# For each of tcp and udp: mount, start the I/O child, disable $SERVICE
# via $NODE1, sleep 3 seconds, enable it on $NODE1 again, then wait for
# the child to report, kill it and unmount.
#
t_restart()
{
	printf '%s\n' "Test: NFS I/O during restart on same cluster node"
	for proto in tcp udp; do
		mount_mp $proto
		child_spawn

		disable_service $NODE1 $SERVICE
		printf '%s' "  Sleeping 3 seconds..."
		sleep 3
		printf '%s\n' "Done"
		enable_service $NODE1 $SERVICE

		child_wait
		child_kill iochild
		umount_mp
	done
	# Success line followed by one blank separator line.
	printf '%s\n\n' "Test: Success"
}


#
# Test 4 - NFS Mount/io/umount during relocation to different node
#
# For each of tcp and udp: mount, start the I/O child, disable $SERVICE
# via $NODE1, sleep 3 seconds, enable it on $NODE2, then wait for the
# child to report, kill it, unmount and send the service back to $NODE1.
#
t_relo()
{
	printf '%s\n' "Test: NFS I/O during relocation"
	for proto in tcp udp; do
		mount_mp $proto
		child_spawn

		disable_service $NODE1 $SERVICE
		printf '%s' "  Sleeping 3 seconds..."
		sleep 3
		printf '%s\n' "Done"
		enable_service $NODE2 $SERVICE

		child_wait
		child_kill iochild
		umount_mp
		send_service_home
	done
	# Success line followed by one blank separator line.
	printf '%s\n\n' "Test: Success"
}
	
	
#
# Test 5 - NFS Mount/io/FAILOVER
#
# Moves $SERVICE to $NODE2, mounts with the "tcp" option, starts the I/O
# child and reboots $NODE2; then waits for the child to report, kills it
# and unmounts.  Only tcp is exercised.
#
t_failover()
{
	printf '%s\n' "Test: NFS I/O during failover [ TCP ONLY ]"

	# Single transport; $proto is still set as the loop form would set it.
	proto=tcp

	disable_service $NODE1 $SERVICE
	enable_service $NODE2 $SERVICE

	mount_mp $proto
	child_spawn

	reboot_node $NODE2

	child_wait
	child_kill iochild
	umount_mp

	# Success line followed by one blank separator line.
	printf '%s\n\n' "Test: Success"
}
	
	
#
# Setup
#
printf '%s\n' "Setting up..."

# Termination signals share one handler; SIGUSR2 is how the I/O child
# reports a failed write.
trap sigint_handler SIGINT SIGTERM SIGQUIT
trap killed_by_child SIGUSR2

# Show the effective configuration (the format is reused for each pair).
printf '  %s: %s\n' \
	"Node 1" "$NODE1" \
	"Node 2" "$NODE2" \
	"Service Name" "$SERVICE" \
	"Mount point" "$MP" \
	"NFS Export" "$EXPORT" \
	"NFS Mount Options" "$NFSOPS" \
	"Tests" "$TESTS"

# Start from a known state: service on $NODE1, mount point present and
# not mounted, no log files left over.
send_service_home
mkdir -p $MP
umount $MP &> /dev/null
rm -f /tmp/$0.log /tmp/$0-child.log

printf '\n'

# Each word of $TESTS is run as a command (normally one of the t_* functions).
for t in $TESTS; do
	$t
done

exit 0
