[Devel] Re: [C/R] sleepers don't wake up on restart

Sukadev Bhattiprolu sukadev at linux.vnet.ibm.com
Sat Apr 25 17:56:41 PDT 2009


Oren Laadan [orenl at cs.columbia.edu] wrote:
| 
| I just posted v14-rc3 which includes the c/r of restart-blocks.
| That should improve the situation.
| 
| However, depending on which syscalls one uses, a process may still
| seem "stuck" after restart because the current code still saves
| neither signals nor task timers. If a signal was pending (SIGALRM,
| for example) after freezing but before checkpoint, it will be lost.
| If a timer was set at checkpoint, it will not be restored.
| 
| So depending on your program, you may still experience issues
| until I add patches to handle that.

OK, just an FYI: the original program seemed to work fine, but when
I try to restart a small process tree, it gets stuck on restart again.

I am running on the v14-rc3 branch. Does this have anything to do with
a pending SIGCHLD? It seems to be easier to reproduce with larger process
trees (2 children per process, 4 or more levels deep).

Test programs (attached) (they need some cleanup though)

	ptree2.c
	p2.loop

--------- Processes after restart:

$ ps -ef|grep ptree

root     10461 10459  0 22:07 pts/0    00:00:00 ./ptree2 -n 1 -d 2
root     10465 10461  0 22:07 pts/0    00:00:00 ./ptree2 -n 1 -d 2
root     10466 10465  0 22:07 pts/0    00:00:00 [ptree2] <defunct>
root     10479  8220  0 22:09 pts/1    00:00:00 grep ptree

---------- Process stacks

tree2        S f6270a90     0 10461  10459
 f5e59380 00000082 08048a86 f6270a90 f6270bfc c2b32260 00000000 0000d9d3
 f5f423b0 00000000 ffffffff 00000000 00000000 00000001 00000000 f6270a88
 00000000 f6270a90 00000000 c02243aa 00000004 00000003 0000000c 00000006
Call Trace:
 [<c02243aa>] do_wait+0x1dd/0x2f6
 [<c021cd14>] default_wake_function+0x0/0x8
 [<c0224542>] sys_wait4+0x7f/0x92
 [<c0224568>] sys_waitpid+0x13/0x17
 [<c0202ce5>] sysenter_do_call+0x12/0x25
 [<c0510000>] rtl8139_init_one+0x5ae/0x887
ptree2        S f5f423b0     0 10465  10461
 f6002180 00000082 c2b265c8 f5f423b0 f5f4251c c2b29260 f67b1f44 e06d0177
 00000282 c023363c c2b265c8 00000000 00000282 0000c350 00000001 0000c350
 00000001 f67b1f44 0000c350 c051be99 00000000 00000001 0000c350 bf9d0e04
Call Trace:
 [<c023363c>] hrtimer_start_range_ns+0x105/0x111
 [<c051be99>] do_nanosleep+0x54/0x8c
 [<c02336d7>] hrtimer_nanosleep+0x8f/0xee
 [<c02332b8>] hrtimer_wakeup+0x0/0x18
 [<c051be7f>] do_nanosleep+0x3a/0x8c
 [<c0233777>] sys_nanosleep+0x41/0x51
 [<c0202ce5>] sysenter_do_call+0x12/0x25
ptree2        ? f6bee040     0 10466  10465
 f638cb80 00000046 00200200 f6bee040 f6bee1ac c2b17260 f6bee038 0000dd77
 00000000 c022f576 ffffffff 00000303 00000000 00000001 00000000 00000012
 f5a61e84 f6bee040 f6bee038 c0224c29 f6270a90 00000001 f6bee038 f5a61f88
Call Trace:
 [<c022f576>] wakeme_after_rcu+0x0/0x8
 [<c0224c29>] do_exit+0x638/0x63c
 [<c0224c87>] do_group_exit+0x5a/0x83
 [<c0224cbd>] sys_exit_group+0xd/0x10
 [<c0202ce5>] sysenter_do_call+0x12/0x25
-------------- next part --------------
A non-text attachment was scrubbed...
Name: ptree2.c
Type: text/x-csrc
Size: 4370 bytes
Desc: ptree2.c
URL: <http://lists.openvz.org/pipermail/devel/attachments/20090425/450f8e89/attachment-0001.bin>
-------------- next part --------------
#!/bin/bash

# Directory under which the per-container freezer cgroups are looked up.
freezermountpoint='/cgroups'

# Directory whose bin/ subdirectory holds the helper binaries.
CHECKPOINT='..'
NS_EXEC="${CHECKPOINT}/bin/ns_exec"
CR="${CHECKPOINT}/bin/cr"
RSTR="${CHECKPOINT}/bin/rstr"
MKTREE="${CHECKPOINT}/bin/mktree"

# Expanded unquoted throughout, so that "-e" is split off as an option.
ECHO='/bin/echo -e'

# Program run inside the container, and its arguments.
# -n: children per process, -d: depth of process tree
TEST_CMD='./ptree2'
TEST_ARGS='-n 1 -d 2'

# Log written by this script, and the file ns_exec -P records the pid in.
SCRIPT_LOG='log-p2-loop'
TEST_PID_FILE='pid.ptree2'

LOG_FILE='loop-ptree2.log'
SNAPSHOT_DIR='snap1'

# Marker files used to coordinate with the test program, plus the
# checkpoint image and the prefix of the test's own log files.
TEST_DONE='test-done'
CHECKPOINT_FILE='checkpoint-ptree2'
CHECKPOINT_READY='checkpoint-ready'
CHECKPOINT_DONE='checkpoint-done'
TEST_LOG_PREFIX='log-ptree2'
TEST_LOG_SNAP="${TEST_LOG_PREFIX}.snap"

# Ask the freezer cgroup named $1 to freeze by writing FROZEN to its
# freezer.state file.
# Globals:   ECHO, freezermountpoint (read)
# Arguments: $1 - cgroup directory name under $freezermountpoint
# Outputs:   a progress line on stdout, plus a FAIL line if the write fails
freeze()
{
	local state="${freezermountpoint}/$1/freezer.state"
	local ret

	$ECHO "\t - Freezing $1"
	$ECHO FROZEN > "$state"
	ret=$?
	if [ $ret -ne 0 ]; then
		# Name the file that could not be written so the failure
		# can be traced to a specific cgroup.
		$ECHO "***** FAIL: 'echo FROZEN > $state' returned $ret"
	fi
}

# Ask the freezer cgroup named $1 to thaw by writing THAWED to its
# freezer.state file.
# Globals:   ECHO, freezermountpoint (read)
# Arguments: $1 - cgroup directory name under $freezermountpoint
# Outputs:   a progress line on stdout, plus a FAIL line if the write fails
unfreeze()
{
	local state="${freezermountpoint}/$1/freezer.state"
	local ret

	$ECHO "\t - Unfreezing $1"
	$ECHO THAWED > "$state"
	ret=$?
	if [ $ret -ne 0 ]; then
		# Name the file that could not be written so the failure
		# can be traced to a specific cgroup.
		$ECHO "***** FAIL: 'echo THAWED > $state' returned $ret"
	fi
}

# Remove the cgroup directory named $1 and warn if it is still there
# afterwards.
# Globals:   ECHO, freezermountpoint (read)
# Arguments: $1 - cgroup directory name under $freezermountpoint
# Outputs:   a progress line on stdout, plus a WARNING line if the
#            directory remains
cleancgroup()
{
	local cgdir="${freezermountpoint}/$1"

	$ECHO "\t - Clean cgroup of $1"
	rmdir "$cgdir"
	if [ -d "$cgdir" ]; then
		# Quoted: a bare ***** is a glob and would be replaced by the
		# names of the files in the current directory.
		$ECHO "***** WARNING $cgdir remains"
	fi
}

# Run the checkpoint tool ($CR) on a pid, passing $CHECKPOINT_FILE as its
# second argument.
# Globals:   ECHO, CR, CHECKPOINT_FILE, TEST_CMD, SCRIPT_LOG (read);
#            ret (written with the tool's exit status)
# Arguments: $1 - pid handed to $CR
# Exits:     1 if $CR reports failure, after appending the matching
#            "ps aux" lines to $SCRIPT_LOG
checkpoint()
{
	local target=$1

	$ECHO "Checkpoint: $CR $target $CHECKPOINT_FILE"
	$CR $target $CHECKPOINT_FILE
	ret=$?

	# Nothing more to do when the tool succeeded.
	[ $ret -eq 0 ] && return 0

	$ECHO "***** FAIL: Checkpoint of $target failed"
	ps aux |grep $TEST_CMD >> $SCRIPT_LOG
	exit 1
}


# Start the test program under $NS_EXEC in the background, wait for it to
# signal readiness, and print the global pid of container-init.
#
# Written to be usable as pid=$(create_container): stdout carries only
# the pid. Progress and failure messages go to stderr, and the background
# job's stdout is pointed at stderr as well so that it does not hold the
# command substitution's pipe open.
# Globals:   ECHO, NS_EXEC, TEST_CMD, TEST_ARGS, TEST_PID_FILE,
#            CHECKPOINT_READY, SCRIPT_LOG (read)
# Outputs:   the pid read from $TEST_PID_FILE, on stdout
# Exits:     1 if no pid could be read. Inside $(...) that only ends the
#            subshell, so the caller must check the substitution's status.
function create_container
{
	local pid

	$ECHO "\t - $NS_EXEC -cpmP $TEST_PID_FILE -- $TEST_CMD $TEST_ARGS" >&2
	$NS_EXEC -cpmP "$TEST_PID_FILE" -- $TEST_CMD $TEST_ARGS >&2 &

	# Wait for test to finish setup
	while [ ! -f "$CHECKPOINT_READY" ]; do
		# $ECHO already holds the full "/bin/echo -e" command; it must
		# not be prefixed with /bin/ or given a second -e.
		$ECHO "\t - Waiting for $CHECKPOINT_READY" >&2
		sleep 1
	done

	# Find global pid of container-init
	pid=$(cat "$TEST_PID_FILE")
	if [ -z "$pid" ]; then
		$ECHO "***** FAIL: Invalid container-init pid $pid" >&2
		ps -ef | grep $TEST_CMD >> "$SCRIPT_LOG"
		exit 1
	fi
	$ECHO "Created container with pid $pid" >> "$SCRIPT_LOG"
	echo "$pid"
}

# Launch, in the background, a restart of the container from
# $CHECKPOINT_FILE by feeding the file to $MKTREE under $NS_EXEC.
# Output of the background job is appended to $SCRIPT_LOG.
#
# The status of "cmd &" is always 0, so testing $? after the launch can
# never detect a failed restart. The one failure that can be detected up
# front, an unreadable checkpoint file, is checked before launching.
# Whether the restarted job then survives is for the caller to verify.
# Globals:   ECHO, NS_EXEC, MKTREE, CHECKPOINT_FILE, SCRIPT_LOG,
#            TEST_CMD (read); pid (read, caller's value, used only in the
#            failure message)
# Exits:     1 if $CHECKPOINT_FILE is not readable
function restart_container
{
	$ECHO "\t - Exec $NS_EXEC -cpuim -- $MKTREE --no-pids < $CHECKPOINT_FILE"

	if [ ! -r "$CHECKPOINT_FILE" ]; then
		$ECHO "***** FAIL: Restart of $pid failed: cannot read $CHECKPOINT_FILE"
		ps aux | grep $TEST_CMD >> "$SCRIPT_LOG"
		exit 1
	fi

	sleep 1

	$NS_EXEC -cpuim -- $MKTREE --no-pids < "$CHECKPOINT_FILE" >> "$SCRIPT_LOG" 2>&1 &
}


# Preflight: some /proc/mounts entry mentioning "freezer" must also carry
# the word "ns"; otherwise print mount instructions and give up.
line=$(grep freezer /proc/mounts)
if ! $ECHO $line | grep "\<ns\>"; then
	$ECHO "please mount freezer and ns cgroups"
	$ECHO "  mkdir /cgroups"
	$ECHO "  mount -t cgroup -o freezer,ns cgroup /cgroups"
	exit 1
fi
#freezermountpoint=`$ECHO $line | awk '{ print $2 '}`

# Kill any test program left over from an earlier run; killall's output
# overwrites the script log.
killall $TEST_CMD > $SCRIPT_LOG 2>&1

# Main test loop. Each iteration starts the test program in a container,
# freezes and checkpoints it, kills it, restarts it from the checkpoint
# and waits for the restarted container to exit. The loop itself never
# terminates; only the failure paths exit the script.
cnt=1
while [ 1 ]; do
	# Start each iteration with an empty script log and kernel log
	> "$SCRIPT_LOG"
	dmesg -c > /dev/null

	$ECHO "===== Iteration $cnt"

	# Remove any 'state' files, start the app and let it tell us
	# when it is ready
	rm -f "$CHECKPOINT_READY" "$TEST_DONE" "$TEST_PID_FILE"

	$NS_EXEC -cpumP "$TEST_PID_FILE" -- $TEST_CMD $TEST_ARGS &
	$ECHO "\t - $NS_EXEC -cpumP $TEST_PID_FILE -- $TEST_CMD $TEST_ARGS"

	# Wait for test to finish setup
	while [ ! -f "$CHECKPOINT_READY" ]; do
		$ECHO "\t - Waiting for $CHECKPOINT_READY"
		sleep 1
	done

	ps -ef | grep ptree2 >> "$SCRIPT_LOG"

	# Find global pid of container-init
	pid=$(cat "$TEST_PID_FILE")
	if [ -z "$pid" ]; then
		$ECHO "***** FAIL: Invalid container-init pid $pid"
		ps -ef | grep $TEST_CMD
		exit 1
	fi
	$ECHO $pid
	#pid=`create_container`
	$ECHO "\t - Done creating container"

	# Prepare for snapshot: keep one previous snapshot, then make sure
	# the snapshot directory exists. It has to be created on the very
	# first run too, or the copy below has no directory to copy into.
	if [ -d "$SNAPSHOT_DIR" ]; then
		rm -rf "${SNAPSHOT_DIR}.prev"
		mv "$SNAPSHOT_DIR" "${SNAPSHOT_DIR}.prev"
	fi
	mkdir "$SNAPSHOT_DIR"

	freeze $pid

	# Note: this count includes the grep process itself; num_pids2 below
	# is taken the same way, so the two remain comparable.
	num_pids1=$(ps -ef | grep $TEST_CMD | wc -l)

	checkpoint $pid

	#$ECHO t > /proc/sysrq-trigger
	#dmesg > dmesg-1.out

	# Snapshot the log files
	cp "${TEST_LOG_PREFIX}"* "$SNAPSHOT_DIR"
	touch "$CHECKPOINT_DONE"

	killall -9 "$(basename $TEST_CMD)"

	unfreeze $pid

	sleep 3

	cleancgroup $pid

	# Restore the snapshot after the main process has been killed
	/bin/cp "${SNAPSHOT_DIR}"/* .

	# Restart.
	restart_container

	sleep 3
	num_pids2=$(ps -ef | grep $TEST_CMD | wc -l)

	ps -ef | grep ptree2 >> "$SCRIPT_LOG"

	$ECHO "\t - num_pids1 $num_pids1, num_pids2 $num_pids2"

	# Find global-pid of container-init
	nspid=$(pidof $NS_EXEC)
	if [ -z "$nspid" ]; then
		$ECHO "***** FAIL: Can't find pid of $NS_EXEC"
		exit 1
	fi

	# End test gracefully
	touch "$TEST_DONE"

	$ECHO "\t - Restart: Waiting for container-init (gloabl-pid $nspid) to exit"
	wait $nspid
	ret=$?

	$ECHO "Container-init (global-pid $nspid) exited, status $ret"

	# Use the same mount point that cleancgroup itself operates on
	if [ -d "${freezermountpoint}/$pid" ]; then
		cleancgroup $pid
	fi

	cnt=$((cnt+1))
done
-------------- next part --------------
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers


More information about the Devel mailing list