[CRIU] [PATCH 4/5] restore: restore processes which share one fdtable (v3)

Pavel Emelyanov xemul at parallels.com
Wed Dec 26 05:21:40 EST 2012


> @@ -585,13 +585,34 @@ static int pstree_wait_helpers()
>  
>  static int restore_one_alive_task(int pid, CoreEntry *core)
>  {
> +	struct pstree_item *child;
> +
>  	pr_info("Restoring resources\n");
>  
>  	if (pstree_wait_helpers())
>  		return -1;
>  
> -	if (prepare_fds(current))
> -		return -1;
> +	/*
> +	 * Wait all children, who share a current fd table.
> +	 * We should be sure, that children don't use any file
> +	 * descriptor while fdtable is being restored.
> +	 */

This leads to a deadlock. Currently, tasks restoring shmem wait for the
one with the lowest pid to restore it. If for any reason a child waits
for its parent to restore shmem while, at the same time, the parent
waits for this child to finish with this fdt mutex, we're stuck.

> +	list_for_each_entry(child, &current->children, sibling) {
> +		if (!shared_fdtable(child))
> +			continue;
> +		futex_wait_until(child->rst->fdt_lock, FDT_LOCK_SYNC);
> +	}
> +
> +	if (!shared_fdtable(current)) {
> +		if (prepare_fds(current))
> +			return -1;
> +	} else {
> +		/* Notify a parent, that a current is ready for restoring fdtable */
> +		futex_set_and_wake(current->rst->fdt_lock, FDT_LOCK_SYNC);
> +		futex_wait_until(current->parent->rst->fdt_lock, FDT_LOCK_DONE);
> +	}
> +
> +	futex_set_and_wake(current->rst->fdt_lock, FDT_LOCK_DONE);
>  
>  	if (prepare_fs(pid))
>  		return -1;


More information about the CRIU mailing list