[CRIU] [PATCH 2/2] sysctl: move sysctl calls to usernsd

Pavel Emelyanov xemul at parallels.com
Thu Sep 10 04:48:06 PDT 2015


> diff --git a/include/namespaces.h b/include/namespaces.h
> index 18557af..8034cca 100644
> --- a/include/namespaces.h
> +++ b/include/namespaces.h
> @@ -95,7 +95,7 @@ typedef int (*uns_call_t)(void *arg, int fd);
>   */
>  #define UNS_FDOUT	0x2
>  
> -#define MAX_UNSFD_MSG_SIZE 256
> +#define MAX_UNSFD_MSG_SIZE 4096
>  
>  /*
>   * When we're restoring inside user namespace, some things are
> diff --git a/namespaces.c b/namespaces.c
> index 1677a7a..76e05ce 100644
> --- a/namespaces.c
> +++ b/namespaces.c
> @@ -908,7 +908,7 @@ static int usernsd(int sk)
>  
>  	while (1) {
>  		struct unsc_msg um;
> -		static char msg[MAX_MSG_SIZE];
> +		static char msg[MAX_UNSFD_MSG_SIZE];

I have a subtle feeling that this hunk should be in patch #1 :)

>  		uns_call_t call;
>  		int flags, fd, ret;
>  
> @@ -975,7 +975,7 @@ int userns_call(uns_call_t call, int flags,
>  	bool async = flags & UNS_ASYNC;
>  	struct unsc_msg um;
>  
> -	if (unlikely(arg_size > MAX_MSG_SIZE)) {
> +	if (unlikely(arg_size > MAX_UNSFD_MSG_SIZE)) {

So does this...

>  		pr_err("UNS: message size exceeded\n");
>  		return -1;
>  	}

> diff --git a/sysctl.c b/sysctl.c
> index b059140..9e1807f 100644
> --- a/sysctl.c
> +++ b/sysctl.c
> +/*
> + * In order to avoid lots of opening of /proc/sys for each struct sysctl_req,
> + * we encode each array of sysctl_reqs into one contiguous region of memory so
> + * it can be passed via userns_call. It looks like this:
> + *
> + *  struct sysctl_userns_req    struct sysctl_req       name        arg
> + * ---------------------------------------------------------------------------
> + * |  op  |  nr_req  |  reqs  | <fields> | name | arg | "the name" | "the arg" ...
> + * ---------------------------------------------------------------------------
> + *                       |____^             |______|__^            ^
> + *                                                 |_______________|
> + */
> +
> +static int __sysctl_op(void *arg, int unused)
>  {
> -	int ret = 0;
> -	int dir = -1;
> +	int fd, ret = -1, nr, flags, dir, i;
> +	struct sysctl_userns_req *userns_req = arg;
> +	int op = userns_req->op;
> +	struct sysctl_req *req;
>  
> -	dir = open("/proc/sys", O_RDONLY);
> +	dir = open("/proc/sys", O_RDONLY, O_DIRECTORY);

The userns daemon starts before forking init task and thus lives in criu's namespaces.
The sysctl restore should operate on the init's (at least) net namespace, so this
open() should open the wrong sysctl. Am I wrong?

>  	if (dir < 0) {
>  		pr_perror("Can't open sysctl dir");
>  		return -1;
>  	}
>  

-- Pavel


More information about the CRIU mailing list