[CRIU] [PATCH] add join-ns opt to criu restore

Pavel Emelyanov xemul at virtuozzo.com
Wed Mar 23 04:36:57 PDT 2016


On 03/21/2016 09:12 AM, Dengguangxing wrote:
> join-ns will restore process with specified existing namespace.
> This opt can be used in this fomat:
> 	--join-ns NS:PID|NS_FILE
> for example --join-ns net:12345 or --join-ns net:/foo/bar.
> 
> pid namespaces is not supported yet. As fork() is needed to make
> new pid-namespace work. That makes it hard for criu to track the
> child-process through pid because another child process has been
> created after fork().

Thanks for your work!

Note, that once the patch gets merged we'll need the zdtm.py extention
to test this functionaluty. Only after this (and after tests pass) I'll
be able to move this patch from criu-dev branch into master and release.

Please, find comments to this particular patch inline.

> @@ -99,6 +101,41 @@ bad_ns:
>  	return -1;
>  }
> 
> +static int parse_join_ns(const char *ptr)
> +{
> +	char *aux, *ns_file, *extra_opts = NULL;
> +
> +	aux = strchr(ptr, ':');
> +	if (aux == NULL)
> +		return -1;
> +	*aux = '\0';
> +
> +	if (!strncmp(ptr, "pid", 4)) {
> +		pr_perror("pid namespace not supported in join-ns\n");
> +		return -1;
> +	} else if (strncmp(ptr, "net", 4) &&
> +		strncmp(ptr, "uts", 4) &&
> +		strncmp(ptr, "ipc", 4) &&
> +		strncmp(ptr, "user", 5) &&
> +		strncmp(ptr, "mnt", 4)) {
> +		pr_perror("Illegal namespace %s in join-ns\n", ptr);
> +		return -1;
> +	}

We already have a code that turns namespaces' names into flags,
it's in parse_unshare_arg. If you _do_ need to turn namesp into
flags, please, pull this code into a separate function (with
separate patch) and use it in your code.

> +
> +	ns_file = aux + 1;
> +	aux = strchr(ns_file, ',');
> +	if (aux != NULL) {
> +		*aux = '\0';
> +		extra_opts = aux + 1;
> +	} else {
> +		extra_opts = NULL;
> +	}
> +	if (join_ns_add(ptr, ns_file, extra_opts))
> +		return -1;
> +
> +	return 0;
> +}
> +
>  static int parse_cpu_cap(struct cr_options *opts, const char *optarg)
>  {
>  	bool inverse = false;

> @@ -810,6 +857,12 @@ usage:
>  "  --empty-ns {net}\n"
>  "			Create a namespace, but don't restore its properies.\n"
>  "			An user will retore them from action scripts.\n"
> +"  -J|--join-ns NS:PID|NS_FILE,EXTRA_OPTS\n"

Please, separate extra opts with colon and mark them as optional. So the
help would look like this

-J|--join-ns NS:PID|NS_FILE[:EXTRA_OPTS]

> +"			Join exist namespace and restore process in it.\n"
> +"			Namespace can be specified in pid or file path format.\n"
> +"			    --join-ns net:12345 or --join-ns net:/foo/bar.\n"
> +"			Extra_opts is optional, for now only user namespace support: \n"
> +"			    --join-ns user:PID,UID,GID to specify uid and gid.\n"
>  "\n"
>  "* Logging:\n"
>  "  -o|--log-file FILE    log file name\n"

> diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
> index 303c9e6..3729aed 100644
> --- a/criu/include/namespaces.h
> +++ b/criu/include/namespaces.h
> @@ -19,6 +21,24 @@ struct ns_desc {
>  	size_t		len;
>  };
> 
> +struct user_ns_extra {
> +	char	*uid;
> +	char	*gid;
> +};
> +
> +/* struct join_ns is used for storing parameters specified by --join-ns */
> +struct join_ns {
> +	struct list_head 	list;
> +	char			*ns_file;
> +	int			ns_fd; 	/* namespace file descriptor */

Not sure this is required on join_ns. The fd obtained is purely used
in one call-stack, so just keeping it on stack should work.

> +	struct ns_desc		nd; 	/* namespace descriptor */

Better grab pointer on one of existing nd-s rather than constructing
those by yourself.

> +	/* extra options of --join-ns, like uid&gid in user namespace */
> +	union {
> +		struct user_ns_extra	user_extra;
> +		char			*common_extra;
> +	}extra_opts;
> +};
> +
>  enum ns_type {
>  	NS_UNKNOWN = 0,
>  	NS_CRIU,

> diff --git a/criu/namespaces.c b/criu/namespaces.c
> index d7f8a9f..99e420a 100644
> --- a/criu/namespaces.c
> +++ b/criu/namespaces.c
> @@ -35,6 +39,164 @@ static struct ns_desc *ns_desc_array[] = {
>  	&cgroup_ns_desc,
>  };
> 
> +unsigned int get_join_ns_flag() {
> +	struct join_ns *jn;
> +	unsigned int join_ns_flags = 0;
> +
> +	list_for_each_entry(jn, &opts.join_ns, list)
> +		if (!strncmp(jn->nd.str, "uts", 4))
> +			join_ns_flags |= CLONE_NEWUTS;
> +		else if (!strncmp(jn->nd.str, "ipc", 4))
> +			join_ns_flags |= CLONE_NEWIPC;
> +		else if (!strncmp(jn->nd.str, "mnt", 4))
> +			join_ns_flags |= CLONE_NEWNS;
> +		else if (!strncmp(jn->nd.str, "pid", 4))
> +			join_ns_flags |= CLONE_NEWPID;
> +		else if (!strncmp(jn->nd.str, "net", 4))
> +			join_ns_flags |= CLONE_NEWNET;
> +		else if (!strncmp(jn->nd.str, "user", 5))
> +			join_ns_flags |= CLONE_NEWNET;
> +
> +	return join_ns_flags;

This is too heavy. Instead of keepin the strings with names and
parsing them every time, either store the namespaces' flags in
a variable, or keep an array/list of required ns_desc-s.

> +}
> +
> +int check_namespace_opts() {
> +	unsigned int join_ns_flag;
> +
> +	join_ns_flag = get_join_ns_flag();
> +
> +	errno = 22;
> +	if (join_ns_flag & opts.rst_namespaces_flags) {
> +		pr_perror("Conflict flags: -join-ns and -namespace");
> +		return -1;
> +	}
> +	if (join_ns_flag & opts.empty_ns) {
> +		pr_perror("Conflict flags: -join-ns and -empty-ns");
> +		return -1;
> +	}
> +	if (opts.empty_ns & opts.rst_namespaces_flags) {
> +		pr_perror("Conflict flags: -empty-ns and -namespace");
> +		return -1;
> +	}
> +	errno = 0;
> +	return 0;
> +}
> +
> +static int check_int_str(char *str) {
> +	if (str == NULL)
> +		return 0;
> +
> +	if (*str == '\0') {
> +		str = NULL;
> +		return 0;
> +	}
> +
> +	char *endptr;
> +	long val;
> +	errno = 22;
> +	val = strtol(str, &endptr, 10);
> +	if ((errno == ERANGE) || (endptr == str)
> +			|| (*endptr != '\0')
> +			|| (val < 0) || (val > 65535)) {
> +		pr_perror("Illegal input: %s", str);
> +		str = NULL;
> +		return -1;
> +	}
> +
> +	errno = 0;
> +	return 0;
> +}
> +
> +static int check_ns_file(char *ns_file) {
> +	int pid, ret, proc_dir;
> +
> +	if (!check_int_str(ns_file)) {
> +		pid = atoi(ns_file);
> +		if (pid <= 0) {
> +			pr_perror("Invalid join_ns pid %s", ns_file);
> +			return -1;
> +		}
> +		proc_dir = open_pid_proc(pid);
> +		if (proc_dir < 0) {
> +			pr_perror("Invalid join_ns pid: /proc/%s not found", ns_file);
> +			return -1;
> +		}
> +		return 0;
> +	}
> +
> +	ret = access(ns_file, 0);
> +	if (ret < 0) {
> +		pr_perror("Can't access join-ns file: %s", ns_file);
> +		return -1;
> +	}

I doubt these checks should be done here. Once you'll try to join the
namespace, specified by the option, you'll hit an error and restore
would abort antway.

> +	return 0;
> +}
> +
> +static int set_user_extra_opts(struct join_ns *jn, char *extra_opts) {
> +	char *uid, *gid, *aux;
> +	if (extra_opts == NULL) {
> +		jn->extra_opts.user_extra.uid = NULL;
> +		jn->extra_opts.user_extra.gid = NULL;
> +		return 0;
> +	}
> +
> +	uid = extra_opts;
> +	aux = strchr(extra_opts, ',');
> +	if (aux == NULL) {
> +		gid = NULL;
> +	} else {
> +		*aux = '\0';
> +		gid = aux + 1;
> +	}
> +
> +	if (check_int_str(uid) || check_int_str(gid)) {
> +		return -1;
> +	}
> +	jn->extra_opts.user_extra.uid = uid;
> +	jn->extra_opts.user_extra.gid = gid;
> +
> +	return 0;
> +}
> +
> +int join_ns_add(const char *type, char *ns_file, char *extra_opts) {
> +	struct join_ns *jn;
> +
> +	jn = xmalloc(sizeof(*jn));
> +	if (!jn) {
> +		return -1;
> +	}
> +
> +	if (check_ns_file(ns_file)) {
> +		return -1;
> +	}
> +
> +	jn->ns_file = ns_file;
> +	if (!strncmp(type, "net", 4)) {
> +			jn->nd = net_ns_desc;
> +	} else if (!strncmp(type, "uts", 4)) {
> +			jn->nd = uts_ns_desc;
> +	} else if (!strncmp(type, "ipc", 4)) {
> +			jn->nd = ipc_ns_desc;
> +	} else if (!strncmp(type, "pid", 4)) {
> +			jn->nd = pid_ns_desc;
> +	} else if (!strncmp(type, "user", 5)) {
> +			jn->nd = user_ns_desc;
> +			if (set_user_extra_opts(jn, extra_opts)) {
> +				pr_perror("invalid user namespace extra_opts %s\n", extra_opts);
> +				return -1;
> +			}
> +	} else if (!strncmp(type, "mnt", 4)) {
> +			jn->nd = mnt_ns_desc;
> +	} else {
> +			pr_perror("invalid namespace type %s\n", type);
> +			return -1;

Indentation is one tab wider than it should.

> +	}
> +
> +	list_add_tail(&jn->list, &opts.join_ns);
> +	pr_info("Added %s:%s join namespace\n", type, ns_file);
> +	return 0;
> +}
> +
>  static unsigned int parse_ns_link(char *link, size_t len, struct ns_desc *d)
>  {
>  	unsigned long kid = 0;
> @@ -78,7 +240,7 @@ int switch_ns(int pid, struct ns_desc *nd, int *rst)
> 
>  	nsfd = open_proc(pid, "ns/%s", nd->str);
>  	if (nsfd < 0) {
> -		pr_perror("Can't open ipcns file");
> +		pr_perror("Can't open ns file");

All cleanups should go in separate patches, please.

>  		goto err_ns;
>  	}
> 
> @@ -1371,6 +1533,150 @@ static int prepare_userns_creds()
>  	return 0;
>  }
> 
> +static int get_join_ns_fd(struct join_ns *jn) {
> +	int pid, fd;
> +	char nsf[32];
> +	char *pnsf;
> +
> +	if (jn->ns_file == NULL) {
> +		pr_perror("join-ns file is NULL");
> +		return -1;
> +	}
> +
> +	pid = atoi(jn->ns_file);
> +	if (pid > 0) {
> +		snprintf(nsf, sizeof(nsf), "/proc/%d/ns/%s", pid, jn->nd.str);
> +		pnsf = nsf;
> +	} else {
> +		pnsf = jn->ns_file;
> +	}
> +
> +	fd = open(pnsf, O_RDONLY);
> +	if (fd < 0) {
> +		pr_perror("Can't open ns file: %s", pnsf);
> +		return -1;
> +	}
> +	jn->ns_fd = fd;
> +	return 0;
> +}
> +
> +static int close_join_ns_fd(struct join_ns *jn) {
> +	if (jn->ns_fd < 0) {

The close_safe() checks for <0 itself.

> +		return -1;
> +	}
> +
> +	if (close_safe(&jn->ns_fd))
> +		return -1;
> +	return 0;
> +}
> +
> +static int switch_join_ns(struct join_ns *jn) {
> +	if (jn->ns_fd < 0) {
> +		pr_perror("Invalid ns fd in join-ns");
> +		return -1;
> +	}
> +
> +	int ret = -1;
> +	int self_fd;
> +	struct stat st, self_st;
> +	char buf[32];

We follow the Linux kernel coding style. In particular, in function
declarations braces should be on separate line and all the variables
should be declared before any code.

> +
> +	if (fstat(jn->ns_fd, &st) == -1) {
> +		pr_perror("Can't get ns file %s stat", jn->ns_file);
> +		goto err_ns;
> +	}
> +
> +	snprintf(buf, sizeof(buf), "/proc/self/ns/%s", jn->nd.str);
> +	self_fd = open(buf, O_RDONLY);
> +	if (self_fd < 0) {
> +		pr_perror("Can't open ns file: %s", buf);
> +		goto err_ns;
> +	}
> +	if (fstat(self_fd, &self_st) == -1) {

There's no need in opening self-fd, just use stat() system call and
stat by path.

> +		pr_perror("Can't get ns file %s stat", buf);
> +		goto err_self;
> +	}
> +
> +	if (st.st_ino != self_st.st_ino) {

Is this check _really_ required? Kernel would behave OK and won't
move the process if namespaces already match.

> +		ret = setns(jn->ns_fd, jn->nd.cflag);
> +		if (ret < 0) {
> +			pr_perror("Can't setns %s/%s", jn->ns_file, jn->nd.str);
> +			goto err_self;
> +		}
> +	}
> +	ret = 0;
> +
> +err_self:
> +	close(self_fd);
> +err_ns:
> +	return ret;
> +}
> +
> +static int switch_user_join_ns(struct join_ns *jn) {
> +	if (jn == NULL) {
> +		return 0;
> +	}
> +
> +	uid_t uid;
> +	gid_t gid;
> +
> +	if (switch_join_ns(jn))
> +		return -1;
> +
> +	if (jn->extra_opts.user_extra.uid == NULL) {
> +		uid = getuid();
> +	} else {
> +		uid = atoi(jn->extra_opts.user_extra.uid);
> +	}
> +	if (jn->extra_opts.user_extra.gid == NULL) {
> +		gid = getgid();
> +	} else {
> +		gid = atoi(jn->extra_opts.user_extra.gid);
> +	}

All those { and } braces can be avoided here

> +
> +	/* FIXME:
> +	 * if err occurs in setuid/setgid, should we just alert or
> +	 * return an error
> +	 */
> +	if (setuid(uid)) {
> +		pr_perror("setuid failed while joining userns");
> +		return -1;
> +	}
> +	if (setgid(gid)) {
> +		pr_perror("setgid failed while joining userns");
> +		return -1;
> +	}
> +	return 0;
> +}
> +
> +int join_namespaces()
> +{
> +	struct join_ns *jn, *user_jn = NULL;
> +	int ret = -1;
> +
> +	list_for_each_entry(jn, &opts.join_ns, list)
> +		if (get_join_ns_fd(jn)) {
> +			goto err_out;
> +		}
> +
> +	list_for_each_entry(jn, &opts.join_ns, list)
> +		if (!strncmp(jn->nd.str, "user", 5)) {
> +			user_jn = jn;
> +		} else {
> +			if (switch_join_ns(jn))
> +				goto err_out;
> +		}
> +
> +	if (switch_user_join_ns(user_jn))
> +		goto err_out;
> +
> +	ret = 0;
> +err_out:
> +	list_for_each_entry(jn, &opts.join_ns, list)
> +		close_join_ns_fd(jn);
> +	return ret;
> +}
> +
>  int prepare_namespace(struct pstree_item *item, unsigned long clone_flags)
>  {
>  	pid_t pid = item->pid.virt;
> diff --git a/criu/net.c b/criu/net.c
> index a67cd6e..ba871c7 100644
> --- a/criu/net.c
> +++ b/criu/net.c
> @@ -1094,8 +1094,11 @@ int prepare_net_ns(int pid)
>  {
>  	int ret = 0;
>  	NetnsEntry *netns = NULL;
> +	unsigned int join_ns_flag = 0;
> 
> -	if (!(opts.empty_ns & CLONE_NEWNET)) {
> +	join_ns_flag = get_join_ns_flag();
> +	if (!(opts.empty_ns & CLONE_NEWNET) ||
> +			!(join_ns_flag & CLONE_NEWNET)) {

Hasn't this already been checked in check_namespace_opts()?

>  		ret = restore_netns_conf(pid, &netns);
>  		if (!ret)
>  			ret = restore_links(pid, &netns);

-- Pavel


More information about the CRIU mailing list