[CRIU] [PATCH v3 04/16] autofs: dump stage introduced

Pavel Emelyanov xemul at parallels.com
Mon Dec 14 03:03:41 PST 2015


On 12/10/2015 06:16 PM, Stanislav Kinsburskiy wrote:

> +static char *add_to_string_vargs(char *str, const char *fmt, va_list args)
> +{
> +	size_t offset = 0, delta;
> +	int ret;
> +	char *new;
> +	va_list tmp;
> +
> +	if (str)
> +		offset = strlen(str);
> +	delta = strlen(fmt) * 2;
> +
> +	do {
> +		ret = -ENOMEM;
> +		new = xrealloc(str, offset + delta);
> +		if (new) {
> +			va_copy(tmp, args);
> +			ret = vsnprintf(new + offset, delta, fmt, tmp);
> +			if (ret >= delta) {
> +				delta = ret +1;
> +				str = new;
> +				ret = 0;
> +			}
> +		}
> +	} while (ret == 0);
> +
> +	if (ret == -ENOMEM) {
> +		/* realloc failed. We must release former string */
> +		pr_err("Failed to allocate string\n");
> +		xfree(str);
> +	} else if (ret < 0) {
> +		/* vsnprintf failed */
> +		pr_err("Failed to print string\n");
> +		xfree(new);
> +		new = NULL;
> +	}
> +	return new;

This is an overkill for a very simple thing you want. You use this in 3 places.

1. To stat("/proc/%d/fd/%d"). It's MUCH simpler to just introduce a buffer of
   large enough length (see the PSFDS constant for inspiration) and sprintf
   the string there.

2. To attach plain string option to existing str. We already have the attach_option()
   helper in mount.c

3. To attach read_fd=%d option with fd value. Make the combination of above two:
   static buffer for the option itself and existing attach_option() to attach one.

> +}
> +

> +static int autofs_find_pipe_read_end(int pgrp, long ino, int *read_fd)
> +{
> +	DIR *dir;
> +	struct dirent *de;
> +	int ret = -1;
> +
> +	dir = opendir_proc(pgrp, "fd");
> +	if (dir == NULL)
> +		return -1;
> +
> +	while ((de = readdir(dir))) {
> +		struct stat buf;
> +		int found, mode, fd;
> +
> +		if (dir_dots(de))
> +			continue;
> +
> +		if (fstatat(dirfd(dir), de->d_name, &buf, 0) < 0) {
> +			pr_perror("Failed to fstatat");
> +			break;
> +		}
> +
> +		fd = atoi(de->d_name);
> +
> +		found = autofs_check_fd_stat(&buf, pgrp, fd, ino, &mode);
> +		if (found < 0)
> +			break;
> +		if (found && (mode == O_RDONLY)) {
> +			*read_fd = fd;
> +			ret = 0;
> +			break;
> +		}
> +	}
> +
> +	closedir(dir);
> +	close_pid_proc();

Don't close_pid_proc() here, with this you kill the cache which you're not
supposed to.

> +
> +	return ret;
> +}
> +
> +static int autofs_fixup_options(struct mount_info *pm, int pgrp, bool catatonic,
> +				int kernel_fd, bool kernel_fd_alive,
> +				int read_fd)
> +{
> +	char **options;
> +	int nr_opts, i, err = -1;
> +	char *new_opt;
> +	struct mount_info *t;
> +
> +	new_opt = construct_string("pgrp=%d,fd=%d", pgrp, kernel_fd);
> +	if (!new_opt)
> +		return -1;
> +
> +	if (!catatonic && !kernel_fd_alive) {
> +		/* Write end is closed or invalid
> +		 * Let's introduce new options only for us to carry pipe
> +		 * information. */

So, do I get it right, that the only reason for doing the complex write-end 
discovery is to decide whether or not to keep read_fd=%d in the image?

> +		new_opt = add_to_string(new_opt, ",read_fd=%d", read_fd);

If you need to keep some fs-specific data in the image that is not going to
be passed as-is into the mount() call, just introduce the necessary optional
fields in the mnt proto file.

> +	}
> +	if (!new_opt)
> +		return -1;
> +
> +	split(pm->options, ',', &options, &nr_opts);
> +	if (!options)
> +		return -1;
> +
> +	for (i = 0; i < nr_opts; i++) {
> +		char *opt = options[i];
> +
> +		if (!strncmp(opt, "pgrp=", strlen("pgrp=")) ||
> +		    !strncmp(opt, "fd=", strlen("fd=")) ||
> +		    !strncmp(opt, "pipe_ino=", strlen("pipe_ino=")))
> +			continue;
> +
> +		new_opt = add_to_string(new_opt, ",%s", opt);
> +		if (!new_opt)
> +			goto out;
> +	}
> +
> +	xfree(pm->options);
> +	pm->options = new_opt;
> +
> +	/* Change options for all bind-mounts */

What for?

> +	list_for_each_entry(t, &pm->mnt_bind, mnt_bind) {
> +		xfree(t->options);
> +		t->options = xstrdup(pm->options);
> +		if (!t->options)
> +			goto out;
> +	}
> +
> +	err = 0;
> +
> +	pr_info("autofs fixed options: \"%s\"\n", pm->options);
> +
> +out:
> +	for (i = 0; i < nr_opts; i++)
> +		xfree(options[i]);
> +	xfree(options);
> +	return err;
> +}
> +
> +int autofs_dump(struct mount_info *pm)
> +{
> +	char *pgrp_opt, *fd_opt, *pipe_ino_opt;
> +	int pgrp, vpgrp = 0;
> +	int fd, kernel_fd, read_fd = -1;
> +	long pipe_ino;
> +	bool catatonic = false, kpipe_alive = false;
> +
> +	pr_info("autofs \"%s\" options: %s\n", pm->mountpoint, pm->options);
> +
> +	pgrp_opt = strstr(pm->options, "pgrp=");
> +	if (!pgrp_opt) {
> +		pr_err("Failed to find pgrp option\n");
> +		return -1;
> +	}
> +
> +	fd_opt = strstr(pm->options, "fd=");
> +	if (!fd_opt) {
> +		pr_err("Failed to find fd option\n");
> +		return -1;
> +	}
> +
> +	pipe_ino_opt = strstr(pm->options, "pipe_ino=");
> +	if (!pipe_ino_opt) {
> +		pr_err("Failed to find pipe_ino option\n");
> +		return -1;
> +	}

The options parsing implementation is quite ineffective. First you call
strstr() per-option, then sscanf() on found stuff, then you split() the
whole string and strcmp() individual pieces. Plz, optimize this -- first
split the opts, then scan, keeping the needed stuff into variables, then
do whatever you need on the options parsed, then construct the opts
string back for image.

> +
> +	if (sscanf(pgrp_opt, "pgrp=%d", &pgrp) != 1) {
> +		pr_err("Failed to get pgrp: %s\n", pgrp_opt);
> +		return -1;
> +	}
> +
> +	if (sscanf(fd_opt, "fd=%d", &kernel_fd) != 1) {
> +		pr_err("Failed to get fd: %s\n", fd_opt);
> +		return -1;
> +	}
> +
> +	if (sscanf(pipe_ino_opt, "pipe_ino=%ld", &pipe_ino) != 1) {
> +		pr_err("Failed to get pipe_ino: %s\n", pipe_ino_opt);
> +		return -1;
> +	}
> +
> +	/* When fd is equal to -1, it's a catatonic mount.
> +	 * In this case fd, pipe_ino, and pgrp doesn't mean anything. */
> +	if (kernel_fd == -1) {
> +		catatonic = true;
> +		goto fix_it;
> +	}
> +
> +	/* We need to get virtual pgrp to restore mount */
> +	vpgrp = pid_to_virt(pgrp);
> +	if (!vpgrp) {
> +		pr_err("failed to find pstree item with pid %d\n",
> +				pgrp);
> +		pr_err("Non-catatonic mount without master?\n");
> +		return -1;
> +	}
> +
> +	/* Let' check whether write end is still open */
> +	switch (autofs_kernel_pipe_alive(pgrp, kernel_fd, pipe_ino)){
> +		case 1:
> +			/* This kernel pipe end fd.
> +			 * We don't need to search for write end. */
> +			kpipe_alive = true;
> +		case 0:
> +			break;
> +		default:
> +			pr_err("Failed to check fd %d in process %d\n",
> +					kernel_fd, pgrp);
> +			return -1;
> +	}
> +
> +	/* Let's try to find process file descriptors, which corresponds to
> +	 * desired pipe. */
> +	if (autofs_find_pipe_read_end(pgrp, pipe_ino, &read_fd) < 0) {
> +		pr_err("Failed to find read pipe fd (ino %ld) in process %d\n",
> +				pipe_ino, pgrp);
> +		return -1;
> +	}
> +
> +	if (read_fd == -1) {
> +		pr_err("Master %d doesn't have a read end of the pipe with "
> +			"inode %ld opened\n", pgrp, pipe_ino);
> +		pr_err("Abandoned mount or control was delegated to child?\n");
> +		return -1;
> +	}
> +
> +	/* Let's check, that read end is empty */
> +	fd = open_proc(pgrp, "fd/%d", read_fd);
> +	if (fd < 0)
> +		return -1;
> +
> +	if (fd_has_data(fd)) {
> +		pr_err("Process %d autofs pipe fd %d is not empty.\n", pgrp,
> +				read_fd);
> +		pr_err("Try again later.\n");
> +		return -1;
> +	}
> +
> +	close(fd);
> +
> +fix_it:
> +	return autofs_fixup_options(pm, vpgrp, catatonic,
> +				    kernel_fd, kpipe_alive,
> +				    read_fd);
> +}

-- Pavel



More information about the CRIU mailing list