[CRIU] [PATCH v3 04/16] autofs: dump stage introduced

Stanislav Kinsburskiy skinsbursky at odin.com
Mon Dec 14 03:12:47 PST 2015



14.12.2015 12:03, Pavel Emelyanov пишет:
> On 12/10/2015 06:16 PM, Stanislav Kinsburskiy wrote:
>
>> +static char *add_to_string_vargs(char *str, const char *fmt, va_list args)
>> +{
>> +	size_t offset = 0, delta;
>> +	int ret;
>> +	char *new;
>> +	va_list tmp;
>> +
>> +	if (str)
>> +		offset = strlen(str);
>> +	delta = strlen(fmt) * 2;
>> +
>> +	do {
>> +		ret = -ENOMEM;
>> +		new = xrealloc(str, offset + delta);
>> +		if (new) {
>> +			va_copy(tmp, args);
>> +			ret = vsnprintf(new + offset, delta, fmt, tmp);
>> +			if (ret >= delta) {
>> +				delta = ret +1;
>> +				str = new;
>> +				ret = 0;
>> +			}
>> +		}
>> +	} while (ret == 0);
>> +
>> +	if (ret == -ENOMEM) {
>> +		/* realloc failed. We must release former string */
>> +		pr_err("Failed to allocate string\n");
>> +		xfree(str);
>> +	} else if (ret < 0) {
>> +		/* vsnprintf failed */
>> +		pr_err("Failed to print string\n");
>> +		xfree(new);
>> +		new = NULL;
>> +	}
>> +	return new;
> This is an overkill for a very simple thing you want. You use this in 3 places.
>
> 1. To stat("/proc/%d/fd/%d"). It's MUCH simpler to just introduce a buffer of
>     large enough length (see the PSFDS constant for inspiration) and sprintf
>     the string there.
>
> 2. To attach plain string option to existing str. We already have the attach_option()
>     helper in mount.c
>
> 3. To attach read_fd=%d option with fd value. Make the combination of above two:
>     static buffer for the option itself and existing attach_option() to attach one.
>

These generic helpers can be used in other places and simplify the code.
I was thinking about moving them to util.c
But of course, it can be replaced by these three steps you mentioned, if 
you think, that such helpers doesn't worth to have and use.

>> +}
>> +
>> +static int autofs_find_pipe_read_end(int pgrp, long ino, int *read_fd)
>> +{
>> +	DIR *dir;
>> +	struct dirent *de;
>> +	int ret = -1;
>> +
>> +	dir = opendir_proc(pgrp, "fd");
>> +	if (dir == NULL)
>> +		return -1;
>> +
>> +	while ((de = readdir(dir))) {
>> +		struct stat buf;
>> +		int found, mode, fd;
>> +
>> +		if (dir_dots(de))
>> +			continue;
>> +
>> +		if (fstatat(dirfd(dir), de->d_name, &buf, 0) < 0) {
>> +			pr_perror("Failed to fstatat");
>> +			break;
>> +		}
>> +
>> +		fd = atoi(de->d_name);
>> +
>> +		found = autofs_check_fd_stat(&buf, pgrp, fd, ino, &mode);
>> +		if (found < 0)
>> +			break;
>> +		if (found && (mode == O_RDONLY)) {
>> +			*read_fd = fd;
>> +			ret = 0;
>> +			break;
>> +		}
>> +	}
>> +
>> +	closedir(dir);
>> +	close_pid_proc();
> Don't close_pid_proc() here, with this you kill the cache which you're not
> supposed to.

Ok.

>> +
>> +	return ret;
>> +}
>> +
>> +static int autofs_fixup_options(struct mount_info *pm, int pgrp, bool catatonic,
>> +				int kernel_fd, bool kernel_fd_alive,
>> +				int read_fd)
>> +{
>> +	char **options;
>> +	int nr_opts, i, err = -1;
>> +	char *new_opt;
>> +	struct mount_info *t;
>> +
>> +	new_opt = construct_string("pgrp=%d,fd=%d", pgrp, kernel_fd);
>> +	if (!new_opt)
>> +		return -1;
>> +
>> +	if (!catatonic && !kernel_fd_alive) {
>> +		/* Write end is closed or invalid
>> +		 * Let's introduce new options only for us to carry pipe
>> +		 * information. */
> So, do I get it right, that the only reason for doing the complex write-end
> discovery is to decide whether or not to keep read_fd=%d in the image?

I don't understand. Write end is known and just checked for presence and 
validity.
Discovery is done for read end. And we do it for two purposes:
1) Check, that it's empty
2) Dump, if write end is closed.

>> +		new_opt = add_to_string(new_opt, ",read_fd=%d", read_fd);
> If you need to keep some fs-specific data in the image that is not going to
> be passed as-is into the mount() call, just introduce the necessary optional
> fields in the mnt proto file.

This mnt proto file is generic.
Do you want me to add some autofs-specific fild to it (which can be 
unset, btw, thus I need another boolean flag)?


>> +	}
>> +	if (!new_opt)
>> +		return -1;
>> +
>> +	split(pm->options, ',', &options, &nr_opts);
>> +	if (!options)
>> +		return -1;
>> +
>> +	for (i = 0; i < nr_opts; i++) {
>> +		char *opt = options[i];
>> +
>> +		if (!strncmp(opt, "pgrp=", strlen("pgrp=")) ||
>> +		    !strncmp(opt, "fd=", strlen("fd=")) ||
>> +		    !strncmp(opt, "pipe_ino=", strlen("pipe_ino=")))
>> +			continue;
>> +
>> +		new_opt = add_to_string(new_opt, ",%s", opt);
>> +		if (!new_opt)
>> +			goto out;
>> +	}
>> +
>> +	xfree(pm->options);
>> +	pm->options = new_opt;
>> +
>> +	/* Change options for all bind-mounts */
> What for?

To make sure, that mount will be called with the options I fixed, and 
not with original ones (like it can be, because actual mount point to 
create is selected randomly).

>> +	list_for_each_entry(t, &pm->mnt_bind, mnt_bind) {
>> +		xfree(t->options);
>> +		t->options = xstrdup(pm->options);
>> +		if (!t->options)
>> +			goto out;
>> +	}
>> +
>> +	err = 0;
>> +
>> +	pr_info("autofs fixed options: \"%s\"\n", pm->options);
>> +
>> +out:
>> +	for (i = 0; i < nr_opts; i++)
>> +		xfree(options[i]);
>> +	xfree(options);
>> +	return err;
>> +}
>> +
>> +int autofs_dump(struct mount_info *pm)
>> +{
>> +	char *pgrp_opt, *fd_opt, *pipe_ino_opt;
>> +	int pgrp, vpgrp = 0;
>> +	int fd, kernel_fd, read_fd = -1;
>> +	long pipe_ino;
>> +	bool catatonic = false, kpipe_alive = false;
>> +
>> +	pr_info("autofs \"%s\" options: %s\n", pm->mountpoint, pm->options);
>> +
>> +	pgrp_opt = strstr(pm->options, "pgrp=");
>> +	if (!pgrp_opt) {
>> +		pr_err("Failed to find pgrp option\n");
>> +		return -1;
>> +	}
>> +
>> +	fd_opt = strstr(pm->options, "fd=");
>> +	if (!fd_opt) {
>> +		pr_err("Failed to find fd option\n");
>> +		return -1;
>> +	}
>> +
>> +	pipe_ino_opt = strstr(pm->options, "pipe_ino=");
>> +	if (!pipe_ino_opt) {
>> +		pr_err("Failed to find pipe_ino option\n");
>> +		return -1;
>> +	}
> The options parsing implementation is quite ineffective. First you call
> strstr() per-option, then sscanf() on found stuff, then you split() the
> whole string and strcmp() individual pieces. Plz, optimize this -- first
> split the opts, then scan, keeping the needed stuff into variables, then
> do whatever you need on the options parsed, then construct the opts
> string back for image.

Ок.

>> +
>> +	if (sscanf(pgrp_opt, "pgrp=%d", &pgrp) != 1) {
>> +		pr_err("Failed to get pgrp: %s\n", pgrp_opt);
>> +		return -1;
>> +	}
>> +
>> +	if (sscanf(fd_opt, "fd=%d", &kernel_fd) != 1) {
>> +		pr_err("Failed to get fd: %s\n", fd_opt);
>> +		return -1;
>> +	}
>> +
>> +	if (sscanf(pipe_ino_opt, "pipe_ino=%ld", &pipe_ino) != 1) {
>> +		pr_err("Failed to get pipe_ino: %s\n", pipe_ino_opt);
>> +		return -1;
>> +	}
>> +
>> +	/* When fd is equal to -1, it's a catatonic mount.
>> +	 * In this case fd, pipe_ino, and pgrp doesn't mean anything. */
>> +	if (kernel_fd == -1) {
>> +		catatonic = true;
>> +		goto fix_it;
>> +	}
>> +
>> +	/* We need to get virtual pgrp to restore mount */
>> +	vpgrp = pid_to_virt(pgrp);
>> +	if (!vpgrp) {
>> +		pr_err("failed to find pstree item with pid %d\n",
>> +				pgrp);
>> +		pr_err("Non-catatonic mount without master?\n");
>> +		return -1;
>> +	}
>> +
>> +	/* Let' check whether write end is still open */
>> +	switch (autofs_kernel_pipe_alive(pgrp, kernel_fd, pipe_ino)){
>> +		case 1:
>> +			/* This kernel pipe end fd.
>> +			 * We don't need to search for write end. */
>> +			kpipe_alive = true;
>> +		case 0:
>> +			break;
>> +		default:
>> +			pr_err("Failed to check fd %d in process %d\n",
>> +					kernel_fd, pgrp);
>> +			return -1;
>> +	}
>> +
>> +	/* Let's try to find process file descriptors, which corresponds to
>> +	 * desired pipe. */
>> +	if (autofs_find_pipe_read_end(pgrp, pipe_ino, &read_fd) < 0) {
>> +		pr_err("Failed to find read pipe fd (ino %ld) in process %d\n",
>> +				pipe_ino, pgrp);
>> +		return -1;
>> +	}
>> +
>> +	if (read_fd == -1) {
>> +		pr_err("Master %d doesn't have a read end of the pipe with "
>> +			"inode %ld opened\n", pgrp, pipe_ino);
>> +		pr_err("Abandoned mount or control was delegated to child?\n");
>> +		return -1;
>> +	}
>> +
>> +	/* Let's check, that read end is empty */
>> +	fd = open_proc(pgrp, "fd/%d", read_fd);
>> +	if (fd < 0)
>> +		return -1;
>> +
>> +	if (fd_has_data(fd)) {
>> +		pr_err("Process %d autofs pipe fd %d is not empty.\n", pgrp,
>> +				read_fd);
>> +		pr_err("Try again later.\n");
>> +		return -1;
>> +	}
>> +
>> +	close(fd);
>> +
>> +fix_it:
>> +	return autofs_fixup_options(pm, vpgrp, catatonic,
>> +				    kernel_fd, kpipe_alive,
>> +				    read_fd);
>> +}
> -- Pavel
>



More information about the CRIU mailing list