[CRIU] [PATCH v3 04/16] autofs: dump stage introduced
Pavel Emelyanov
xemul at parallels.com
Mon Dec 14 03:03:41 PST 2015
On 12/10/2015 06:16 PM, Stanislav Kinsburskiy wrote:
> +static char *add_to_string_vargs(char *str, const char *fmt, va_list args)
> +{
> + size_t offset = 0, delta;
> + int ret;
> + char *new;
> + va_list tmp;
> +
> + if (str)
> + offset = strlen(str);
> + delta = strlen(fmt) * 2;
> +
> + do {
> + ret = -ENOMEM;
> + new = xrealloc(str, offset + delta);
> + if (new) {
> + va_copy(tmp, args);
> + ret = vsnprintf(new + offset, delta, fmt, tmp);
> + if (ret >= delta) {
> + delta = ret +1;
> + str = new;
> + ret = 0;
> + }
> + }
> + } while (ret == 0);
> +
> + if (ret == -ENOMEM) {
> + /* realloc failed. We must release former string */
> + pr_err("Failed to allocate string\n");
> + xfree(str);
> + } else if (ret < 0) {
> + /* vsnprintf failed */
> + pr_err("Failed to print string\n");
> + xfree(new);
> + new = NULL;
> + }
> + return new;
This is an overkill for a very simple thing you want. You use this in 3 places.
1. To stat("/proc/%d/fd/%d"). It's MUCH simpler to just introduce a buffer of
large enough length (see the PSFDS constant for inspiration) and sprintf
the string there.
2. To attach plain string option to existing str. We already have the attach_option()
helper in mount.c
3. To attach read_fd=%d option with fd value. Make the combination of above two:
static buffer for the option itself and existing attach_option() to attach one.
> +}
> +
> +static int autofs_find_pipe_read_end(int pgrp, long ino, int *read_fd)
> +{
> + DIR *dir;
> + struct dirent *de;
> + int ret = -1;
> +
> + dir = opendir_proc(pgrp, "fd");
> + if (dir == NULL)
> + return -1;
> +
> + while ((de = readdir(dir))) {
> + struct stat buf;
> + int found, mode, fd;
> +
> + if (dir_dots(de))
> + continue;
> +
> + if (fstatat(dirfd(dir), de->d_name, &buf, 0) < 0) {
> + pr_perror("Failed to fstatat");
> + break;
> + }
> +
> + fd = atoi(de->d_name);
> +
> + found = autofs_check_fd_stat(&buf, pgrp, fd, ino, &mode);
> + if (found < 0)
> + break;
> + if (found && (mode == O_RDONLY)) {
> + *read_fd = fd;
> + ret = 0;
> + break;
> + }
> + }
> +
> + closedir(dir);
> + close_pid_proc();
Don't close_pid_proc() here, with this you kill the cache which you're not
supposed to.
> +
> + return ret;
> +}
> +
> +static int autofs_fixup_options(struct mount_info *pm, int pgrp, bool catatonic,
> + int kernel_fd, bool kernel_fd_alive,
> + int read_fd)
> +{
> + char **options;
> + int nr_opts, i, err = -1;
> + char *new_opt;
> + struct mount_info *t;
> +
> + new_opt = construct_string("pgrp=%d,fd=%d", pgrp, kernel_fd);
> + if (!new_opt)
> + return -1;
> +
> + if (!catatonic && !kernel_fd_alive) {
> + /* Write end is closed or invalid
> + * Let's introduce new options only for us to carry pipe
> + * information. */
So, do I get it right, that the only reason for doing the complex write-end
discovery is to decide whether or not to keep read_fd=%d in the image?
> + new_opt = add_to_string(new_opt, ",read_fd=%d", read_fd);
If you need to keep some fs-specific data in the image that is not going to
be passed as-is into the mount() call, just introduce the necessary optional
fields in the mnt proto file.
> + }
> + if (!new_opt)
> + return -1;
> +
> + split(pm->options, ',', &options, &nr_opts);
> + if (!options)
> + return -1;
> +
> + for (i = 0; i < nr_opts; i++) {
> + char *opt = options[i];
> +
> + if (!strncmp(opt, "pgrp=", strlen("pgrp=")) ||
> + !strncmp(opt, "fd=", strlen("fd=")) ||
> + !strncmp(opt, "pipe_ino=", strlen("pipe_ino=")))
> + continue;
> +
> + new_opt = add_to_string(new_opt, ",%s", opt);
> + if (!new_opt)
> + goto out;
> + }
> +
> + xfree(pm->options);
> + pm->options = new_opt;
> +
> + /* Change options for all bind-mounts */
What for?
> + list_for_each_entry(t, &pm->mnt_bind, mnt_bind) {
> + xfree(t->options);
> + t->options = xstrdup(pm->options);
> + if (!t->options)
> + goto out;
> + }
> +
> + err = 0;
> +
> + pr_info("autofs fixed options: \"%s\"\n", pm->options);
> +
> +out:
> + for (i = 0; i < nr_opts; i++)
> + xfree(options[i]);
> + xfree(options);
> + return err;
> +}
> +
> +int autofs_dump(struct mount_info *pm)
> +{
> + char *pgrp_opt, *fd_opt, *pipe_ino_opt;
> + int pgrp, vpgrp = 0;
> + int fd, kernel_fd, read_fd = -1;
> + long pipe_ino;
> + bool catatonic = false, kpipe_alive = false;
> +
> + pr_info("autofs \"%s\" options: %s\n", pm->mountpoint, pm->options);
> +
> + pgrp_opt = strstr(pm->options, "pgrp=");
> + if (!pgrp_opt) {
> + pr_err("Failed to find pgrp option\n");
> + return -1;
> + }
> +
> + fd_opt = strstr(pm->options, "fd=");
> + if (!fd_opt) {
> + pr_err("Failed to find fd option\n");
> + return -1;
> + }
> +
> + pipe_ino_opt = strstr(pm->options, "pipe_ino=");
> + if (!pipe_ino_opt) {
> + pr_err("Failed to find pipe_ino option\n");
> + return -1;
> + }
The options parsing implementation is quite ineffective. First you call
strstr() per-option, then sscanf() on found stuff, then you split() the
whole string and strcmp() individual pieces. Plz, optimize this -- first
split the opts, then scan, keeping the needed stuff into variables, then
do whatever you need on the options parsed, then construct the opts
string back for image.
> +
> + if (sscanf(pgrp_opt, "pgrp=%d", &pgrp) != 1) {
> + pr_err("Failed to get pgrp: %s\n", pgrp_opt);
> + return -1;
> + }
> +
> + if (sscanf(fd_opt, "fd=%d", &kernel_fd) != 1) {
> + pr_err("Failed to get fd: %s\n", fd_opt);
> + return -1;
> + }
> +
> + if (sscanf(pipe_ino_opt, "pipe_ino=%ld", &pipe_ino) != 1) {
> + pr_err("Failed to get pipe_ino: %s\n", pipe_ino_opt);
> + return -1;
> + }
> +
> + /* When fd is equal to -1, it's a catatonic mount.
> + * In this case fd, pipe_ino, and pgrp doesn't mean anything. */
> + if (kernel_fd == -1) {
> + catatonic = true;
> + goto fix_it;
> + }
> +
> + /* We need to get virtual pgrp to restore mount */
> + vpgrp = pid_to_virt(pgrp);
> + if (!vpgrp) {
> + pr_err("failed to find pstree item with pid %d\n",
> + pgrp);
> + pr_err("Non-catatonic mount without master?\n");
> + return -1;
> + }
> +
> + /* Let' check whether write end is still open */
> + switch (autofs_kernel_pipe_alive(pgrp, kernel_fd, pipe_ino)){
> + case 1:
> + /* This kernel pipe end fd.
> + * We don't need to search for write end. */
> + kpipe_alive = true;
> + case 0:
> + break;
> + default:
> + pr_err("Failed to check fd %d in process %d\n",
> + kernel_fd, pgrp);
> + return -1;
> + }
> +
> + /* Let's try to find process file descriptors, which corresponds to
> + * desired pipe. */
> + if (autofs_find_pipe_read_end(pgrp, pipe_ino, &read_fd) < 0) {
> + pr_err("Failed to find read pipe fd (ino %ld) in process %d\n",
> + pipe_ino, pgrp);
> + return -1;
> + }
> +
> + if (read_fd == -1) {
> + pr_err("Master %d doesn't have a read end of the pipe with "
> + "inode %ld opened\n", pgrp, pipe_ino);
> + pr_err("Abandoned mount or control was delegated to child?\n");
> + return -1;
> + }
> +
> + /* Let's check, that read end is empty */
> + fd = open_proc(pgrp, "fd/%d", read_fd);
> + if (fd < 0)
> + return -1;
> +
> + if (fd_has_data(fd)) {
> + pr_err("Process %d autofs pipe fd %d is not empty.\n", pgrp,
> + read_fd);
> + pr_err("Try again later.\n");
> + return -1;
> + }
> +
> + close(fd);
> +
> +fix_it:
> + return autofs_fixup_options(pm, vpgrp, catatonic,
> + kernel_fd, kpipe_alive,
> + read_fd);
> +}
-- Pavel
More information about the CRIU
mailing list