[CRIU] [PATCH 1/2] mount: delay restoring readonly mount flag untill all files ready

Andrew Vagin avagin at virtuozzo.com
Wed Sep 12 21:04:30 MSK 2018


On Tue, Sep 11, 2018 at 06:53:30PM +0300, Pavel Tikhomirov wrote:
> We can have ghost-files on readonly mounts, for them we will need to
> recreate the file on restore, and we can't do that if mount is readonly,
> so the idea is to move restoring readonly mount flags after all files are
> restored, before that restore process will see mounts writable.

I don't like the idea to remount all readonly mounts. Can we don't this
hack only for mounts with ghost files?

What is about link-remap files?

> 
> There is an exception for overmounted mounts as it is not so easy to
> set flags on them at these late point. Other exception is if the mount
> has readonly superblock - there can be no ghost-files on such a mount.
> 
> The first point where we delay readonly is do_new_mount and the second
> is do_bind_mount. The latter is a bit more complex as we need to handle
> nesting from source mount which can be also delayed/undelayed.
> 
> https://jira.sw.ru/browse/PSBM-82991

Pls, don't add private links into a commit message.

> Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
> ---
>  criu/cr-restore.c    |   2 +
>  criu/include/mount.h |   2 +
>  criu/mount.c         | 112 ++++++++++++++++++++++++++++++++++++++++++-
>  3 files changed, 114 insertions(+), 2 deletions(-)
> 
> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> index 6f22fb787..258ab916d 100644
> --- a/criu/cr-restore.c
> +++ b/criu/cr-restore.c
> @@ -3235,6 +3235,8 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
>  		/* Wait when all tasks restored all files */
>  		if (restore_wait_other_tasks())
>  			goto err_nv;
> +		if (remount_readonly_mounts())
> +			goto err_nv;
>  	}
>  
>  	/*
> diff --git a/criu/include/mount.h b/criu/include/mount.h
> index ca17b059a..3dec0ec0f 100644
> --- a/criu/include/mount.h
> +++ b/criu/include/mount.h
> @@ -126,4 +126,6 @@ extern struct mount_info *parse_mountinfo(pid_t pid, struct ns_id *nsid, bool fo
>  
>  extern int check_mnt_id(void);
>  
> +extern int remount_readonly_mounts(void);
> +
>  #endif /* __CR_MOUNT_H__ */
> diff --git a/criu/mount.c b/criu/mount.c
> index 781870eea..32d9c08b6 100644
> --- a/criu/mount.c
> +++ b/criu/mount.c
> @@ -2084,6 +2084,16 @@ static int do_new_mount(struct mount_info *mi)
>  		close(fd);
>  	}
>  
> +	/*
> +	 * Restoring ghost files on readonly mounts requires write access,
> +	 * remount_readonly_mounts() will set these flags after all files
> +	 * restored
> +	 */
> +	if (!mnt_is_overmounted(mi))
> +		mflags &= ~MS_RDONLY;
> +	else if (mflags & MS_RDONLY)
> +		pr_warn("Can't delay making mount readonly for overmounted mount\n");
> +
>  	if (mflags && mount(NULL, mi->mountpoint, NULL,
>  				MS_REMOUNT | MS_BIND | mflags, NULL)) {
>  		pr_perror("Unable to apply bind-mount options");
> @@ -2161,7 +2171,7 @@ static int do_bind_mount(struct mount_info *mi)
>  {
>  	char mnt_fd_path[PSFDS];
>  	char *root, *cut_root, rpath[PATH_MAX];
> -	unsigned long mflags;
> +	unsigned long mflags, bmflags = 0;
>  	int exit_code = -1, mp_len;
>  	bool shared = false;
>  	bool master = false;
> @@ -2281,7 +2291,15 @@ static int do_bind_mount(struct mount_info *mi)
>  	}
>  
>  	mflags = mi->flags & (~MS_PROPAGATE);
> -	if (!mi->bind || mflags != (mi->bind->flags & (~MS_PROPAGATE)))
> +	if (!mnt_is_overmounted(mi) && !(mi->sb_flags & MS_RDONLY))
> +		mflags &= ~MS_RDONLY;
> +	if (mi->bind) {
> +		bmflags = mi->bind->flags & (~MS_PROPAGATE);
> +		if (!mnt_is_overmounted(mi->bind) && !(mi->bind->sb_flags & MS_RDONLY))
> +			bmflags &= ~MS_RDONLY;
> +	}
> +
> +	if (!mi->bind || mflags != bmflags)
>  		if (mount(NULL, mi->mountpoint, NULL, MS_BIND | MS_REMOUNT | mflags, NULL)) {
>  			pr_perror("Can't mount at %s", mi->mountpoint);
>  			goto err;
> @@ -3654,3 +3672,93 @@ void clean_cr_time_mounts(void)
>  }
>  
>  struct ns_desc mnt_ns_desc = NS_DESC_ENTRY(CLONE_NEWNS, "mnt");
> +
> +int __remount_readonly_mounts(struct ns_id *ns)
> +{
> +	struct mount_info *mnt;
> +
> +	for (mnt = mntinfo; mnt; mnt = mnt->next) {
> +		if (ns && mnt->nsid != ns)
> +			continue;
> +
> +		if (mnt_is_overmounted(mnt))
> +			continue;
> +
> +		if (mnt->sb_flags & MS_RDONLY)
> +			continue;
> +
> +		if (!(mnt->flags & MS_RDONLY))
> +			continue;
> +
> +		if (mount(NULL, mnt->ns_mountpoint, NULL,
> +			  MS_REMOUNT | MS_BIND | (mnt->flags & (~MS_PROPAGATE)),
> +			  NULL)) {
> +			pr_perror("Failed to restore %d:%s mount flags %x",
> +				  mnt->mnt_id, mnt->mountpoint, mnt->flags);
> +			return -1;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +int ns_remount_readonly_mounts(void *arg)
> +{
> +	struct ns_id *nsid;
> +
> +	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
> +		int mntns_fd;
> +
> +		if (nsid->nd != &mnt_ns_desc)
> +			continue;
> +
> +		mntns_fd = fdstore_get(nsid->mnt.nsfd_id);
> +		if (mntns_fd < 0)
> +			return 1;
> +
> +		if (setns(mntns_fd, CLONE_NEWNS) < 0) {
> +			pr_perror("`- Can't switch");
> +			close(mntns_fd);
> +			return 1;
> +		}
> +		close(mntns_fd);
> +
> +		pr_info("Switched to mntns %u:%u/n", nsid->id, nsid->kid);
> +
> +		if (__remount_readonly_mounts(nsid))
> +			return 1;
> +	}
> +
> +	return 0;
> +}
> +
> +int remount_readonly_mounts(void)
> +{
> +	int pid, status;
> +
> +	if (!(root_ns_mask & CLONE_NEWNS))
> +		return __remount_readonly_mounts(NULL);
> +
> +	/*
> +	 * Need a helper process because the root task can share fs via
> +	 * CLONE_FS and we would not be able to enter mount namespaces
> +	 */
> +	pid = clone_noasan(ns_remount_readonly_mounts,
> +			   CLONE_VFORK | CLONE_VM | CLONE_FILES
> +			   | CLONE_IO | CLONE_SIGHAND
> +			   | CLONE_SYSVSEM, NULL);
> +	if (pid == -1) {
> +		pr_perror("Can't clone helper process");
> +		return -1;
> +	}
> +
> +	errno = 0;
> +	if (waitpid(pid, &status, __WALL) != pid || !WIFEXITED(status)
> +	    || WEXITSTATUS(status)) {
> +		pr_err("Can't wait or bad status: errno=%d, status=%d\n",
> +		       errno, status);
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> -- 
> 2.17.1
> 


More information about the CRIU mailing list