[CRIU] [PATCH 1/2] mount: delay restoring readonly mount flag untill all files ready
Andrew Vagin
avagin at virtuozzo.com
Wed Sep 12 21:04:30 MSK 2018
On Tue, Sep 11, 2018 at 06:53:30PM +0300, Pavel Tikhomirov wrote:
> We can have ghost-files on readonly mounts, for them we will need to
> recreate the file on restore, and we can't do that if mount is readonly,
> so the idea is to move restoring readonly mount flags after all files are
> restored, before that restore process will see mounts writable.
I don't like the idea to remount all readonly mounts. Can we don't this
hack only for mounts with ghost files?
What is about link-remap files?
>
> There is an exception for overmounted mounts as it is not so easy to
> set flags on them at these late point. Other exception is if the mount
> has readonly superblock - there can be no ghost-files on such a mount.
>
> The first point where we delay readonly is do_new_mount and the second
> is do_bind_mount. The latter is a bit more complex as we need to handle
> nesting from source mount which can be also delayed/undelayed.
>
> https://jira.sw.ru/browse/PSBM-82991
Pls, don't add private links into a commit message.
> Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
> ---
> criu/cr-restore.c | 2 +
> criu/include/mount.h | 2 +
> criu/mount.c | 112 ++++++++++++++++++++++++++++++++++++++++++-
> 3 files changed, 114 insertions(+), 2 deletions(-)
>
> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> index 6f22fb787..258ab916d 100644
> --- a/criu/cr-restore.c
> +++ b/criu/cr-restore.c
> @@ -3235,6 +3235,8 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
> /* Wait when all tasks restored all files */
> if (restore_wait_other_tasks())
> goto err_nv;
> + if (remount_readonly_mounts())
> + goto err_nv;
> }
>
> /*
> diff --git a/criu/include/mount.h b/criu/include/mount.h
> index ca17b059a..3dec0ec0f 100644
> --- a/criu/include/mount.h
> +++ b/criu/include/mount.h
> @@ -126,4 +126,6 @@ extern struct mount_info *parse_mountinfo(pid_t pid, struct ns_id *nsid, bool fo
>
> extern int check_mnt_id(void);
>
> +extern int remount_readonly_mounts(void);
> +
> #endif /* __CR_MOUNT_H__ */
> diff --git a/criu/mount.c b/criu/mount.c
> index 781870eea..32d9c08b6 100644
> --- a/criu/mount.c
> +++ b/criu/mount.c
> @@ -2084,6 +2084,16 @@ static int do_new_mount(struct mount_info *mi)
> close(fd);
> }
>
> + /*
> + * Restoring ghost files on readonly mounts requires write access,
> + * remount_readonly_mounts() will set these flags after all files
> + * restored
> + */
> + if (!mnt_is_overmounted(mi))
> + mflags &= ~MS_RDONLY;
> + else if (mflags & MS_RDONLY)
> + pr_warn("Can't delay making mount readonly for overmounted mount\n");
> +
> if (mflags && mount(NULL, mi->mountpoint, NULL,
> MS_REMOUNT | MS_BIND | mflags, NULL)) {
> pr_perror("Unable to apply bind-mount options");
> @@ -2161,7 +2171,7 @@ static int do_bind_mount(struct mount_info *mi)
> {
> char mnt_fd_path[PSFDS];
> char *root, *cut_root, rpath[PATH_MAX];
> - unsigned long mflags;
> + unsigned long mflags, bmflags = 0;
> int exit_code = -1, mp_len;
> bool shared = false;
> bool master = false;
> @@ -2281,7 +2291,15 @@ static int do_bind_mount(struct mount_info *mi)
> }
>
> mflags = mi->flags & (~MS_PROPAGATE);
> - if (!mi->bind || mflags != (mi->bind->flags & (~MS_PROPAGATE)))
> + if (!mnt_is_overmounted(mi) && !(mi->sb_flags & MS_RDONLY))
> + mflags &= ~MS_RDONLY;
> + if (mi->bind) {
> + bmflags = mi->bind->flags & (~MS_PROPAGATE);
> + if (!mnt_is_overmounted(mi->bind) && !(mi->bind->sb_flags & MS_RDONLY))
> + bmflags &= ~MS_RDONLY;
> + }
> +
> + if (!mi->bind || mflags != bmflags)
> if (mount(NULL, mi->mountpoint, NULL, MS_BIND | MS_REMOUNT | mflags, NULL)) {
> pr_perror("Can't mount at %s", mi->mountpoint);
> goto err;
> @@ -3654,3 +3672,93 @@ void clean_cr_time_mounts(void)
> }
>
> struct ns_desc mnt_ns_desc = NS_DESC_ENTRY(CLONE_NEWNS, "mnt");
> +
> +int __remount_readonly_mounts(struct ns_id *ns)
> +{
> + struct mount_info *mnt;
> +
> + for (mnt = mntinfo; mnt; mnt = mnt->next) {
> + if (ns && mnt->nsid != ns)
> + continue;
> +
> + if (mnt_is_overmounted(mnt))
> + continue;
> +
> + if (mnt->sb_flags & MS_RDONLY)
> + continue;
> +
> + if (!(mnt->flags & MS_RDONLY))
> + continue;
> +
> + if (mount(NULL, mnt->ns_mountpoint, NULL,
> + MS_REMOUNT | MS_BIND | (mnt->flags & (~MS_PROPAGATE)),
> + NULL)) {
> + pr_perror("Failed to restore %d:%s mount flags %x",
> + mnt->mnt_id, mnt->mountpoint, mnt->flags);
> + return -1;
> + }
> + }
> +
> + return 0;
> +}
> +
> +int ns_remount_readonly_mounts(void *arg)
> +{
> + struct ns_id *nsid;
> +
> + for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
> + int mntns_fd;
> +
> + if (nsid->nd != &mnt_ns_desc)
> + continue;
> +
> + mntns_fd = fdstore_get(nsid->mnt.nsfd_id);
> + if (mntns_fd < 0)
> + return 1;
> +
> + if (setns(mntns_fd, CLONE_NEWNS) < 0) {
> + pr_perror("`- Can't switch");
> + close(mntns_fd);
> + return 1;
> + }
> + close(mntns_fd);
> +
> + pr_info("Switched to mntns %u:%u/n", nsid->id, nsid->kid);
> +
> + if (__remount_readonly_mounts(nsid))
> + return 1;
> + }
> +
> + return 0;
> +}
> +
> +int remount_readonly_mounts(void)
> +{
> + int pid, status;
> +
> + if (!(root_ns_mask & CLONE_NEWNS))
> + return __remount_readonly_mounts(NULL);
> +
> + /*
> + * Need a helper process because the root task can share fs via
> + * CLONE_FS and we would not be able to enter mount namespaces
> + */
> + pid = clone_noasan(ns_remount_readonly_mounts,
> + CLONE_VFORK | CLONE_VM | CLONE_FILES
> + | CLONE_IO | CLONE_SIGHAND
> + | CLONE_SYSVSEM, NULL);
> + if (pid == -1) {
> + pr_perror("Can't clone helper process");
> + return -1;
> + }
> +
> + errno = 0;
> + if (waitpid(pid, &status, __WALL) != pid || !WIFEXITED(status)
> + || WEXITSTATUS(status)) {
> + pr_err("Can't wait or bad status: errno=%d, status=%d\n",
> + errno, status);
> + return -1;
> + }
> +
> + return 0;
> +}
> --
> 2.17.1
>
More information about the CRIU
mailing list