[CRIU] [PATCH v2 2/5] mount: delay restoring readonly mount flag until all files ready

Pavel Tikhomirov ptikhomirov at virtuozzo.com
Mon Sep 17 14:47:54 MSK 2018


We can have ghost-files on readonly mounts. For them we need to
recreate the file on restore, and we can't do that if the mount is
readonly, so the idea is to delay restoring readonly mount flags until
all files are restored; until then the restore process sees these
mounts as writable.

There is an exception for overmounted mounts, as it is not so easy to
set flags on them at this late point. The other exception is a mount
with a readonly superblock - there can be no ghost-files on such a mount.

The first point where we delay readonly is do_new_mount and the second
is do_bind_mount. The latter is a bit more complex as we need to handle
nesting from source mount which can be also delayed/undelayed.

I prefer leaving all mounts writable where possible, as we might need
write access not only for ghost files but also for link-remaps and maybe
something else.

https://jira.sw.ru/browse/PSBM-82991

v2: minor commit message cleanup and remove warn
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
---
 criu/cr-restore.c    |   2 +
 criu/include/mount.h |   2 +
 criu/mount.c         | 110 ++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 112 insertions(+), 2 deletions(-)

diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 45ed9c63a..50326e347 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -3235,6 +3235,8 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
 		/* Wait when all tasks restored all files */
 		if (restore_wait_other_tasks())
 			goto err_nv;
+		if (remount_readonly_mounts())
+			goto err_nv;
 	}
 
 	/*
diff --git a/criu/include/mount.h b/criu/include/mount.h
index e7d026264..776c348cd 100644
--- a/criu/include/mount.h
+++ b/criu/include/mount.h
@@ -128,4 +128,6 @@ extern struct mount_info *parse_mountinfo(pid_t pid, struct ns_id *nsid, bool fo
 
 extern int check_mnt_id(void);
 
+extern int remount_readonly_mounts(void);
+
 #endif /* __CR_MOUNT_H__ */
diff --git a/criu/mount.c b/criu/mount.c
index ba202f0c9..3c19c280b 100644
--- a/criu/mount.c
+++ b/criu/mount.c
@@ -2105,6 +2105,14 @@ static int do_new_mount(struct mount_info *mi)
 		close(fd);
 	}
 
+	/*
+	 * Restoring ghost files on readonly mounts requires write access,
+	 * remount_readonly_mounts() will set these flags after all files
+	 * restored
+	 */
+	if (!mnt_is_overmounted(mi))
+		mflags &= ~MS_RDONLY;
+
 	if (mflags && mount(NULL, mi->mountpoint, NULL,
 				MS_REMOUNT | MS_BIND | mflags, NULL)) {
 		pr_perror("Unable to apply bind-mount options");
@@ -2182,7 +2190,7 @@ static int do_bind_mount(struct mount_info *mi)
 {
 	char mnt_fd_path[PSFDS];
 	char *root, *cut_root, rpath[PATH_MAX];
-	unsigned long mflags;
+	unsigned long mflags, bmflags = 0;
 	int exit_code = -1, mp_len;
 	bool shared = false;
 	bool master = false;
@@ -2302,7 +2310,15 @@ static int do_bind_mount(struct mount_info *mi)
 	}
 
 	mflags = mi->flags & (~MS_PROPAGATE);
-	if (!mi->bind || mflags != (mi->bind->flags & (~MS_PROPAGATE)))
+	if (!mnt_is_overmounted(mi) && !(mi->sb_flags & MS_RDONLY))
+		mflags &= ~MS_RDONLY;
+	if (mi->bind) {
+		bmflags = mi->bind->flags & (~MS_PROPAGATE);
+		if (!mnt_is_overmounted(mi->bind) && !(mi->bind->sb_flags & MS_RDONLY))
+			bmflags &= ~MS_RDONLY;
+	}
+
+	if (!mi->bind || mflags != bmflags)
 		if (mount(NULL, mi->mountpoint, NULL, MS_BIND | MS_REMOUNT | mflags, NULL)) {
 			pr_perror("Can't mount at %s", mi->mountpoint);
 			goto err;
@@ -3683,3 +3699,93 @@ void clean_cr_time_mounts(void)
 }
 
 struct ns_desc mnt_ns_desc = NS_DESC_ENTRY(CLONE_NEWNS, "mnt");
+
+/*
+ * Bind-remount, with its saved flags, every readonly mount that is not
+ * overmounted and has a writable superblock. A non-NULL ns restricts
+ * the walk to that namespace. Returns 0, or -1 on the first failure.
+ */
+int __remount_readonly_mounts(struct ns_id *ns)
+{
+	struct mount_info *mi;
+
+	for (mi = mntinfo; mi != NULL; mi = mi->next) {
+		unsigned long flags;
+
+		/* Skip: other namespace, overmounted, ro superblock, rw mount */
+		if ((ns && mi->nsid != ns) || mnt_is_overmounted(mi) ||
+		    (mi->sb_flags & MS_RDONLY) || !(mi->flags & MS_RDONLY))
+			continue;
+
+		flags = MS_REMOUNT | MS_BIND | (mi->flags & (~MS_PROPAGATE));
+		if (!mount(NULL, mi->ns_mountpoint, NULL, flags, NULL))
+			continue;
+
+		pr_perror("Failed to restore %d:%s mount flags %x",
+			  mi->mnt_id, mi->mountpoint, mi->flags);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * clone() helper, arg unused: switch into every mount namespace in turn
+ * and remount its readonly mounts. Returns 0 on success, 1 on failure.
+ */
+int ns_remount_readonly_mounts(void *arg)
+{
+	struct ns_id *nsid;
+
+	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
+		int mntns_fd;
+
+		if (nsid->nd != &mnt_ns_desc)
+			continue;
+
+		mntns_fd = fdstore_get(nsid->mnt.nsfd_id);
+		if (mntns_fd < 0)
+			return 1;
+		if (setns(mntns_fd, CLONE_NEWNS) < 0) {
+			pr_perror("`- Can't switch");
+			close(mntns_fd);
+			return 1;
+		}
+		close(mntns_fd);
+		pr_info("Switched to mntns %u:%u\n", nsid->id, nsid->kid);
+		if (__remount_readonly_mounts(nsid))
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Remount readonly mounts, either directly or from a helper process
+ * that can switch mount namespaces. Returns 0 on success, -1 on failure.
+ */
+int remount_readonly_mounts(void)
+{
+	const int helper_flags = CLONE_VFORK | CLONE_VM | CLONE_FILES |
+				 CLONE_IO | CLONE_SIGHAND | CLONE_SYSVSEM;
+	int helper, st;
+
+	if (!(root_ns_mask & CLONE_NEWNS))
+		return __remount_readonly_mounts(NULL);
+
+	/*
+	 * The root task may share fs via CLONE_FS, which would prevent
+	 * it from entering mount namespaces, so use a helper process.
+	 */
+	helper = clone_noasan(ns_remount_readonly_mounts, helper_flags, NULL);
+	if (helper == -1) {
+		pr_perror("Can't clone helper process");
+		return -1;
+	}
+
+	errno = 0;
+	if (waitpid(helper, &st, __WALL) == helper && WIFEXITED(st) && !WEXITSTATUS(st))
+		return 0;
+
+	pr_err("Can't wait or bad status: errno=%d, status=%d\n", errno, st);
+	return -1;
+}
-- 
2.17.1



More information about the CRIU mailing list