[CRIU] [PATCH 1/2] mount: delay restoring readonly mount flag untill all files ready

Pavel Tikhomirov ptikhomirov at virtuozzo.com
Tue Sep 11 18:53:30 MSK 2018


We can have ghost-files on readonly mounts, for them we will need to
recreate the file on restore, and we can't do that if mount is readonly,
so the idea is to move restoring readonly mount flags after all files are
restored, before that restore process will see mounts writable.

There is an exception for overmounted mounts as it is not so easy to
set flags on them at these late point. Other exception is if the mount
has readonly superblock - there can be no ghost-files on such a mount.

The first point where we delay readonly is do_new_mount and the second
is do_bind_mount. The latter is a bit more complex as we need to handle
nesting from source mount which can be also delayed/undelayed.

https://jira.sw.ru/browse/PSBM-82991
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
---
 criu/cr-restore.c    |   2 +
 criu/include/mount.h |   2 +
 criu/mount.c         | 112 ++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 114 insertions(+), 2 deletions(-)

diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 6f22fb787..258ab916d 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -3235,6 +3235,8 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
 		/* Wait when all tasks restored all files */
 		if (restore_wait_other_tasks())
 			goto err_nv;
+		if (remount_readonly_mounts())
+			goto err_nv;
 	}
 
 	/*
diff --git a/criu/include/mount.h b/criu/include/mount.h
index ca17b059a..3dec0ec0f 100644
--- a/criu/include/mount.h
+++ b/criu/include/mount.h
@@ -126,4 +126,6 @@ extern struct mount_info *parse_mountinfo(pid_t pid, struct ns_id *nsid, bool fo
 
 extern int check_mnt_id(void);
 
+extern int remount_readonly_mounts(void);
+
 #endif /* __CR_MOUNT_H__ */
diff --git a/criu/mount.c b/criu/mount.c
index 781870eea..32d9c08b6 100644
--- a/criu/mount.c
+++ b/criu/mount.c
@@ -2084,6 +2084,16 @@ static int do_new_mount(struct mount_info *mi)
 		close(fd);
 	}
 
+	/*
+	 * Restoring ghost files on readonly mounts requires write access,
+	 * remount_readonly_mounts() will set these flags after all files
+	 * restored
+	 */
+	if (!mnt_is_overmounted(mi))
+		mflags &= ~MS_RDONLY;
+	else if (mflags & MS_RDONLY)
+		pr_warn("Can't delay making mount readonly for overmounted mount\n");
+
 	if (mflags && mount(NULL, mi->mountpoint, NULL,
 				MS_REMOUNT | MS_BIND | mflags, NULL)) {
 		pr_perror("Unable to apply bind-mount options");
@@ -2161,7 +2171,7 @@ static int do_bind_mount(struct mount_info *mi)
 {
 	char mnt_fd_path[PSFDS];
 	char *root, *cut_root, rpath[PATH_MAX];
-	unsigned long mflags;
+	unsigned long mflags, bmflags = 0;
 	int exit_code = -1, mp_len;
 	bool shared = false;
 	bool master = false;
@@ -2281,7 +2291,15 @@ static int do_bind_mount(struct mount_info *mi)
 	}
 
 	mflags = mi->flags & (~MS_PROPAGATE);
-	if (!mi->bind || mflags != (mi->bind->flags & (~MS_PROPAGATE)))
+	if (!mnt_is_overmounted(mi) && !(mi->sb_flags & MS_RDONLY))
+		mflags &= ~MS_RDONLY;
+	if (mi->bind) {
+		bmflags = mi->bind->flags & (~MS_PROPAGATE);
+		if (!mnt_is_overmounted(mi->bind) && !(mi->bind->sb_flags & MS_RDONLY))
+			bmflags &= ~MS_RDONLY;
+	}
+
+	if (!mi->bind || mflags != bmflags)
 		if (mount(NULL, mi->mountpoint, NULL, MS_BIND | MS_REMOUNT | mflags, NULL)) {
 			pr_perror("Can't mount at %s", mi->mountpoint);
 			goto err;
@@ -3654,3 +3672,93 @@ void clean_cr_time_mounts(void)
 }
 
 struct ns_desc mnt_ns_desc = NS_DESC_ENTRY(CLONE_NEWNS, "mnt");
+
+int __remount_readonly_mounts(struct ns_id *ns)
+{
+	struct mount_info *mnt;
+
+	for (mnt = mntinfo; mnt; mnt = mnt->next) {
+		if (ns && mnt->nsid != ns)
+			continue;
+
+		if (mnt_is_overmounted(mnt))
+			continue;
+
+		if (mnt->sb_flags & MS_RDONLY)
+			continue;
+
+		if (!(mnt->flags & MS_RDONLY))
+			continue;
+
+		if (mount(NULL, mnt->ns_mountpoint, NULL,
+			  MS_REMOUNT | MS_BIND | (mnt->flags & (~MS_PROPAGATE)),
+			  NULL)) {
+			pr_perror("Failed to restore %d:%s mount flags %x",
+				  mnt->mnt_id, mnt->mountpoint, mnt->flags);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+int ns_remount_readonly_mounts(void *arg)
+{
+	struct ns_id *nsid;
+
+	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
+		int mntns_fd;
+
+		if (nsid->nd != &mnt_ns_desc)
+			continue;
+
+		mntns_fd = fdstore_get(nsid->mnt.nsfd_id);
+		if (mntns_fd < 0)
+			return 1;
+
+		if (setns(mntns_fd, CLONE_NEWNS) < 0) {
+			pr_perror("`- Can't switch");
+			close(mntns_fd);
+			return 1;
+		}
+		close(mntns_fd);
+
+		pr_info("Switched to mntns %u:%u/n", nsid->id, nsid->kid);
+
+		if (__remount_readonly_mounts(nsid))
+			return 1;
+	}
+
+	return 0;
+}
+
+int remount_readonly_mounts(void)
+{
+	int pid, status;
+
+	if (!(root_ns_mask & CLONE_NEWNS))
+		return __remount_readonly_mounts(NULL);
+
+	/*
+	 * Need a helper process because the root task can share fs via
+	 * CLONE_FS and we would not be able to enter mount namespaces
+	 */
+	pid = clone_noasan(ns_remount_readonly_mounts,
+			   CLONE_VFORK | CLONE_VM | CLONE_FILES
+			   | CLONE_IO | CLONE_SIGHAND
+			   | CLONE_SYSVSEM, NULL);
+	if (pid == -1) {
+		pr_perror("Can't clone helper process");
+		return -1;
+	}
+
+	errno = 0;
+	if (waitpid(pid, &status, __WALL) != pid || !WIFEXITED(status)
+	    || WEXITSTATUS(status)) {
+		pr_err("Can't wait or bad status: errno=%d, status=%d\n",
+		       errno, status);
+		return -1;
+	}
+
+	return 0;
+}
-- 
2.17.1



More information about the CRIU mailing list