[CRIU] [PATCH 1/2] mount: delay restoring readonly mount flag untill all files ready

Pavel Tikhomirov ptikhomirov at virtuozzo.com
Thu Sep 13 11:17:28 MSK 2018


On 09/12/2018 09:04 PM, Andrew Vagin wrote:
> On Tue, Sep 11, 2018 at 06:53:30PM +0300, Pavel Tikhomirov wrote:
>> We can have ghost-files on readonly mounts, for them we will need to
>> recreate the file on restore, and we can't do that if mount is readonly,
>> so the idea is to move restoring readonly mount flags after all files are
>> restored, before that restore process will see mounts writable.
> 
> I don't like the idea to remount all readonly mounts. 

Not all, but only the once with writable superblock, note the exceptions 
below.

> Can we don't this hack only for mounts with ghost files?

When restoring ghost files we can write-access single ghost file from 
different mounts of it's superblock:

1) create_ghost, clean_one_remap - we create it on the mount from which 
it was opened and if it was open multiple times from different mounts - 
the one we've seen first on dump

2) rfi_remap - from "common bind-mount"

So we need the hack for each of the group of mounts with same 
superblock, when one from the group has ghost files open.

> 
> What is about link-remap files?

Surely we have same problem for all write accesses to fs on restore, so 
for you previous question, the best way is to leave all mounts writable 
where possible until we finish with files restore.

> 
>>
>> There is an exception for overmounted mounts as it is not so easy to
>> set flags on them at these late point. Other exception is if the mount
>> has readonly superblock - there can be no ghost-files on such a mount.
>>
>> The first point where we delay readonly is do_new_mount and the second
>> is do_bind_mount. The latter is a bit more complex as we need to handle
>> nesting from source mount which can be also delayed/undelayed.
>>
>> https://jira.sw.ru/browse/PSBM-82991
> 
> Pls, don't add private links into a commit message.

Ok

> 
>> Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
>> ---
>>   criu/cr-restore.c    |   2 +
>>   criu/include/mount.h |   2 +
>>   criu/mount.c         | 112 ++++++++++++++++++++++++++++++++++++++++++-
>>   3 files changed, 114 insertions(+), 2 deletions(-)
>>
>> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
>> index 6f22fb787..258ab916d 100644
>> --- a/criu/cr-restore.c
>> +++ b/criu/cr-restore.c
>> @@ -3235,6 +3235,8 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
>>   		/* Wait when all tasks restored all files */
>>   		if (restore_wait_other_tasks())
>>   			goto err_nv;
>> +		if (remount_readonly_mounts())
>> +			goto err_nv;
>>   	}
>>   
>>   	/*
>> diff --git a/criu/include/mount.h b/criu/include/mount.h
>> index ca17b059a..3dec0ec0f 100644
>> --- a/criu/include/mount.h
>> +++ b/criu/include/mount.h
>> @@ -126,4 +126,6 @@ extern struct mount_info *parse_mountinfo(pid_t pid, struct ns_id *nsid, bool fo
>>   
>>   extern int check_mnt_id(void);
>>   
>> +extern int remount_readonly_mounts(void);
>> +
>>   #endif /* __CR_MOUNT_H__ */
>> diff --git a/criu/mount.c b/criu/mount.c
>> index 781870eea..32d9c08b6 100644
>> --- a/criu/mount.c
>> +++ b/criu/mount.c
>> @@ -2084,6 +2084,16 @@ static int do_new_mount(struct mount_info *mi)
>>   		close(fd);
>>   	}
>>   
>> +	/*
>> +	 * Restoring ghost files on readonly mounts requires write access,
>> +	 * remount_readonly_mounts() will set these flags after all files
>> +	 * restored
>> +	 */
>> +	if (!mnt_is_overmounted(mi))
>> +		mflags &= ~MS_RDONLY;
>> +	else if (mflags & MS_RDONLY)
>> +		pr_warn("Can't delay making mount readonly for overmounted mount\n");
>> +
>>   	if (mflags && mount(NULL, mi->mountpoint, NULL,
>>   				MS_REMOUNT | MS_BIND | mflags, NULL)) {
>>   		pr_perror("Unable to apply bind-mount options");
>> @@ -2161,7 +2171,7 @@ static int do_bind_mount(struct mount_info *mi)
>>   {
>>   	char mnt_fd_path[PSFDS];
>>   	char *root, *cut_root, rpath[PATH_MAX];
>> -	unsigned long mflags;
>> +	unsigned long mflags, bmflags = 0;
>>   	int exit_code = -1, mp_len;
>>   	bool shared = false;
>>   	bool master = false;
>> @@ -2281,7 +2291,15 @@ static int do_bind_mount(struct mount_info *mi)
>>   	}
>>   
>>   	mflags = mi->flags & (~MS_PROPAGATE);
>> -	if (!mi->bind || mflags != (mi->bind->flags & (~MS_PROPAGATE)))
>> +	if (!mnt_is_overmounted(mi) && !(mi->sb_flags & MS_RDONLY))
>> +		mflags &= ~MS_RDONLY;
>> +	if (mi->bind) {
>> +		bmflags = mi->bind->flags & (~MS_PROPAGATE);
>> +		if (!mnt_is_overmounted(mi->bind) && !(mi->bind->sb_flags & MS_RDONLY))
>> +			bmflags &= ~MS_RDONLY;
>> +	}
>> +
>> +	if (!mi->bind || mflags != bmflags)
>>   		if (mount(NULL, mi->mountpoint, NULL, MS_BIND | MS_REMOUNT | mflags, NULL)) {
>>   			pr_perror("Can't mount at %s", mi->mountpoint);
>>   			goto err;
>> @@ -3654,3 +3672,93 @@ void clean_cr_time_mounts(void)
>>   }
>>   
>>   struct ns_desc mnt_ns_desc = NS_DESC_ENTRY(CLONE_NEWNS, "mnt");
>> +
>> +int __remount_readonly_mounts(struct ns_id *ns)
>> +{
>> +	struct mount_info *mnt;
>> +
>> +	for (mnt = mntinfo; mnt; mnt = mnt->next) {
>> +		if (ns && mnt->nsid != ns)
>> +			continue;
>> +
>> +		if (mnt_is_overmounted(mnt))
>> +			continue;
>> +
>> +		if (mnt->sb_flags & MS_RDONLY)
>> +			continue;
>> +
>> +		if (!(mnt->flags & MS_RDONLY))
>> +			continue;
>> +
>> +		if (mount(NULL, mnt->ns_mountpoint, NULL,
>> +			  MS_REMOUNT | MS_BIND | (mnt->flags & (~MS_PROPAGATE)),
>> +			  NULL)) {
>> +			pr_perror("Failed to restore %d:%s mount flags %x",
>> +				  mnt->mnt_id, mnt->mountpoint, mnt->flags);
>> +			return -1;
>> +		}
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +int ns_remount_readonly_mounts(void *arg)
>> +{
>> +	struct ns_id *nsid;
>> +
>> +	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
>> +		int mntns_fd;
>> +
>> +		if (nsid->nd != &mnt_ns_desc)
>> +			continue;
>> +
>> +		mntns_fd = fdstore_get(nsid->mnt.nsfd_id);
>> +		if (mntns_fd < 0)
>> +			return 1;
>> +
>> +		if (setns(mntns_fd, CLONE_NEWNS) < 0) {
>> +			pr_perror("`- Can't switch");
>> +			close(mntns_fd);
>> +			return 1;
>> +		}
>> +		close(mntns_fd);
>> +
>> +		pr_info("Switched to mntns %u:%u/n", nsid->id, nsid->kid);
>> +
>> +		if (__remount_readonly_mounts(nsid))
>> +			return 1;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +int remount_readonly_mounts(void)
>> +{
>> +	int pid, status;
>> +
>> +	if (!(root_ns_mask & CLONE_NEWNS))
>> +		return __remount_readonly_mounts(NULL);
>> +
>> +	/*
>> +	 * Need a helper process because the root task can share fs via
>> +	 * CLONE_FS and we would not be able to enter mount namespaces
>> +	 */
>> +	pid = clone_noasan(ns_remount_readonly_mounts,
>> +			   CLONE_VFORK | CLONE_VM | CLONE_FILES
>> +			   | CLONE_IO | CLONE_SIGHAND
>> +			   | CLONE_SYSVSEM, NULL);
>> +	if (pid == -1) {
>> +		pr_perror("Can't clone helper process");
>> +		return -1;
>> +	}
>> +
>> +	errno = 0;
>> +	if (waitpid(pid, &status, __WALL) != pid || !WIFEXITED(status)
>> +	    || WEXITSTATUS(status)) {
>> +		pr_err("Can't wait or bad status: errno=%d, status=%d\n",
>> +		       errno, status);
>> +		return -1;
>> +	}
>> +
>> +	return 0;
>> +}
>> -- 
>> 2.17.1
>>

-- 
Best regards, Tikhomirov Pavel
Software Developer, Virtuozzo.


More information about the CRIU mailing list