[CRIU] [PATCH v3] mnt: Carry run-time device ID in mount_info

Cyrill Gorcunov gorcunov at gmail.com
Thu Dec 3 08:03:21 PST 2015


On Thu, Dec 03, 2015 at 06:53:37PM +0300, Pavel Emelyanov wrote:
> > @@ -1950,6 +1979,9 @@ static int do_new_mount(struct mount_info *mi)
> >  
> >  	mi->mounted = true;
> >  
> > +	if (fetch_rt_stat(mi, mi->mountpoint))
> > +		return -1;
> > +
> 
> Why not in do_mount_one() right before calling propagate_mount()? This
> would be _one_ place instead of two.

Yeah, good point, thanks!
-------------- next part --------------
>From efe3da38de2fb394b89d67a873e1efbadff43c86 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov at openvz.org>
Date: Wed, 2 Dec 2015 17:26:38 +0300
Subject: [PATCH] mnt: Carry run-time device ID in mount_info

When we're restoring fsnotify watchees we need to resolve
a path to a handle at some mountpoint referred to by the @s_dev
member (device ID), which is saved inside the image. This
ID may actually change on every mount (say, one restores
a container after a machine reboot) or in case of the
container's migration.

Thus the test for overmounting in __open_mountpoint
will fail and we get an error.

Let's do a trick: introduce an @s_dev_rt member which
is supposed to carry the run-time device ID. When dumping,
this member is simply equal to the traditional @s_dev fetched
from procfs, but when restoring we fetch it from a
stat call once the mountpoint becomes alive.

https://jira.sw.ru/browse/PSBM-41610

v2:
 - predefine MOUNT_INVALID_DEV
 - use fetch_rt_stat instead of assigning device in restore_shared_options
 - copy @s_dev_rt in propagate_siblings and propagate_mount

Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
 include/mount.h |  3 +++
 mount.c         | 45 +++++++++++++++++++++++++++++++++++++++++----
 proc_parse.c    |  2 +-
 3 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/include/mount.h b/include/mount.h
index c3abc8479965..5a4c3b9bb0ca 100644
--- a/include/mount.h
+++ b/include/mount.h
@@ -29,10 +29,13 @@ struct ext_mount {
 	char			*val;
 };
 
+#define MOUNT_INVALID_DEV	(0)
+
 struct mount_info {
 	int			mnt_id;
 	int			parent_mnt_id;
 	unsigned int		s_dev;
+	unsigned int		s_dev_rt;
 	char			*root;
 	/*
 	 * During dump mountpoint contains path with dot at the
diff --git a/mount.c b/mount.c
index 25fbc134ebe1..1f43160ed918 100644
--- a/mount.c
+++ b/mount.c
@@ -1008,10 +1008,23 @@ int __open_mountpoint(struct mount_info *pm, int mnt_fd)
 		goto err;
 	}
 
+	if(pm->s_dev_rt == MOUNT_INVALID_DEV) {
+		pr_err("Resolving over unvalid device for %#x %s %s\n",
+		       pm->s_dev, pm->fstype->name, pm->ns_mountpoint);
+		goto err;
+	}
+
 	dev = phys_stat_resolve_dev(pm->nsid, st.st_dev, pm->ns_mountpoint + 1);
-	if (dev != pm->s_dev) {
-		pr_err("The file system %#x (%#x) %s %s is inaccessible\n",
-		       pm->s_dev, (int)dev, pm->fstype->name, pm->ns_mountpoint);
+	/*
+	 * Always check for @s_dev_rt here, because the @s_dev
+	 * from the image (in case of restore) has all rights
+	 * to not match the device (say it's migrated and kernel
+	 * allocates new device ID).
+	 */
+	if (dev != pm->s_dev_rt) {
+		pr_err("The file system %#x %#x (%#x) %s %s is inaccessible\n",
+		       pm->s_dev, pm->s_dev_rt, (int)dev,
+		       pm->fstype->name, pm->ns_mountpoint);
 		goto err;
 	}
 
@@ -1865,6 +1878,7 @@ static int propagate_siblings(struct mount_info *mi)
 			continue;
 		pr_debug("\t\tBind %s\n", t->mountpoint);
 		t->bind = mi;
+		t->s_dev_rt = mi->s_dev_rt;
 	}
 
 	list_for_each_entry(t, &mi->mnt_slave_list, mnt_slave) {
@@ -1872,6 +1886,7 @@ static int propagate_siblings(struct mount_info *mi)
 			continue;
 		pr_debug("\t\tBind %s\n", t->mountpoint);
 		t->bind = mi;
+		t->s_dev_rt = mi->s_dev_rt;
 	}
 
 	return 0;
@@ -1917,12 +1932,26 @@ skip_parent:
 			if (t->master_id)
 				continue;
 			t->bind = mi;
+			t->s_dev_rt = mi->s_dev_rt;
 		}
 	}
 
 	return 0;
 }
 
+static int fetch_rt_stat(struct mount_info *m, const char *where)
+{
+	struct stat st;
+
+	if (stat(where, &st)) {
+		pr_perror("Can't stat on %s\n", where);
+		return -1;
+	}
+
+	m->s_dev_rt = MKKDEV(major(st.st_dev), minor(st.st_dev));
+	return 0;
+}
+
 static int do_new_mount(struct mount_info *mi)
 {
 	unsigned long mflags = MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE;
@@ -2117,7 +2146,7 @@ static int do_mount_root(struct mount_info *mi)
 						mi->shared_id, mi->master_id))
 		return -1;
 
-	return 0;
+	return fetch_rt_stat(mi, mi->mountpoint);
 }
 
 static int do_mount_one(struct mount_info *mi)
@@ -2143,6 +2172,9 @@ static int do_mount_one(struct mount_info *mi)
 	else
 		ret = do_bind_mount(mi);
 
+	if (ret == 0 && fetch_rt_stat(mi, mi->mountpoint))
+		return -1;
+
 	if (ret == 0 && propagate_mount(mi))
 		return -1;
 
@@ -2255,6 +2287,11 @@ struct mount_info *mnt_entry_alloc()
 {
 	struct mount_info *new;
 
+	/*
+	 * We rely on xzalloc here for MOUNT_INVALID_DEV.
+	 */
+	BUILD_BUG_ON(MOUNT_INVALID_DEV);
+
 	new = xzalloc(sizeof(struct mount_info));
 	if (new) {
 		INIT_LIST_HEAD(&new->children);
diff --git a/proc_parse.c b/proc_parse.c
index 3d924b2f68d5..4d67a13188bd 100644
--- a/proc_parse.c
+++ b/proc_parse.c
@@ -1041,7 +1041,7 @@ static int parse_mountinfo_ent(char *str, struct mount_info *new, char **fsname)
 	if (!new->mountpoint)
 		goto err;
 
-	new->s_dev = MKKDEV(kmaj, kmin);
+	new->s_dev = new->s_dev_rt = MKKDEV(kmaj, kmin);
 	new->flags = 0;
 	if (parse_mnt_flags(opt, &new->flags))
 		goto err;
-- 
2.5.0



More information about the CRIU mailing list