[CRIU] [PATCH 2/2 v2] fsnotify: Always provide the path for inotify watchees

Cyrill Gorcunov gorcunov at gmail.com
Tue Oct 13 15:01:44 PDT 2015


On Tue, Oct 13, 2015 at 10:17:45PM +0300, Andrew Vagin wrote:
> > 
> > https://jira.sw.ru/browse/PSBM-39957
> > 
> > Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>

Updated patch attached (I've also added more comments to the code).
Please take a look (I ran the inotify tests and container
c/r).
-------------- next part --------------
>From a502356ab50033c55aecf462dbbc4cd4cc16100c Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov at openvz.org>
Date: Tue, 13 Oct 2015 19:58:22 +0300
Subject: [PATCH] fsnotify: Always provide the path for inotify watchees

In a debian-8 container we faced a problem -- systemd creates nested
mount namespaces and inotify watchees are resolved into a path which
is inaccessible on restore; the same happens when the paths where
watchees live are bind-overmounted. Thus when we try to
restore such watchees we can't open the paths.

Let's do a trick here (huge thanks to Andrew Vagin for the idea and
overall help) -- walk over all mount points whose device matches
the handle's device, open the handle first, and then test whether the
path provided is openable as well. After all, inotify objects are
bound to the inode, so it's irrelevant via which path it's assigned.

https://jira.sw.ru/browse/PSBM-39957

Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
 fsnotify.c | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 92 insertions(+), 18 deletions(-)

diff --git a/fsnotify.c b/fsnotify.c
index 931e76785b41..a1af4870917b 100644
--- a/fsnotify.c
+++ b/fsnotify.c
@@ -139,43 +139,113 @@ out:
 int check_open_handle(unsigned int s_dev, unsigned long i_ino,
 		FhEntry *f_handle)
 {
+	struct mount_info *openable_m = NULL;
+	struct mount_info *m;
+	fh_t handle;
 	int fd = -1;
 	char *path;
 
-	fd = open_handle(s_dev, i_ino, f_handle);
-	if (fd >= 0) {
-		struct mount_info *mi;
+	decode_handle(&handle, f_handle);
+
+	/*
+	 * We gonna figure out if there a path for the
+	 * watchee which can be used as a destination on
+	 * the restore (because mountpoints may overlap
+	 * paths and open-by-handle return the path which
+	 * is not openable).
+	 *
+	 * So simply iterate over all mountpoints which
+	 * device match the request and try to reopent
+	 * the watchee.
+	 */
+	for (m = mntinfo; m; m = m->next) {
+		char buf[PATH_MAX], *__path;
+		int mntfd, openable_fd;
+
+		if (m->s_dev != s_dev)
+			continue;
 
-		pr_debug("\tHandle 0x%x:0x%lx is openable\n", s_dev, i_ino);
+		mntfd = __open_mountpoint(m, -1);
+		pr_debug("\t\tTrying via mntid %d root %s ns_mountpoint @%s (%d)\n",
+			 m->mnt_id, m->root, m->ns_mountpoint, mntfd);
+		if (mntfd < 0)
+			continue;
 
-		mi = lookup_mnt_sdev(s_dev);
-		if (mi == NULL) {
-			pr_err("Unable to lookup a mount by dev 0x%x\n", s_dev);
+		fd = userns_call(open_by_handle, UNS_FDOUT, &handle,
+				 sizeof(handle), mntfd);
+		close(mntfd);
+		if (fd < 0)
+			continue;
+
+		/*
+		 * We manage to open it via handle
+		 * at least once. We need to know that
+		 * for filesystems which trash their
+		 * inode numbers between remounts so
+		 * we have to fetch the complete path
+		 * for them.
+		 */
+		if (!openable_m)
+			openable_m = m;
+
+		if (read_fd_link(fd, buf, sizeof(buf)) < 0) {
+			close_safe(&fd);
 			goto err;
 		}
+		close_safe(&fd);
 
 		/*
-		 * Inode numbers are not restored for tmpfs content, but we can
-		 * get file names, becasue tmpfs cache is not pruned.
+		 * Convert into a relative path.
 		 */
-		if ((mi->fstype->code == FSTYPE__TMPFS) ||
-				(mi->fstype->code == FSTYPE__DEVTMPFS)) {
-			char p[PATH_MAX];
+		__path = (buf[1] != '\0') ? buf + 1 : buf;
+		pr_debug("\t\t\tlink as %s\n", __path);
 
-			if (read_fd_link(fd, p, sizeof(p)) < 0)
-				goto err;
+		mntfd = mntns_get_root_by_mnt_id(m->mnt_id);
+		if (mntfd < 0)
+			continue;
+
+		openable_fd = openat(mntfd, __path, O_PATH);
+		/*
+		 * Close it early, we only interested
+		 * in openability.
+		 */
+		close(openable_fd);
 
-			path = xstrdup(p);
+		if (openable_fd >= 0) {
+			pr_debug("\t\t\topenable as %s\n", __path);
+			path = xstrdup(buf);
 			if (path == NULL)
 				goto err;
 
 			f_handle->has_mnt_id = true;
-			f_handle->mnt_id = mi->mnt_id;
-
+			f_handle->mnt_id = m->mnt_id;
 			goto out;
+		} else {
+			pr_debug("\t\t\tnot openable as %s (%m)\n", __path);
+		}
+	}
+
+	/*
+	 * We were unable to find proper path for the
+	 * handle, so there is no much we can do: either
+	 * rely on the fact that system will open handle
+	 * without path supplied as a hint (in worst case
+	 * we will fail on restore), either @force_irmap
+	 * will lookup for path for us.
+	 *
+	 * With one exception though: tmpfs and devtmpfs
+	 * are not preserving their inode numbers between
+	 * mounts so we have to be sure that we're not saving
+	 * the state which definitely fail on restore.
+	 */
+	if (openable_m) {
+		if ((openable_m->fstype->code == FSTYPE__TMPFS) ||
+		    (openable_m->fstype->code == FSTYPE__DEVTMPFS)) {
+			pr_err("Can't fetch openable path for tmpfs/devtmpfs\n");
+			goto err;
 		}
 
-		if (!opts.force_irmap)
+		if (!opts.force_irmap) {
 			/*
 			 * If we're not forced to do irmap, then
 			 * say we have no path for watch. Otherwise
@@ -185,7 +255,9 @@ int check_open_handle(unsigned int s_dev, unsigned long i_ino,
 			 * FIXME -- no need to open-by-handle if
 			 * we are in force-irmap and not on tempfs
 			 */
+			pr_warn("\tHandle 0x%x:0x%lx cannot be opened\n", s_dev, i_ino);
 			goto out_nopath;
+		}
 	}
 
 	pr_warn("\tHandle 0x%x:0x%lx cannot be opened\n", s_dev, i_ino);
@@ -194,9 +266,11 @@ int check_open_handle(unsigned int s_dev, unsigned long i_ino,
 		pr_err("\tCan't dump that handle\n");
 		return -1;
 	}
+
 out:
 	pr_debug("\tDumping %s as path for handle\n", path);
 	f_handle->path = path;
+
 out_nopath:
 	close_safe(&fd);
 	return 0;
-- 
2.4.3



More information about the CRIU mailing list