[Devel] [PATCH v2] overlayfs: add dynamic path resolving in mount options

Pavel Tikhomirov ptikhomirov at virtuozzo.com
Mon May 25 12:45:22 MSK 2020



On 5/22/20 7:14 PM, Alexander Mikhalitsyn wrote:
> This patch adds OVERLAY_FS_DYNAMIC_RESOLVE_PATH_OPTIONS compile-time option,
> and "dyn_path_opts" runtime module option. These options corresponds
> "dynamic path resolving in lowerdir, upperdir, workdir mount options" mode.
> If enabled, user may see real full paths relatively to the mount namespace
> in lowerdir, upperdir, workdir options (/proc/mounts, /proc/<fd>/mountinfo).
> 
> This patch is very helpful to checkpoint/restore functionality of overlayfs
> mounts. With this patch and CRIU it's real to C/R Docker containers with
> overlayfs storage driver.
> 
> Note: d_path function from dcache.c is used to resolve full path in mount
> namespace. This function also adds "(deleted)" suffix if dentry was deleted.
> So, If one of dentries in lowerdir, upperdir, workdir options is deleted,
> we will see "(deleted)" suffix in corresponding path.
> 
> https://jira.sw.ru/browse/PSBM-58614
> 
> Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn at virtuozzo.com>
> ---
>   fs/overlayfs/Kconfig     | 30 ++++++++++++++++++
>   fs/overlayfs/overlayfs.h |  3 ++
>   fs/overlayfs/ovl_entry.h |  6 ++--
>   fs/overlayfs/super.c     | 82 ++++++++++++++++++++++++++----------------------
>   fs/overlayfs/util.c      | 47 +++++++++++++++++++++++++++
>   5 files changed, 128 insertions(+), 40 deletions(-)
> 
> diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
> index 9384164..15e8fa5 100644
> --- a/fs/overlayfs/Kconfig
> +++ b/fs/overlayfs/Kconfig
> @@ -103,3 +103,33 @@ config OVERLAY_FS_XINO_AUTO
>   	  For more information, see Documentation/filesystems/overlayfs.txt
>   
>   	  If unsure, say N.
> +
> +config OVERLAY_FS_DYNAMIC_RESOLVE_PATH_OPTIONS
> +	bool "Overlayfs: all mount paths options resolves dynamically on options show"
> +	default y
> +	depends on OVERLAY_FS
> +	help
> +	  This option helps checkpoint/restore of overlayfs mounts.
> +	  If N selected, old behavior is saved. In this case lowerdir, upperdir,
> +	  workdir options shows in /proc/fd/mountinfo, /proc/mounts as it given
> +	  by user on mount. User may specify relative paths in these options, then
> +	  we couldn't determine from options which full paths correspond these
> +	  relative paths. Also, after pivot_root syscall these paths (even full)
> +	  will not rebuild according to root change.
> +
> +	  If this config option is enabled then overlay filesystems lowerdir, upperdir,
> +	  workdir options paths will dynamically recalculated as full paths in corresponding
> +	  mount namespaces by default.
> +
> +	  It's also possible to change this behavior on overlayfs module loading.
> +
> +	  Disable this to get a backward compatible with previous kernels configuration,
> +	  but in this case checkpoint/restore functionality for overlayfs mounts
> +	  will not work.
> +
> +	  If backward compatibility is not an issue, then it is safe and
> +	  recommended to say Y here.
> +
> +	  For more information, see Documentation/filesystems/overlayfs.txt
> +
> +	  If unsure, say N.
> \ No newline at end of file
> diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> index fa999d9..055816e 100644
> --- a/fs/overlayfs/overlayfs.h
> +++ b/fs/overlayfs/overlayfs.h
> @@ -253,6 +253,9 @@ int ovl_nlink_start(struct dentry *dentry, bool *locked);
>   void ovl_nlink_end(struct dentry *dentry, bool locked);
>   int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir);
>   
> +int print_path_option(struct seq_file *m, const char *name, struct path *path);
> +int print_paths_option(struct seq_file *m, const char *name, struct ovl_fs *ofs);
> +
>   static inline bool ovl_is_impuredir(struct dentry *dentry)
>   {
>   	return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE);
> diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
> index 05459d3..a2738e9 100644
> --- a/fs/overlayfs/ovl_entry.h
> +++ b/fs/overlayfs/ovl_entry.h
> @@ -43,13 +43,15 @@ struct ovl_path {
>   /* private information held for overlayfs's superblock */
>   struct ovl_fs {
>   	struct vfsmount *upper_mnt;
> +	struct path upperpath;
>   	unsigned int numlower;
>   	/* Number of unique lower sb that differ from upper sb */
>   	unsigned int numlowerfs;
> +	struct path *lowerpaths;
>   	struct ovl_layer *lower_layers;
>   	struct ovl_sb *lower_fs;
> -	/* workbasedir is the path at workdir= mount option */
> -	struct dentry *workbasedir;
> +	/* workbasepath is the path at workdir= mount option */
> +	struct path workbasepath;
>   	/* workdir is the 'work' directory under workbasedir */
>   	struct dentry *workdir;
>   	/* index directory listing overlay inodes by origin file handle */
> diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
> index 50965ca..6325f80 100644
> --- a/fs/overlayfs/super.c
> +++ b/fs/overlayfs/super.c
> @@ -57,6 +57,10 @@ module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644);
>   MODULE_PARM_DESC(ovl_xino_auto_def,
>   		 "Auto enable xino feature");
>   
> +static bool ovl_dyn_path_opts = IS_ENABLED(CONFIG_OVERLAY_FS_DYNAMIC_RESOLVE_PATH_OPTIONS);
> +module_param_named(dyn_path_opts, ovl_dyn_path_opts, bool, 0444);
> +MODULE_PARM_DESC(dyn_path_opts, "dyn_path_opts feature enabled");
> +
>   static void ovl_entry_stack_free(struct ovl_entry *oe)
>   {
>   	unsigned int i;
> @@ -245,11 +249,15 @@ static void ovl_free_fs(struct ovl_fs *ofs)
>   	dput(ofs->indexdir);
>   	dput(ofs->workdir);
>   	if (ofs->workdir_locked)
> -		ovl_inuse_unlock(ofs->workbasedir);
> -	dput(ofs->workbasedir);
> +		ovl_inuse_unlock(ofs->workbasepath.dentry);
> +	path_put(&ofs->workbasepath);
>   	if (ofs->upperdir_locked)
>   		ovl_inuse_unlock(ofs->upper_mnt->mnt_root);
>   	mntput(ofs->upper_mnt);
> +	path_put(&ofs->upperpath);
> +	for (i = 0; i < ofs->numlower; i++)
> +		path_put(&ofs->lowerpaths[i]);

Though I was wrong about workbasepath, I still have a bad feeling about 
refcount but in different place for lowerpaths and different maner. Imagine:

ovl_fill_super
   ovl_get_lowerstack
     ofs->lowerpaths = kcalloc()
     for () # stacklen times
       # first iter
       ovl_lower_dir
         ovl_mount_dir_noesc
           kern_path # should get reference on path somethere inside
       # second iter
       ovl_lower_dir # fail
       goto out_err
     # does not get to ovl_get_lower_layers and thus no ofs->numlower++
   goto out_err
   ovl_free_fs
     for () # ofs->numlower times - zero
	# don't path_put(&ofs->lowerpaths[0])

Looks like a reference is leaked on this path. Correct me if I'm wrong.

> +	kfree(ofs->lowerpaths);
>   	for (i = 0; i < ofs->numlower; i++)
>   		mntput(ofs->lower_layers[i].mnt);
>   	for (i = 0; i < ofs->numlowerfs; i++)
> @@ -368,11 +376,20 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
>   	struct super_block *sb = dentry->d_sb;
>   	struct ovl_fs *ofs = sb->s_fs_info;
>   
> -	seq_show_option(m, "lowerdir", ofs->config.lowerdir);
> -	if (ofs->config.upperdir) {
> -		seq_show_option(m, "upperdir", ofs->config.upperdir);
> -		seq_show_option(m, "workdir", ofs->config.workdir);
> +	if (ovl_dyn_path_opts) {
> +		print_paths_option(m, "lowerdir", ofs);
> +		if (ofs->config.upperdir) {
> +			print_path_option(m, "upperdir", &ofs->upperpath);
> +			print_path_option(m, "workdir", &ofs->workbasepath);
> +		}
> +	} else {
> +		seq_show_option(m, "lowerdir", ofs->config.lowerdir);
> +		if (ofs->config.upperdir) {
> +			seq_show_option(m, "upperdir", ofs->config.upperdir);
> +			seq_show_option(m, "workdir", ofs->config.workdir);
> +		}
>   	}
> +
>   	if (ofs->config.default_permissions)
>   		seq_puts(m, ",default_permissions");
>   	if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0)
> @@ -586,7 +603,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
>   static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
>   					 const char *name, bool persist)
>   {
> -	struct inode *dir =  ofs->workbasedir->d_inode;
> +	struct inode *dir =  ofs->workbasepath.dentry->d_inode;
>   	struct vfsmount *mnt = ofs->upper_mnt;
>   	struct dentry *work;
>   	int err;
> @@ -597,7 +614,7 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
>   	locked = true;
>   
>   retry:
> -	work = lookup_one_len(name, ofs->workbasedir, strlen(name));
> +	work = lookup_one_len(name, ofs->workbasepath.dentry, strlen(name));
>   
>   	if (!IS_ERR(work)) {
>   		struct iattr attr = {
> @@ -1009,7 +1026,7 @@ out:
>   	return err;
>   }
>   
> -static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
> +static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workbasepath)
>   {
>   	struct vfsmount *mnt = ofs->upper_mnt;
>   	struct dentry *temp;
> @@ -1030,7 +1047,7 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
>   	 * workdir. This check requires successful creation of workdir in
>   	 * previous step.
>   	 */
> -	err = ovl_check_d_type_supported(workpath);
> +	err = ovl_check_d_type_supported(workbasepath);
>   	if (err < 0)
>   		goto out;
>   
> @@ -1087,26 +1104,23 @@ out:
>   static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath)
>   {
>   	int err;
> -	struct path workpath = { };
>   
> -	err = ovl_mount_dir(ofs->config.workdir, &workpath);
> +	err = ovl_mount_dir(ofs->config.workdir, &ofs->workbasepath);
>   	if (err)
>   		goto out;
>   
>   	err = -EINVAL;
> -	if (upperpath->mnt != workpath.mnt) {
> +	if (upperpath->mnt != ofs->workbasepath.mnt) {
>   		pr_err("overlayfs: workdir and upperdir must reside under the same mount\n");
>   		goto out;
>   	}
> -	if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) {
> +	if (!ovl_workdir_ok(ofs->workbasepath.dentry, upperpath->dentry)) {
>   		pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
>   		goto out;
>   	}
>   
> -	ofs->workbasedir = dget(workpath.dentry);
> -
>   	err = -EBUSY;
> -	if (ovl_inuse_trylock(ofs->workbasedir)) {
> +	if (ovl_inuse_trylock(ofs->workbasepath.dentry)) {
>   		ofs->workdir_locked = true;
>   	} else if (ofs->config.index) {
>   		pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n");
> @@ -1115,14 +1129,12 @@ static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath)
>   		pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
>   	}
>   
> -	err = ovl_make_workdir(ofs, &workpath);
> +	err = ovl_make_workdir(ofs, &ofs->workbasepath);
>   	if (err)
>   		goto out;
>   
>   	err = 0;
>   out:
> -	path_put(&workpath);
> -
>   	return err;
>   }
>   
> @@ -1290,7 +1302,6 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
>   {
>   	int err;
>   	char *lowertmp, *lower;
> -	struct path *stack = NULL;
>   	unsigned int stacklen, numlower = 0, i;
>   	bool remote = false;
>   	struct ovl_entry *oe;
> @@ -1316,14 +1327,14 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
>   	}
>   
>   	err = -ENOMEM;
> -	stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
> -	if (!stack)
> +	ofs->lowerpaths = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
> +	if (!ofs->lowerpaths)
>   		goto out_err;
>   
>   	err = -EINVAL;
>   	lower = lowertmp;
>   	for (numlower = 0; numlower < stacklen; numlower++) {
> -		err = ovl_lower_dir(lower, &stack[numlower], ofs,
> +		err = ovl_lower_dir(lower, &ofs->lowerpaths[numlower], ofs,
>   				    overlay_stack_depth, &remote);
>   		if (err)
>   			goto out_err;
> @@ -1338,7 +1349,7 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
>   		goto out_err;
>   	}
>   
> -	err = ovl_get_lower_layers(ofs, stack, numlower);
> +	err = ovl_get_lower_layers(ofs, ofs->lowerpaths, numlower);
>   	if (err)
>   		goto out_err;
>   
> @@ -1348,7 +1359,7 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
>   		goto out_err;
>   
>   	for (i = 0; i < numlower; i++) {
> -		oe->lowerstack[i].dentry = dget(stack[i].dentry);
> +		oe->lowerstack[i].dentry = dget(ofs->lowerpaths[i].dentry);
>   		oe->lowerstack[i].layer = &ofs->lower_layers[i];
>   	}
>   
> @@ -1358,9 +1369,6 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
>   		sb->s_d_op = &ovl_dentry_operations.ops;
>   
>   out:
> -	for (i = 0; i < numlower; i++)
> -		path_put(&stack[i]);
> -	kfree(stack);
>   	kfree(lowertmp);
>   
>   	return oe;
> @@ -1372,7 +1380,6 @@ out_err:
>   
>   static int ovl_fill_super(struct super_block *sb, void *data, int silent)
>   {
> -	struct path upperpath = { };
>   	struct dentry *root_dentry;
>   	struct ovl_entry *oe;
>   	struct ovl_fs *ofs;
> @@ -1423,11 +1430,11 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
>   			goto out_err;
>   		}
>   
> -		err = ovl_get_upper(ofs, &upperpath);
> +		err = ovl_get_upper(ofs, &ofs->upperpath);
>   		if (err)
>   			goto out_err;
>   
> -		err = ovl_get_workdir(ofs, &upperpath);
> +		err = ovl_get_workdir(ofs, &ofs->upperpath);
>   		if (err)
>   			goto out_err;
>   
> @@ -1454,7 +1461,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
>   		sb->s_flags |= MS_RDONLY;
>   
>   	if (!(ovl_force_readonly(ofs)) && ofs->config.index) {
> -		err = ovl_get_indexdir(ofs, oe, &upperpath);
> +		err = ovl_get_indexdir(ofs, oe, &ofs->upperpath);
>   		if (err)
>   			goto out_free_oe;
>   
> @@ -1495,17 +1502,16 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
>   
>   	root_dentry->d_fsdata = oe;
>   
> -	mntput(upperpath.mnt);
> -	if (upperpath.dentry) {
> +	if (ofs->upperpath.dentry) {
>   		ovl_dentry_set_upper_alias(root_dentry);
> -		if (ovl_is_impuredir(upperpath.dentry))
> +		if (ovl_is_impuredir(ofs->upperpath.dentry))
>   			ovl_set_flag(OVL_IMPURE, d_inode(root_dentry));
>   	}
>   
>   	/* Root is always merge -> can have whiteouts */
>   	ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
>   	ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry);
> -	ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
> +	ovl_inode_init(d_inode(root_dentry), dget(ofs->upperpath.dentry),
>   		       ovl_dentry_lower(root_dentry));
>   
>   	sb->s_root = root_dentry;
> @@ -1516,7 +1522,7 @@ out_free_oe:
>   	ovl_entry_stack_free(oe);
>   	kfree(oe);
>   out_err:
> -	path_put(&upperpath);
> +	path_put(&ofs->upperpath);
>   	ovl_free_fs(ofs);
>   out:
>   	return err;
> diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
> index 838c371..7e364fd 100644
> --- a/fs/overlayfs/util.c
> +++ b/fs/overlayfs/util.c
> @@ -17,6 +17,7 @@
>   #include <linux/uuid.h>
>   #include <linux/namei.h>
>   #include <linux/ratelimit.h>
> +#include <linux/seq_file.h>
>   #include "overlayfs.h"
>   
>   int ovl_want_write(struct dentry *dentry)
> @@ -678,3 +679,49 @@ err:
>   	pr_err("overlayfs: failed to lock workdir+upperdir\n");
>   	return -EIO;
>   }
> +
> +int print_path_option(struct seq_file *m, const char *name, struct path *path)
> +{
> +	char *tmp;
> +	char *pathname;
> +
> +	tmp = (char*)__get_free_page(GFP_TEMPORARY);
> +	if (!tmp)
> +		return -ENOMEM;
> +
> +	pathname = d_path(path, tmp, PAGE_SIZE);
> +	if (IS_ERR(pathname))
> +		goto out;
> +
> +	seq_show_option(m, name, pathname);
> + out:
> +	free_page((unsigned long)tmp);
> +
> +	return 0;
> +}
> +
> +int print_paths_option(struct seq_file *m, const char *name, struct ovl_fs *ofs)
> +{
> +	char *tmp;
> +	char *pathname;
> +	int i;
> +
> +	tmp = (char*)__get_free_page(GFP_TEMPORARY);
> +	if (!tmp)
> +		return -ENOMEM;
> +
> +	seq_printf(m, ",%s=", name);

The function seq_show_option also insures that name and value do not 
contain special characters ",= \t\n\\" maybe we also need to do it for 
consistency and print them with something like seq_escape(m, name, ",=: 
\t\n\\")? Except that looks good.

> +
> +	for (i = 0; i < ofs->numlower; i++) {
> +		pathname = d_path(&ofs->lowerpaths[i], tmp, PAGE_SIZE);
> +		if (IS_ERR(pathname))
> +			goto out;
> +
> +		seq_printf(m, "%s%s", (i ? ":" : ""), pathname);

The same.

> +	}
> +
> + out:
> +	free_page((unsigned long)tmp);
> +
> +	return 0;
> +}
> 

-- 
Best regards, Tikhomirov Pavel
Software Developer, Virtuozzo.


More information about the Devel mailing list