[Devel] [PATCH] overlayfs: add dynamic path resolving in mount options

Pavel Tikhomirov ptikhomirov at virtuozzo.com
Fri May 22 14:14:52 MSK 2020


Nice job. Please see comments inline.

On 5/22/20 11:01 AM, Alexander Mikhalitsyn wrote:
> This patch adds OVERLAY_FS_DYNAMIC_RESOLVE_PATH_OPTIONS compile-time option,
> and "dyn_path_opts" runtime module option. These options corresponds
> "dynamic path resolving in lowerdir, upperdir, workdir mount options" mode.
> If enabled, user may see real full paths relatively to the mount namespace
> in lowerdir, upperdir, workdir options (/proc/mounts, /proc/<fd>/mountinfo).
> 
> This patch is very helpful to checkpoint/restore functionality of overlayfs
> mounts. With this patch and CRIU it's real to C/R Docker containers with
> overlayfs storage driver.
> 
> Note: d_path function from dcache.c is used to resolve full path in mount
> namespace. This function also adds "(deleted)" suffix if dentry was deleted.
> So, If one of dentries in lowerdir, upperdir, workdir options is deleted,
> we will see "(deleted)" suffix in corresponding path.
> 
> https://jira.sw.ru/browse/PSBM-58614
> 
> Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn at virtuozzo.com>
> 
> ---
>   fs/overlayfs/Kconfig     | 30 ++++++++++++++++++
>   fs/overlayfs/overlayfs.h |  3 ++
>   fs/overlayfs/ovl_entry.h |  6 ++--
>   fs/overlayfs/super.c     | 82 ++++++++++++++++++++++++++----------------------
>   fs/overlayfs/util.c      | 62 ++++++++++++++++++++++++++++++++++++
>   5 files changed, 143 insertions(+), 40 deletions(-)
> 
> diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
> index 9384164..15e8fa5 100644
> --- a/fs/overlayfs/Kconfig
> +++ b/fs/overlayfs/Kconfig
> @@ -103,3 +103,33 @@ config OVERLAY_FS_XINO_AUTO
>   	  For more information, see Documentation/filesystems/overlayfs.txt
>   
>   	  If unsure, say N.
> +
> +config OVERLAY_FS_DYNAMIC_RESOLVE_PATH_OPTIONS
> +	bool "Overlayfs: all mount paths options resolves dynamically on options show"
> +	default y
> +	depends on OVERLAY_FS
> +	help
> +	  This option helps checkpoint/restore of overlayfs mounts.
> +	  If N selected, old behavior is saved. In this case lowerdir, upperdir,
> +	  workdir options shows in /proc/fd/mountinfo, /proc/mounts as it given
> +	  by user on mount. User may specify relative paths in these options, then
> +	  we couldn't determine from options which full paths correspond these
> +	  relative paths. Also, after pivot_root syscall these paths (even full)
> +	  will not rebuild according to root change.
> +
> +	  If this config option is enabled then overlay filesystems lowerdir, upperdir,
> +	  workdir options paths will dynamically recalculated as full paths in corresponding
> +	  mount namespaces by default.
> +
> +	  It's also possible to change this behavior on overlayfs module loading.
> +
> +	  Disable this to get a backward compatible with previous kernels configuration,
> +	  but in this case checkpoint/restore functionality for overlayfs mounts
> +	  will not work.
> +
> +	  If backward compatibility is not an issue, then it is safe and
> +	  recommended to say Y here.
> +
> +	  For more information, see Documentation/filesystems/overlayfs.txt
> +
> +	  If unsure, say N.
> \ No newline at end of file
> diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> index fa999d9..055816e 100644
> --- a/fs/overlayfs/overlayfs.h
> +++ b/fs/overlayfs/overlayfs.h
> @@ -253,6 +253,9 @@ int ovl_nlink_start(struct dentry *dentry, bool *locked);
>   void ovl_nlink_end(struct dentry *dentry, bool locked);
>   int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir);
>   
> +int print_path_option(struct seq_file *m, const char *name, struct path *path);
> +int print_paths_option(struct seq_file *m, const char *name, struct ovl_fs *ofs);
> +
>   static inline bool ovl_is_impuredir(struct dentry *dentry)
>   {
>   	return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE);
> diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
> index 05459d3..a2738e9 100644
> --- a/fs/overlayfs/ovl_entry.h
> +++ b/fs/overlayfs/ovl_entry.h
> @@ -43,13 +43,15 @@ struct ovl_path {
>   /* private information held for overlayfs's superblock */
>   struct ovl_fs {
>   	struct vfsmount *upper_mnt;
> +	struct path upperpath;
>   	unsigned int numlower;
>   	/* Number of unique lower sb that differ from upper sb */
>   	unsigned int numlowerfs;
> +	struct path *lowerpaths;
>   	struct ovl_layer *lower_layers;
>   	struct ovl_sb *lower_fs;
> -	/* workbasedir is the path at workdir= mount option */
> -	struct dentry *workbasedir;
> +	/* workbasepath is the path at workdir= mount option */
> +	struct path workbasepath;
>   	/* workdir is the 'work' directory under workbasedir */
>   	struct dentry *workdir;
>   	/* index directory listing overlay inodes by origin file handle */
> diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
> index 50965ca..2bbcbe1 100644
> --- a/fs/overlayfs/super.c
> +++ b/fs/overlayfs/super.c
> @@ -57,6 +57,10 @@ module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644);
>   MODULE_PARM_DESC(ovl_xino_auto_def,
>   		 "Auto enable xino feature");
>   
> +static bool ovl_dyn_path_opts = IS_ENABLED(CONFIG_OVERLAY_FS_DYNAMIC_RESOLVE_PATH_OPTIONS);
> +module_param_named(dyn_path_opts, ovl_dyn_path_opts, bool, 0444);
> +MODULE_PARM_DESC(dyn_path_opts, "Auto enable xino feature");

Description is a leftover from ovl_xino_auto_def

> +
>   static void ovl_entry_stack_free(struct ovl_entry *oe)
>   {
>   	unsigned int i;
> @@ -245,11 +249,15 @@ static void ovl_free_fs(struct ovl_fs *ofs)
>   	dput(ofs->indexdir);
>   	dput(ofs->workdir);
>   	if (ofs->workdir_locked)
> -		ovl_inuse_unlock(ofs->workbasedir);
> -	dput(ofs->workbasedir);
> +		ovl_inuse_unlock(ofs->workbasepath.dentry);
> +	path_put(&ofs->workbasepath);

You do path_put here, but what if workbasepath is not initialized here?

Imagine codepath:

ovl_fill_super
   ovl_get_upper # <-fails
     goto out_err
# ovl_get_workdir was not called workbasepath is NULL (ofs = kzalloc)
   out_err:
     ovl_free_fs
       path_put(workbasepath)
         path->dentry # <- crash

dput can handle NULL but path_put can't. Something similar should also 
apply to lowerpaths path_put.

>   	if (ofs->upperdir_locked)
>   		ovl_inuse_unlock(ofs->upper_mnt->mnt_root);
>   	mntput(ofs->upper_mnt);
> +	path_put(&ofs->upperpath);
> +	for (i = 0; i < ofs->numlower; i++)
> +		path_put(&ofs->lowerpaths[i]);
> +	kfree(ofs->lowerpaths);
>   	for (i = 0; i < ofs->numlower; i++)
>   		mntput(ofs->lower_layers[i].mnt);
>   	for (i = 0; i < ofs->numlowerfs; i++)
> @@ -368,11 +376,20 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
>   	struct super_block *sb = dentry->d_sb;
>   	struct ovl_fs *ofs = sb->s_fs_info;
>   
> -	seq_show_option(m, "lowerdir", ofs->config.lowerdir);
> -	if (ofs->config.upperdir) {
> -		seq_show_option(m, "upperdir", ofs->config.upperdir);
> -		seq_show_option(m, "workdir", ofs->config.workdir);
> +	if (ovl_dyn_path_opts) {
> +		print_paths_option(m, "lowerdir", ofs);
> +		if (ofs->config.upperdir) {
> +			print_path_option(m, "upperdir", &ofs->upperpath);
> +			print_path_option(m, "workdir", &ofs->workbasepath);
> +		}
> +	} else {
> +		seq_show_option(m, "lowerdir", ofs->config.lowerdir);
> +		if (ofs->config.upperdir) {
> +			seq_show_option(m, "upperdir", ofs->config.upperdir);
> +			seq_show_option(m, "workdir", ofs->config.workdir);
> +		}
>   	}
> +
>   	if (ofs->config.default_permissions)
>   		seq_puts(m, ",default_permissions");
>   	if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0)
> @@ -586,7 +603,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
>   static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
>   					 const char *name, bool persist)
>   {
> -	struct inode *dir =  ofs->workbasedir->d_inode;
> +	struct inode *dir =  ofs->workbasepath.dentry->d_inode;
>   	struct vfsmount *mnt = ofs->upper_mnt;
>   	struct dentry *work;
>   	int err;
> @@ -597,7 +614,7 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
>   	locked = true;
>   
>   retry:
> -	work = lookup_one_len(name, ofs->workbasedir, strlen(name));
> +	work = lookup_one_len(name, ofs->workbasepath.dentry, strlen(name));
>   
>   	if (!IS_ERR(work)) {
>   		struct iattr attr = {
> @@ -1009,7 +1026,7 @@ out:
>   	return err;
>   }
>   
> -static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
> +static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workbasepath)
>   {
>   	struct vfsmount *mnt = ofs->upper_mnt;
>   	struct dentry *temp;
> @@ -1030,7 +1047,7 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
>   	 * workdir. This check requires successful creation of workdir in
>   	 * previous step.
>   	 */
> -	err = ovl_check_d_type_supported(workpath);
> +	err = ovl_check_d_type_supported(workbasepath);
>   	if (err < 0)
>   		goto out;
>   
> @@ -1087,26 +1104,23 @@ out:
>   static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath)
>   {
>   	int err;
> -	struct path workpath = { };
>   
> -	err = ovl_mount_dir(ofs->config.workdir, &workpath);
> +	err = ovl_mount_dir(ofs->config.workdir, &ofs->workbasepath);
>   	if (err)
>   		goto out;
>   
>   	err = -EINVAL;
> -	if (upperpath->mnt != workpath.mnt) {
> +	if (upperpath->mnt != ofs->workbasepath.mnt) {
>   		pr_err("overlayfs: workdir and upperdir must reside under the same mount\n");
>   		goto out;
>   	}
> -	if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) {
> +	if (!ovl_workdir_ok(ofs->workbasepath.dentry, upperpath->dentry)) {
>   		pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
>   		goto out;
>   	}
>   
> -	ofs->workbasedir = dget(workpath.dentry);
> -
>   	err = -EBUSY;
> -	if (ovl_inuse_trylock(ofs->workbasedir)) {
> +	if (ovl_inuse_trylock(ofs->workbasepath.dentry)) {
>   		ofs->workdir_locked = true;
>   	} else if (ofs->config.index) {
>   		pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n");
> @@ -1115,14 +1129,12 @@ static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath)
>   		pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
>   	}
>   
> -	err = ovl_make_workdir(ofs, &workpath);
> +	err = ovl_make_workdir(ofs, &ofs->workbasepath);
>   	if (err)
>   		goto out;
>   
>   	err = 0;
>   out:
> -	path_put(&workpath);
> -
>   	return err;
>   }
>   
> @@ -1290,7 +1302,6 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
>   {
>   	int err;
>   	char *lowertmp, *lower;
> -	struct path *stack = NULL;
>   	unsigned int stacklen, numlower = 0, i;
>   	bool remote = false;
>   	struct ovl_entry *oe;
> @@ -1316,14 +1327,14 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
>   	}
>   
>   	err = -ENOMEM;
> -	stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
> -	if (!stack)
> +	ofs->lowerpaths = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
> +	if (!ofs->lowerpaths)
>   		goto out_err;
>   
>   	err = -EINVAL;
>   	lower = lowertmp;
>   	for (numlower = 0; numlower < stacklen; numlower++) {
> -		err = ovl_lower_dir(lower, &stack[numlower], ofs,
> +		err = ovl_lower_dir(lower, &ofs->lowerpaths[numlower], ofs,
>   				    overlay_stack_depth, &remote);
>   		if (err)
>   			goto out_err;
> @@ -1338,7 +1349,7 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
>   		goto out_err;
>   	}
>   
> -	err = ovl_get_lower_layers(ofs, stack, numlower);
> +	err = ovl_get_lower_layers(ofs, ofs->lowerpaths, numlower);
>   	if (err)
>   		goto out_err;
>   
> @@ -1348,7 +1359,7 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
>   		goto out_err;
>   
>   	for (i = 0; i < numlower; i++) {
> -		oe->lowerstack[i].dentry = dget(stack[i].dentry);
> +		oe->lowerstack[i].dentry = dget(ofs->lowerpaths[i].dentry);
>   		oe->lowerstack[i].layer = &ofs->lower_layers[i];
>   	}
>   
> @@ -1358,9 +1369,6 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
>   		sb->s_d_op = &ovl_dentry_operations.ops;
>   
>   out:
> -	for (i = 0; i < numlower; i++)
> -		path_put(&stack[i]);
> -	kfree(stack);
>   	kfree(lowertmp);
>   
>   	return oe;
> @@ -1372,7 +1380,6 @@ out_err:
>   
>   static int ovl_fill_super(struct super_block *sb, void *data, int silent)
>   {
> -	struct path upperpath = { };
>   	struct dentry *root_dentry;
>   	struct ovl_entry *oe;
>   	struct ovl_fs *ofs;
> @@ -1423,11 +1430,11 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
>   			goto out_err;
>   		}
>   
> -		err = ovl_get_upper(ofs, &upperpath);
> +		err = ovl_get_upper(ofs, &ofs->upperpath);
>   		if (err)
>   			goto out_err;
>   
> -		err = ovl_get_workdir(ofs, &upperpath);
> +		err = ovl_get_workdir(ofs, &ofs->upperpath);
>   		if (err)
>   			goto out_err;
>   
> @@ -1454,7 +1461,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
>   		sb->s_flags |= MS_RDONLY;
>   
>   	if (!(ovl_force_readonly(ofs)) && ofs->config.index) {
> -		err = ovl_get_indexdir(ofs, oe, &upperpath);
> +		err = ovl_get_indexdir(ofs, oe, &ofs->upperpath);
>   		if (err)
>   			goto out_free_oe;
>   
> @@ -1495,17 +1502,16 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
>   
>   	root_dentry->d_fsdata = oe;
>   
> -	mntput(upperpath.mnt);
> -	if (upperpath.dentry) {
> +	if (ofs->upperpath.dentry) {
>   		ovl_dentry_set_upper_alias(root_dentry);
> -		if (ovl_is_impuredir(upperpath.dentry))
> +		if (ovl_is_impuredir(ofs->upperpath.dentry))
>   			ovl_set_flag(OVL_IMPURE, d_inode(root_dentry));
>   	}
>   
>   	/* Root is always merge -> can have whiteouts */
>   	ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
>   	ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry);
> -	ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
> +	ovl_inode_init(d_inode(root_dentry), dget(ofs->upperpath.dentry),

Why do we need this extra dget?

>   		       ovl_dentry_lower(root_dentry));
>   
>   	sb->s_root = root_dentry;
> @@ -1516,7 +1522,7 @@ out_free_oe:
>   	ovl_entry_stack_free(oe);
>   	kfree(oe);
>   out_err:
> -	path_put(&upperpath);
> +	path_put(&ofs->upperpath);
>   	ovl_free_fs(ofs);
>   out:
>   	return err;
> diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
> index 838c371..28cdf31 100644
> --- a/fs/overlayfs/util.c
> +++ b/fs/overlayfs/util.c
> @@ -17,6 +17,7 @@
>   #include <linux/uuid.h>
>   #include <linux/namei.h>
>   #include <linux/ratelimit.h>
> +#include <linux/seq_file.h>
>   #include "overlayfs.h"
>   
>   int ovl_want_write(struct dentry *dentry)
> @@ -678,3 +679,64 @@ err:
>   	pr_err("overlayfs: failed to lock workdir+upperdir\n");
>   	return -EIO;
>   }
> +
> +int print_path_option(struct seq_file *m, const char *name, struct path *path)
> +{
> +	char *tmp = (char*)__get_free_page(GFP_TEMPORARY);
> +	char *pathname;
> +	int len;
> +
> +	if (!tmp)
> +		return -ENOMEM;

Does not look nice when we initialize variable inside of a declaration 
but need to check error separately, maybe just do:

char *tmp;

tmp = (char*)__get_free_page(GFP_TEMPORARY);
if (!tmp)
         return -ENOMEM;

> +
> +	pathname = d_path(path, tmp, PAGE_SIZE);
> +	len = PTR_ERR(pathname);

Looks like an excess assignment, the value of "len" is never used after 
this and before next "len" asignment.

> +	if (IS_ERR(pathname))
> +		goto out;

> +	len = tmp + PAGE_SIZE - 1 - pathname;
> +	pathname[len] = 0;

Two lines above and len variable look excess, as d_path ads '\0' in the end.

> +	seq_show_option(m, name, pathname);
> + out:
> +	free_page((unsigned long)tmp);
> +
> +	return 0;
> +}
> +
> +int print_paths_option(struct seq_file *m, const char *name, struct ovl_fs *ofs)
> +{
> +	unsigned int order = ilog2(ofs->numlower) + 1;
> +	char *res = (char*)__get_free_pages(GFP_TEMPORARY, order);
> +	char *tmp = (char*)__get_free_page(GFP_TEMPORARY);
> +	char *pathname;
> +	int len;
> +	int i;
> +	char *shift;
> +
> +	if (!res)
> +		return -ENOMEM;
> +
> +	if (!tmp)
> +		return -ENOME > +
> +	shift = res;
> +	for (i = 0; i < ofs->numlower; i++) {
> +		pathname = d_path(&ofs->lowerpaths[i], tmp, PAGE_SIZE);
> +		len = PTR_ERR(pathname);
> +		if (IS_ERR(pathname))
> +			goto out;
> +		len = tmp + PAGE_SIZE - 1 - pathname;
> +		pathname[len] = 0;
> +
> +		*shift = ':';
> +		memcpy(++shift, pathname, len);
> +		shift += len;

And all the same applies to this function too.

More over with memcpy you don't controll overflow. You can:

len = snprintf(shift, remaining_lenth, "%s", pathname);
if (len >= remaining_lenth) {
	/* overflow */
}
shift += len;

I think "len = strlen(pathname)" looks better than "tmp + PAGE_SIZE - 1 
- pathname" though maybe a bit less effective. Or you can use snprintf 
instead of memcpy, it will also identify printed len.

> +	}
> +	*shift = 0;
> +
> +	seq_show_option(m, name, res+1);
> + out:
> +	free_pages((unsigned long)res, order);
> +	free_page((unsigned long)tmp);
> +
> +	return 0;
> +}
> 

-- 
Best regards, Tikhomirov Pavel
Software Developer, Virtuozzo.


More information about the Devel mailing list