[Devel] [PATCH rh7 v3 2/2] ve/fs/sync: fix CT's mountpoints traversal
Andrey Ryabinin
aryabinin at virtuozzo.com
Thu Feb 25 08:19:38 PST 2016
Currently sync reads the 've->root_path.mnt' mount and iterates over its children.
This doesn't work, because
a) not all in-container mounts are in the children list of the root mount.
b) ve->root_path.mnt points to an incorrect 'struct mount *'.
This patch slightly reworks the mount traversal. Now sync iterates over all
mounts of the mount namespaces in the ve. The list of the ve's mount namespaces
is maintained via mntns_list.
https://jira.sw.ru/browse/PSBM-44125
Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
fs/mount.h | 1 +
fs/namespace.c | 7 +++++++
fs/sync.c | 40 +++++++++++++++++++++-------------------
include/linux/ve.h | 1 +
kernel/ve/ve.c | 4 ++++
5 files changed, 34 insertions(+), 19 deletions(-)
diff --git a/fs/mount.h b/fs/mount.h
index b496064..285484c 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -7,6 +7,7 @@ struct mnt_namespace {
unsigned int proc_inum;
struct mount * root;
struct list_head list;
+ struct list_head mntns_list;
struct user_namespace *user_ns;
u64 seq; /* Sequence number to prevent loops */
wait_queue_head_t poll;
diff --git a/fs/namespace.c b/fs/namespace.c
index fa9ee9e..8c02d0c 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2569,6 +2569,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
atomic_set(&new_ns->count, 1);
new_ns->root = NULL;
INIT_LIST_HEAD(&new_ns->list);
+ INIT_LIST_HEAD(&new_ns->mntns_list);
init_waitqueue_head(&new_ns->poll);
new_ns->event = 0;
new_ns->user_ns = get_user_ns(user_ns);
@@ -2609,6 +2610,8 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
list_add_tail(&new_ns->list, &new->mnt_list);
br_write_unlock(&vfsmount_lock);
+ list_add(&new_ns->mntns_list, &get_exec_env()->mntns_list);
+
/*
* Second pass: switch the tsk->fs->* elements and mark new vfsmounts
* as belonging to new namespace. We have already acquired a private
@@ -2673,7 +2676,10 @@ static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
struct mount *mnt = real_mount(m);
mnt->mnt_ns = new_ns;
new_ns->root = mnt;
+ down_write(&namespace_sem);
list_add(&mnt->mnt_list, &new_ns->list);
+ list_add(&new_ns->mntns_list, &get_exec_env()->mntns_list);
+ up_write(&namespace_sem);
} else {
mntput(m);
}
@@ -2954,6 +2960,7 @@ void put_mnt_ns(struct mnt_namespace *ns)
namespace_lock();
br_write_lock(&vfsmount_lock);
umount_tree(ns->root, 0);
+ list_del(&ns->mntns_list);
br_write_unlock(&vfsmount_lock);
namespace_unlock();
free_mnt_ns(ns);
diff --git a/fs/sync.c b/fs/sync.c
index 0eb621f..abb52f5 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -129,33 +129,35 @@ static int sync_filesystem_collected(struct list_head *sync_list, struct super_b
static int sync_collect_filesystems(struct ve_struct *ve, struct list_head *sync_list)
{
- struct mount *root = real_mount(ve->root_path.mnt);
struct mount *mnt;
+ struct mnt_namespace *mnt_ns;
struct sync_sb *ss;
int ret = 0;
BUG_ON(!list_empty(sync_list));
down_read(&namespace_sem);
- for (mnt = root; mnt; mnt = next_mnt(mnt, root)) {
- if (sync_filesystem_collected(sync_list, mnt->mnt.mnt_sb))
- continue;
-
- ss = kmalloc(sizeof(*ss), GFP_KERNEL);
- if (ss == NULL) {
- ret = -ENOMEM;
- break;
+ list_for_each_entry(mnt_ns, &ve->mntns_list, mntns_list) {
+ list_for_each_entry(mnt, &mnt_ns->list, mnt_list) {
+ if (sync_filesystem_collected(sync_list, mnt->mnt.mnt_sb))
+ continue;
+
+ ss = kmalloc(sizeof(*ss), GFP_KERNEL);
+ if (ss == NULL) {
+ ret = -ENOMEM;
+ break;
+ }
+ ss->sb = mnt->mnt.mnt_sb;
+ /*
+ * We hold mount point and thus can be sure, that superblock is
+ * alive. And it means, that we can safely increase it's usage
+ * counter.
+ */
+ spin_lock(&sb_lock);
+ ss->sb->s_count++;
+ spin_unlock(&sb_lock);
+ list_add_tail(&ss->list, sync_list);
}
- ss->sb = mnt->mnt.mnt_sb;
- /*
- * We hold mount point and thus can be sure, that superblock is
- * alive. And it means, that we can safely increase it's usage
- * counter.
- */
- spin_lock(&sb_lock);
- ss->sb->s_count++;
- spin_unlock(&sb_lock);
- list_add_tail(&ss->list, sync_list);
}
up_read(&namespace_sem);
return ret;
diff --git a/include/linux/ve.h b/include/linux/ve.h
index e603d9e..243bea1 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -115,6 +115,7 @@ struct ve_struct {
struct list_head devmnt_list;
struct mutex devmnt_mutex;
+ struct list_head mntns_list;
struct kmapset_key ve_sysfs_perms;
#ifdef CONFIG_AIO
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 231f398..ffd55e4 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -80,6 +80,7 @@ struct ve_struct ve0 = {
.sched_lat_ve.cur = &ve0_lat_stats,
.init_cred = &init_cred,
.mnt_nr = 0,
+ .mntns_list = LIST_HEAD_INIT(ve0.mntns_list),
};
EXPORT_SYMBOL(ve0);
@@ -652,6 +653,7 @@ do_init:
INIT_LIST_HEAD(&ve->devices);
INIT_LIST_HEAD(&ve->ve_list);
INIT_LIST_HEAD(&ve->devmnt_list);
+ INIT_LIST_HEAD(&ve->mntns_list);
mutex_init(&ve->devmnt_mutex);
kmapset_init_key(&ve->ve_sysfs_perms);
@@ -706,6 +708,8 @@ static void ve_destroy(struct cgroup *cg)
kmapset_unlink(&ve->ve_sysfs_perms, &ve_sysfs_perms);
free_ve_devmnts(ve);
+ WARN_ON(!list_empty(&ve->mntns_list));
+
ve_log_destroy(ve);
#if IS_ENABLED(CONFIG_BINFMT_MISC)
kfree(ve->binfmt_misc);
--
2.4.10
More information about the Devel
mailing list