[Devel] [PATCH RHEL7 COMMIT] Revert "devtmpfs: containerize it with new obj ns operation"
Konstantin Khorenko
khorenko at virtuozzo.com
Fri Aug 28 05:10:57 PDT 2015
The commit is pushed to "branch-rh7-3.10.0-229.7.2-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-229.7.2.vz7.6.3
------>
commit 968c8efb7981f87f8bc0616741edb6c0bc556d76
Author: Vladimir Davydov <vdavydov at parallels.com>
Date: Fri Aug 28 16:10:57 2015 +0400
Revert "devtmpfs: containerize it with new obj ns operation"
Patchset description:
Rework devtmpfs virtualization
Currently, we implement full-featured devtmpfs virtualization for VE:
when a device is created in a VE "namespace", we send a signal to
kdevtmpfs to create the devnode on devtmpfs mount corresponding to the
VE. This seems to be over-complicated: all this work can be done from
userspace, because we only have a hardcoded list of devices created
exclusively for VE on container start. Those are tty-related devices and
mem devices, and we only need the latter to create devtmpfs nodes.
Moreover, it is buggy: ve_stop_ns, which destroys the VE devtmpfs mount, can
be called before a VE tty device is unregistered, resulting in a kernel panic (KP):
https://jira.sw.ru/browse/PSBM-35077
This patch therefore simplifies it. It makes the kernel only provide a
single empty tmpfs mount per VE, which appears on an attempt to mount
devtmpfs from inside a VE. The content of the fs is to be filled by the
userspace on container start, which will be done in the scope of
https://jira.sw.ru/browse/PSBM-35146
Vladimir Davydov (6):
Revert "ve/devtmpfs: Create required devices on container startup"
Revert "ve/devtmpfs: pass proper options string"
Revert "devtmpfs: containerize it with new obj ns operation"
Revert "fs: add data pointer to mount_ns()"
Revert "devtmpfs: per-VE mounts introduced"
devtmpfs: lightweight virtualization
Reviewed-by: Cyrill Gorcunov <gorcunov at virtuozzo.com>
===
This patch description:
This reverts commit 53343c3b231ed36d973e6d3ac2ab9ad7b7c87e25.
The whole point of devtmpfs is simplifying the system bootup logic.
There is absolutely no point in virtualizing it, because on container
start we create devices from a hardcoded list (these are ttys, which I'd
prefer not to create at all, using ptys instead, but we have to live with
it for compatibility reasons for now). This means that it is enough to
provide the userspace with a per-VE tmpfs mount called "devtmpfs" and
teach it to make device nodes from a hardcoded list on container start
instead of implementing devtmpfs virtualization in the kernel. The
kernel part will be done by the following patches.
Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
---
drivers/base/devtmpfs.c | 37 ++-----------------------------------
fs/sysfs/ve.c | 9 ---------
include/linux/kobject_ns.h | 2 --
3 files changed, 2 insertions(+), 46 deletions(-)
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index 0448af8..349d6eb 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -366,46 +366,13 @@ int devtmpfs_mount(const char *mntdir)
static DECLARE_COMPLETION(setup_done);
-static struct path set_dev_pwd(struct device *dev)
-{
- const struct kobj_ns_type_operations *ops;
- struct path pwd = current->fs->pwd;
-
- ops = kobj_ns_ops(&dev->kobj);
- path_get(&pwd);
-
- if (ops && ops->devtmpfs) {
- const struct path *devtmpfs_root;
-
- devtmpfs_root = ops->devtmpfs(&dev->kobj);
- BUG_ON(!devtmpfs_root);
- set_fs_pwd(current->fs, devtmpfs_root);
- }
- return pwd;
-}
-
-static void drop_dev_pwd(struct path *pwd)
-{
- set_fs_pwd(current->fs, pwd);
- path_put(pwd);
-}
-
static int handle(const char *name, umode_t mode, kuid_t uid, kgid_t gid,
struct device *dev)
{
- struct path pwd;
- int err;
-
- pwd = set_dev_pwd(dev);
-
if (mode)
- err = handle_create(name, mode, uid, gid, dev);
+ return handle_create(name, mode, uid, gid, dev);
else
- err = handle_remove(name, dev);
-
- /* Restore kthread pwd */
- drop_dev_pwd(&pwd);
- return err;
+ return handle_remove(name, dev);
}
static int devtmpfsd(void *p)
diff --git a/fs/sysfs/ve.c b/fs/sysfs/ve.c
index 79ad6d5..bb28a4b 100644
--- a/fs/sysfs/ve.c
+++ b/fs/sysfs/ve.c
@@ -43,21 +43,12 @@ const void *ve_namespace(struct device *dev)
return (!dev->groups && dev_get_drvdata(dev)) ? dev_get_drvdata(dev) : get_ve0();
}
-static const struct path *ve_devtmpfs(const struct kobject *kobj)
-{
- struct device *dev = container_of(kobj, struct device, kobj);
- const struct ve_struct *ve = dev->class->namespace(dev);
-
- return &ve->devtmpfs_root;
-}
-
struct kobj_ns_type_operations ve_ns_type_operations = {
.type = KOBJ_NS_TYPE_VE,
.grab_current_ns = ve_grab_current_ns,
.netlink_ns = ve_netlink_ns,
.initial_ns = ve_initial_ns,
.drop_ns = ve_drop_ns,
- .devtmpfs = ve_devtmpfs,
};
static bool sysfs_perms_shown(struct ve_struct *ve, struct sysfs_dirent *sd)
diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h
index ebc2e2d..b1395aa 100644
--- a/include/linux/kobject_ns.h
+++ b/include/linux/kobject_ns.h
@@ -19,7 +19,6 @@
struct sock;
struct kobject;
-struct path;
/*
* Namespace types which are used to tag kobjects and sysfs entries.
@@ -45,7 +44,6 @@ struct kobj_ns_type_operations {
const void *(*netlink_ns)(struct sock *sk);
const void *(*initial_ns)(void);
void (*drop_ns)(void *);
- const struct path *(*devtmpfs)(const struct kobject *);
};
int kobj_ns_type_register(const struct kobj_ns_type_operations *ops);
More information about the Devel
mailing list