[Devel] [PATCH RHEL7 COMMIT] ve/devmnt: Introduce ve::devmnt list

Konstantin Khorenko khorenko at odin.com
Mon May 18 01:52:09 PDT 2015


The commit is pushed to "branch-rh7-3.10.0-123.1.2-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-123.1.2.vz7.5.1
------>
commit 16a628a25414e4cca83d7aa8574fbe22f3e8f500
Author: Kirill Tkhai <ktkhai at odin.com>
Date:   Mon May 18 12:52:09 2015 +0400

    ve/devmnt: Introduce ve::devmnt list
    
    1)Porting patch "ve: mount option list" by Maxim Patlasov:
    
    The patch adds new fields to ve_struct: devmnt_list and devmnt_mutex.
    devmnt_list is the head of list of ve_devmnt structs. Each host block device
    visible from CT can have no more than one struct ve_devmnt linked in
    ve->devmnt_list. If ve_devmnt is present, it can be found by 'dev' field.
    
    Each ve_devmnt struct may bear two strings: hidden and allowed options.
    hidden_options will be automatically added to CT-user-supplied mount options
    after checking allowed_options. Only options listed in allowed_options are
    allowed.
    
    devmnt_mutex is to protect operations on the list of ve_devmnt structs.
    
    2)Porting patch "vecalls: VE_CONFIGURE_MOUNT_OPTIONS" by Maxim Patlasov.
    
    Reworking the interface using cgroups. Each CT now has a file:
    
    [ve_cgroup_mnt_pnt]/[CTID]/ve.mount_opts
    
    for configuring permittions for a block device. Below is permittions line
    example:
    
    "0 major:minor;1 balloon_ino=12,pfcache_csum,pfcache=/vz/pfcache;2 barrier=1"
    
    Here, major:minor is a device, '1' starts comma-separated list of
    hidden options, and '2' is allowed ones.
    
    https://jira.sw.ru/browse/PSBM-32273
    
    Signed-off-by: Kirill Tkhai <ktkhai at odin.com>
    Acked-by: Maxim Patlasov <mpatlasov at openvz.org>
---
 include/linux/ve.h |  11 ++++
 kernel/ve/ve.c     | 151 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 162 insertions(+)

diff --git a/include/linux/ve.h b/include/linux/ve.h
index 03e90e4..8b70dbe 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -123,12 +123,23 @@ struct ve_struct {
 	struct net		*ve_netns;
 	struct mutex		sync_mutex;
 
+	struct list_head	devmnt_list;
+	struct mutex		devmnt_mutex;
+
 	struct kmapset_key	ve_sysfs_perms;
 #if IS_ENABLED(CONFIG_DEVTMPFS)
 	struct path		devtmpfs_root;
 #endif
 };
 
+struct ve_devmnt {
+	struct list_head	link;
+
+	dev_t                   dev;
+	char			*allowed_options;
+	char			*hidden_options; /* balloon_ino, etc. */
+};
+
 #define VE_MEMINFO_DEFAULT      1       /* default behaviour */
 #define VE_MEMINFO_SYSTEM       0       /* disable meminfo virtualization */
 
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 609ea75..6ab409f 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -40,6 +40,7 @@
 #include <linux/task_work.h>
 #include <linux/tty.h>
 #include <linux/console.h>
+#include <linux/ctype.h>
 
 #include <uapi/linux/vzcalluser.h>
 #include <linux/venet.h>
@@ -710,6 +711,8 @@ do_init:
 	mutex_init(&ve->sync_mutex);
 	INIT_LIST_HEAD(&ve->devices);
 	INIT_LIST_HEAD(&ve->ve_list);
+	INIT_LIST_HEAD(&ve->devmnt_list);
+	mutex_init(&ve->devmnt_mutex);
 	kmapset_init_key(&ve->ve_sysfs_perms);
 
 	return &ve->css;
@@ -734,11 +737,33 @@ static void ve_offline(struct cgroup *cg)
 	veid_free(ve->veid);
 }
 
+static void ve_devmnt_free(struct ve_devmnt *devmnt)
+{
+	if (!devmnt)
+		return;
+
+	kfree(devmnt->allowed_options);
+	kfree(devmnt->hidden_options);
+	kfree(devmnt);
+}
+
+static void free_ve_devmnts(struct ve_struct *ve)
+{
+	while (!list_empty(&ve->devmnt_list)) {
+		struct ve_devmnt *devmnt;
+
+		devmnt = list_first_entry(&ve->devmnt_list, struct ve_devmnt, link);
+		list_del(&devmnt->link);
+		ve_devmnt_free(devmnt);
+	}
+}
+
 static void ve_destroy(struct cgroup *cg)
 {
 	struct ve_struct *ve = cgroup_ve(cg);
 
 	kmapset_unlink(&ve->ve_sysfs_perms, &ve_sysfs_perms);
+	free_ve_devmnts(ve);
 
 	ve_log_destroy(ve);
 	kfree(ve->binfmt_misc);
@@ -886,6 +911,127 @@ static int ve_legacy_veid_read(struct cgroup *cg, struct cftype *cft,
 	return seq_printf(m, "%u\n", cgroup_ve(cg)->veid);
 }
 
+/*
+ * 'data' for VE_CONFIGURE_MOUNT_OPTIONS is a zero-terminated string
+ * consisting of substrings separated by MNTOPT_DELIM.
+ */
+#define MNTOPT_DELIM ';'
+
+/*
+ * Each substring has the form of "<type> <comma-separated-list-of-options>"
+ * where types are:
+ */
+enum {
+	MNTOPT_DEVICE = 0,
+	MNTOPT_HIDDEN = 1,
+	MNTOPT_ALLOWED = 2,
+};
+
+/*
+ * 'ptr' points to the first character of buffer to parse
+ * 'endp' points to the last character of buffer to parse
+ */
+static int ve_parse_mount_options(const char *ptr, const char *endp,
+				  struct ve_devmnt *devmnt)
+{
+	while (*ptr) {
+		const char *delim = strchr(ptr, MNTOPT_DELIM) ? : endp;
+		char *space = strchr(ptr, ' ');
+		int type;
+		char *options, c, s;
+		int options_size = delim - space;
+		char **opts_pp = NULL; /* where to store 'options' */
+
+		if (delim == ptr || !space || options_size <= 1 ||
+		    !isdigit(*ptr) || space > delim)
+			return -EINVAL;
+
+		if (sscanf(ptr, "%d%c", &type, &c) != 2 || c != ' ')
+			return -EINVAL;
+
+		if (type == MNTOPT_DEVICE) {
+			unsigned major, minor;
+			if (devmnt->dev)
+				return -EINVAL; /* Already set */
+			if (sscanf(space + 1, "%u%c%u%c", &major, &c,
+							  &minor, &s) != 4 ||
+			    c != ':' || s != MNTOPT_DELIM)
+				return -EINVAL;
+			devmnt->dev = MKDEV(major, minor);
+			goto next;
+		}
+
+	        options = kmalloc(options_size, GFP_KERNEL);
+		if (!options)
+			return -ENOMEM;
+
+		strncpy(options, space + 1, options_size - 1);
+		options[options_size - 1] = 0;
+
+		switch (type) {
+		case MNTOPT_ALLOWED:
+			opts_pp = &devmnt->allowed_options;
+			break;
+		case MNTOPT_HIDDEN:
+			opts_pp = &devmnt->hidden_options;
+			break;
+		};
+
+		/* wrong type or already set */
+		if (!opts_pp || *opts_pp) {
+			kfree(options);
+			return -EINVAL;
+		}
+
+		*opts_pp = options;
+next:
+		if (!*delim)
+			break;
+
+		ptr = delim + 1;
+	}
+
+	if (!devmnt->dev)
+		return -EINVAL;
+	return 0;
+}
+
+static int ve_mount_opts_write(struct cgroup *cg, struct cftype *cft,
+			       const char *buffer)
+{
+	struct ve_struct *ve = cgroup_ve(cg);
+	struct ve_devmnt *devmnt, *old;
+	int size, err;
+
+	size = strlen(buffer);
+	if (size <= 1)
+		return -EINVAL;
+
+	devmnt = kzalloc(sizeof(*devmnt), GFP_KERNEL);
+	if (!devmnt)
+		return -ENOMEM;
+
+	err = ve_parse_mount_options(buffer, buffer + size, devmnt);
+	if (err) {
+		ve_devmnt_free(devmnt);
+		return err;
+	}
+
+	mutex_lock(&ve->devmnt_mutex);
+	list_for_each_entry(old, &ve->devmnt_list, link) {
+		/* Delete old devmnt */
+		if (old->dev == devmnt->dev) {
+			list_del(&old->link);
+			ve_devmnt_free(old);
+			break;
+		}
+	}
+	list_add(&devmnt->link, &ve->devmnt_list);
+	mutex_unlock(&ve->devmnt_mutex);
+
+	return 0;
+}
+
 static struct cftype ve_cftypes[] = {
 	{
 		.name = "state",
@@ -898,6 +1044,11 @@ static struct cftype ve_cftypes[] = {
 		.flags = CFTYPE_NOT_ON_ROOT,
 		.read_seq_string = ve_legacy_veid_read,
 	},
+	{
+		.name = "mount_opts",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.write_string = ve_mount_opts_write,
+	},
 	{ }
 };
 



More information about the Devel mailing list