[Devel] [PATCH rh7 1/2] ve: Introduce ve::devmnt list

Kirill Tkhai ktkhai at odin.com
Thu May 7 08:56:40 PDT 2015


Hi, Maxim,

В Ср, 06/05/2015 в 15:05 -0700, Maxim Patlasov пишет:
> Kirill, see please a couple of inline comments below...
> 
> On 04/29/2015 01:29 AM, Kirill Tkhai wrote:
> > 1)Porting patch "ve: mount option list" by Maxim Patlasov:
> >
> > The patch adds new fields to ve_struct: devmnt_list and devmnt_mutex.
> >
> > devmnt_list is the head of list of ve_devmnt structs. Each host block device
> > visible from CT can have no more than one struct ve_devmnt linked in
> > ve->devmnt_list. If ve_devmnt is present, it can be found by 'dev' field.
> > Each ve_devmnt struct may bear two strings: hidden and allowed options.
> >
> > hidden_options will be automatically added to CT-user-supplied mount options
> > after checking allowed_options. Only options listed in allowed_options are
> > allowed.
> >
> > devmnt_mutex is to protect operations on the list of ve_devmnt structs.
> >
> > 2)Porting patch "vecalls: VE_CONFIGURE_MOUNT_OPTIONS" by Maxim Patlasov.
> >
> > Reworking the interface using cgroups. Each CT now has a file:
> >
> > [ve_cgroup_mnt_pnt]/[CTID]/ve.mount_opts
> >
> > for configuring permittions for a block device. Below is permittions line
> > example:
> >
> > "0 xxx;1 balloon_ino=12,pfcache_csum,pfcache=/vz/pfcache;2 barrier=1"
> >
> > Here, xxx is st_rdev of device, '1' starts comma-separated list of
> > hidden options, and '2' is allowed ones.
> >
> > Signed-off-by: Kirill Tkhai <ktkhai at odin.com>
> > ---
> >   include/linux/ve.h |   11 ++++
> >   kernel/ve/ve.c     |  148 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> >   2 files changed, 159 insertions(+)
> >
> > diff --git a/include/linux/ve.h b/include/linux/ve.h
> > index 61f31fd..98dd244 100644
> > --- a/include/linux/ve.h
> > +++ b/include/linux/ve.h
> > @@ -123,6 +123,9 @@ struct ve_struct {
> >   	struct net		*ve_netns;
> >   	struct mutex		sync_mutex;
> >   
> > +	struct list_head	devmnt_list;
> > +	struct mutex		devmnt_mutex;
> > +
> >   	struct kmapset_key	ve_sysfs_perms;
> >   	struct list_head	ve_cgroup_head;
> >   #if IS_ENABLED(CONFIG_DEVTMPFS)
> > @@ -130,6 +133,14 @@ struct ve_struct {
> >   #endif
> >   };
> >   
> > +struct ve_devmnt {
> > +	struct list_head	link;
> > +
> > +	dev_t                   dev;
> > +	char			*allowed_options;
> > +	char			*hidden_options; /* balloon_ino, etc. */
> > +};
> > +
> >   #define VE_MEMINFO_DEFAULT      1       /* default behaviour */
> >   #define VE_MEMINFO_SYSTEM       0       /* disable meminfo virtualization */
> >   
> > diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
> > index ee2435c..e963b56 100644
> > --- a/kernel/ve/ve.c
> > +++ b/kernel/ve/ve.c
> > @@ -749,6 +749,8 @@ static struct cgroup_subsys_state *ve_create(struct cgroup *cg)
> >   	INIT_LIST_HEAD(&ve->devices);
> >   	INIT_LIST_HEAD(&ve->ve_cgroup_head);
> >   	INIT_LIST_HEAD(&ve->ve_list);
> > +	INIT_LIST_HEAD(&ve->devmnt_list);
> > +	mutex_init(&ve->devmnt_mutex);
> >   	kmapset_init_key(&ve->ve_sysfs_perms);
> >   
> >   	return &ve->css;
> > @@ -782,11 +784,33 @@ static void ve_offline(struct cgroup *cg)
> >   	}
> >   }
> >   
> > +static void ve_devmnt_free(struct ve_devmnt *devmnt)
> > +{
> > +	if (!devmnt)
> > +		return;
> > +
> > +	kfree(devmnt->allowed_options);
> > +	kfree(devmnt->hidden_options);
> > +	kfree(devmnt);
> > +}
> > +
> > +static void free_ve_devmnts(struct ve_struct *ve)
> > +{
> > +	while (!list_empty(&ve->devmnt_list)) {
> > +		struct ve_devmnt *devmnt;
> > +
> > +		devmnt = list_first_entry(&ve->devmnt_list, struct ve_devmnt, link);
> > +		list_del(&devmnt->link);
> > +		ve_devmnt_free(devmnt);
> > +	}
> > +}
> > +
> >   static void ve_destroy(struct cgroup *cg)
> >   {
> >   	struct ve_struct *ve = cgroup_ve(cg);
> >   
> >   	kmapset_unlink(&ve->ve_sysfs_perms, &ve_sysfs_perms);
> > +	free_ve_devmnts(ve);
> >   
> >   	ve_log_destroy(ve);
> >   	kfree(ve->binfmt_misc);
> > @@ -934,6 +958,125 @@ static int ve_legacy_veid_read(struct cgroup *cg, struct cftype *cft,
> >   	return seq_printf(m, "%u\n", cgroup_ve(cg)->veid);
> >   }
> >   
> > +/*
> > + * 'data' for VE_CONFIGURE_MOUNT_OPTIONS is a zero-terminated string
> > + * consisting of substrings separated by MNTOPT_DELIM.
> > + */
> > +#define MNTOPT_DELIM ';'
> > +
> > +/*
> > + * Each substring has the form of "<type> <comma-separated-list-of-options>"
> > + * where types are:
> > + */
> > +enum {
> > +	MNTOPT_DEVICE = 0,
> > +	MNTOPT_HIDDEN = 1,
> > +	MNTOPT_ALLOWED = 2,
> > +};
> > +
> > +/*
> > + * 'ptr' points to the first character of buffer to parse
> > + * 'endp' points to the last character of buffer to parse
> > + */
> > +static int ve_parse_mount_options(const char *ptr, const char *endp,
> > +				  struct ve_devmnt *devmnt)
> > +{
> > +	while (*ptr) {
> > +		const char *delim = strchr(ptr, MNTOPT_DELIM) ? : endp;
> > +		char *space = strchr(ptr, ' ');
> > +		int type;
> > +		char *options, c;
> > +		int options_size = delim - space;
> > +		char **opts_pp = NULL; /* where to store 'options' */
> > +
> > +		if (delim == ptr || !space || options_size <= 1)
> > +			return -EINVAL;
> > +
> > +		if (sscanf(ptr, "%d%c", &type, &c) != 2 || c != ' ')
> > +			return -EINVAL;
> > +
> > +		if (type == MNTOPT_DEVICE) {
> > +			unsigned int dev;
> > +			if (devmnt->dev)
> > +				return -EINVAL; /* Already set */
> > +			if (sscanf(space + 1, "%u%c", &dev, &c) != 2 ||
> > +			    c != ';')
> > +				return -EINVAL;
> > +			devmnt->dev = new_decode_dev(dev);
> 
> I think major:minor would be better (human readable) string 
> representation of devmnt->dev than unsigned integer st_rdev.

This makes a sense. I'll send v2 series.

> > +			goto next;
> > +		}
> > +
> > +	        options = kmalloc(options_size, GFP_KERNEL);
> > +		if (!options)
> > +			return -ENOMEM;
> > +
> > +		strncpy(options, space + 1, options_size - 1);
> > +		options[options_size - 1] = 0;
> > +
> > +		switch (type) {
> > +		case MNTOPT_ALLOWED:
> > +			opts_pp = &devmnt->allowed_options;
> > +			break;
> > +		case MNTOPT_HIDDEN:
> > +			opts_pp = &devmnt->hidden_options;
> > +			break;
> > +		};
> > +
> > +		/* wrong type or already set */
> > +		if (!opts_pp || *opts_pp) {
> > +			kfree(options);
> > +			return -EINVAL;
> > +		}
> > +
> > +		*opts_pp = options;
> > +next:
> > +		if (!*delim)
> > +			break;
> > +
> > +		ptr = delim + 1;
> > +	}
> > +
> > +	if (!devmnt->dev)
> > +		return -EINVAL;
> > +	return 0;
> > +}
> > +
> > +static int ve_mount_opts_write(struct cgroup *cg, struct cftype *cft,
> > +			       const char *buffer)
> > +{
> > +	struct ve_struct *ve = cgroup_ve(cg);
> > +	struct ve_devmnt *devmnt, *old;
> > +	int size, err;
> > +
> > +	size = strlen(buffer);
> > +	if (size <= 1)
> > +		return -EINVAL;
> > +
> > +	devmnt = kzalloc(sizeof(*devmnt), GFP_KERNEL);
> > +	if (!devmnt)
> > +		return -ENOMEM;
> > +
> > +	err = ve_parse_mount_options(buffer, buffer + size, devmnt);
> > +	if (err) {
> > +		ve_devmnt_free(devmnt);
> > +		return err;
> > +	}
> > +
> > +	mutex_lock(&ve->devmnt_mutex);
> > +	list_for_each_entry(old, &ve->devmnt_list, link) {
> > +		/* Delete old devmnt */
> > +		if (old->dev == devmnt->dev) {
> > +			list_del(&old->link);
> > +			ve_devmnt_free(old);
> > +			break;
> > +		}
> > +	}
> > +	list_add(&devmnt->link, &ve->devmnt_list);
> > +	mutex_unlock(&ve->devmnt_mutex);
> > +
> > +	return 0;
> > +}
> > +
> >   static struct cftype ve_cftypes[] = {
> >   	{
> >   		.name = "state",
> > @@ -946,6 +1089,11 @@ static struct cftype ve_cftypes[] = {
> >   		.flags = CFTYPE_NOT_ON_ROOT,
> >   		.read_seq_string = ve_legacy_veid_read,
> >   	},
> > +	{
> > +		.name = "mount_opts",
> > +		.flags = CFTYPE_NOT_ON_ROOT,
> > +		.write_string = ve_mount_opts_write,
> > +	},
> 
> Why no read method?

This series aims to port the functionality which exists in pcs6.
There were not a possibility to read devperms, so the series
doesn't have.

We can implement that on top of the series later if userspace
needs that.

Thanks,
Kirill





More information about the Devel mailing list