[Devel] [RFC rh7 v5] ve/tty: vt -- Implement per VE support for console and terminals

Vladimir Davydov vdavydov at parallels.com
Thu Aug 27 08:00:28 PDT 2015


On Mon, Aug 24, 2015 at 02:10:49PM +0300, Cyrill Gorcunov wrote:
...
> Index: linux-pcs7.git/drivers/tty/pty.c
> ===================================================================
> --- linux-pcs7.git.orig/drivers/tty/pty.c
> +++ linux-pcs7.git/drivers/tty/pty.c
> @@ -820,10 +820,566 @@ static void __init unix98_pty_init(void)
>  static inline void unix98_pty_init(void) { }
>  #endif
>  
> +#if defined(CONFIG_VE) && defined(CONFIG_UNIX98_PTYS)
> +
> +/*
> + * VTTY architecture overview.
> + *
> + * With VTTY we make /dev/console and /dev/tty[X] virtualized
> + * per container (note the real names may vary because the
> + * kernel itself uses major:minor numbers to distinguish
> + * devices and doesn't care how they are named inside /dev.
> + * /dev/console stands for TTYAUX_MAJOR:1 while /dev/tty[X]
> + * stands for TTY_MAJOR:[0:12]. That said from inside of
> + * VTTY /dev/console is the same as /dev/tty0.
> + *
> + * For every container here is a tty map represented by
> + * vtty_map_t. It carries @veid of VE and associated slave
> + * tty peers. Also there is a per-VE spinlock to order
> + * close() operation.
> + *
> + * map
> + *  veid -> CTID
> + *    vttys -> [ 0 ]
> + *               `- @slave -> link -> @master
> + *             [ 1 ]
> + *               `- @slave -> link -> @master
> + *
> + * When container is opening tty by self (init process
> + * or whatever) we create @slave peer and a linked master,
> + * and carry them inside map until the kernel explicitly
> + * close them. Because the @master may be hanging around
> + * unconnected during the whole lifetime of a pair, we're
> + * assiging an extra refernce on it so kernel won't complain
> + * that link has nonzero @count but no real file descriptor
> + * assigned.
> + */
> +
> +#include <linux/ve.h>
> +#include <linux/file.h>
> +#include <linux/anon_inodes.h>
> +
> +static struct tty_driver *vttym_driver;
> +static struct tty_driver *vttys_driver;
> +static DEFINE_IDR(vtty_idr);
> +
> +static struct file_operations vtty_fops;
> +
> +#define vtty_match_index(idx)		((idx) >= 0 && (idx) < MAX_NR_VTTY_CONSOLES)

nit: this line is longer than 80 symbols

> +
> +typedef struct {
> +	envid_t			veid;
> +	struct tty_struct	*vttys[MAX_NR_VTTY_CONSOLES];
> +	spinlock_t		close_lock;
> +} vtty_map_t;
> +
> +static vtty_map_t *vtty_map_lookup(envid_t veid)
> +{
> +	lockdep_assert_held(&tty_mutex);
> +	return idr_find(&vtty_idr, veid);
> +}
> +
> +static void vtty_map_set(vtty_map_t *map, struct tty_struct *tty)
> +{
> +	lockdep_assert_held(&tty_mutex);

nit: WARN_ON(map->vttys[tty->index] != NULL)

> +	map->vttys[tty->index] = tty;
> +}
> +
> +static void vtty_map_del(vtty_map_t *map, struct tty_struct *tty)
> +{
> +	lockdep_assert_held(&tty_mutex);
> +	if (map) {

nit: WARN_ON(map->vttys[tty->index] !=
	     (tty->driver == ttys_driver ? tty : tty->link))

> +		tty->driver_data = tty->link->driver_data = NULL;
> +		map->vttys[tty->index] = NULL;
> +	}
> +}
> +
> +static void vtty_map_free(vtty_map_t *map)
> +{
> +	lockdep_assert_held(&tty_mutex);
> +	idr_remove(&vtty_idr, map->veid);
> +	kfree(map);
> +}
> +
> +static vtty_map_t *vtty_map_alloc(envid_t veid)
> +{
> +	vtty_map_t *map = kzalloc(sizeof(*map), GFP_KERNEL);
> +
> +	lockdep_assert_held(&tty_mutex);
> +	if (map) {
> +		map->veid = veid;
> +		veid = idr_alloc(&vtty_idr, map, veid, veid + 1, GFP_KERNEL);
> +		if (veid < 0) {
> +			kfree(map);
> +			return ERR_PTR(veid);
> +		}
> +		spin_lock_init(&map->close_lock);
> +	} else
> +		map = ERR_PTR(-ENOMEM);
> +	return map;
> +}
> +
> +/*
> + * vttys are never supposed to be opened from inside
> + * of VE0 except special ioctl call, so treat zero as
> + * "unused" sign.
> + */
> +static unsigned long current_veid;
> +
> +static void vtty_set_context(envid_t veid)
> +{

nit: WARN_ON(veid)

> +	lockdep_assert_held(&tty_mutex);
> +	current_veid = veid;
> +}
> +
> +static void vtty_drop_context(void)
> +{
> +	lockdep_assert_held(&tty_mutex);
> +	current_veid = 0;
> +}
> +
> +static envid_t vtty_get_context(void)
> +{
> +	BUILD_BUG_ON(sizeof(current_veid) < sizeof(envid_t));
> +	lockdep_assert_held(&tty_mutex);
> +
> +	return current_veid ?: get_exec_env()->veid;
> +}
> +
> +static struct tty_struct *vtty_lookup(struct tty_driver *driver,
> +				      struct inode *inode, int idx)
> +{
> +	vtty_map_t *map = vtty_map_lookup(vtty_get_context());
> +	struct tty_struct *tty;
> +
> +	WARN_ON_ONCE((driver != vttym_driver) &&
> +		     (driver != vttys_driver));

nit: this check is rather useless

> +
> +	if (!vtty_match_index(idx))
> +		return ERR_PTR(-EIO);
> +
> +	/*
> +	 * Nothing ever been opened yet, allocate a new
> +	 * tty map together with both peers from the scratch
> +	 * in install procedure.
> +	 */
> +	if (!map)
> +		return NULL;
> +
> +	tty = map->vttys[idx];
> +	if (tty) {
> +		if (driver == vttym_driver)
> +			tty = tty->link;
> +		WARN_ON(!tty);
> +	}
> +	return tty;
> +}
> +
> +static void vtty_standard_install(vtty_map_t *map, struct tty_driver *driver, struct tty_struct *tty)

nit: this line is longer than 80 symbols

> +{
> +	WARN_ON(tty_init_termios(tty));
> +
> +	tty_driver_kref_get(driver);
> +	tty->driver_data = map;
> +
> +	tty_port_init(tty->port);
> +	tty->port->itty = tty;
> +}
> +
> +static struct tty_struct *vtty_install_peer(vtty_map_t *map, struct tty_driver *driver,
> +					    struct tty_port *port, int index)

ditto

> +{
> +	struct tty_struct *tty;
> +
> +	tty = alloc_tty_struct();
> +	if (!tty)
> +		return ERR_PTR(-ENOMEM);
> +	initialize_tty_struct(tty, driver, index);
> +
> +	tty->port = port;
> +	vtty_standard_install(map, driver, tty);
> +	return tty;
> +}
> +
> +static int vtty_install(struct tty_driver *driver, struct tty_struct *tty)
> +{
> +	envid_t veid = vtty_get_context();
> +	struct tty_port *peer_port;
> +	struct tty_struct *peer;
> +	vtty_map_t *map;
> +	int ret;
> +
> +	WARN_ON_ONCE(driver != vttys_driver);
> +
> +	map = vtty_map_lookup(veid);
> +	if (!map) {
> +		map = vtty_map_alloc(veid);
> +		if (IS_ERR(map))
> +			return PTR_ERR(map);
> +	}
> +
> +	tty->port = kzalloc(sizeof(*tty->port), GFP_KERNEL);
> +	peer_port = kzalloc(sizeof(*peer_port), GFP_KERNEL);
> +	if (!tty->port || !peer_port) {
> +		ret = -ENOMEM;
> +		goto err_free;
> +	}
> +
> +	/*
> +	 * The peer will have no real @fd assigned yet,
> +	 * so add an extra @fd sign over so kernel won't
> +	 * treat it as closing until a real file appear
> +	 * on the peer. Sill this spare peer may be in this
> +	 * state during the whole pair lifetime (imagine
> +	 * noone ever opened it and @tty get closed).
> +	 */
> +	peer = vtty_install_peer(map, vttym_driver, peer_port, tty->index);
> +	if (IS_ERR(peer)) {
> +		ret = PTR_ERR(peer);
> +		goto err_free;
> +	}
> +
> +	vtty_standard_install(map, vttys_driver, tty);
> +	vtty_map_set(map, tty);
> +
> +	tty->count++;
> +	tty->count++;
> +	peer->count++;
> +	set_bit(TTY_EXTRA_REF, &peer->flags);
> +
> +	tty->link = peer;
> +	peer->link = tty;
> +	return 0;
> +
> +err_free:
> +	kfree(tty->port);
> +	kfree(peer_port);
> +	return ret;
> +}
> +
> +static int vtty_open(struct tty_struct *tty, struct file *filp)
> +{
> +	if (test_bit(TTY_OTHER_CLOSED, &tty->flags))
> +		return -EIO;

What's the point in this check? You only set TTY_OTHER_CLOSED when both
ends are closed and hence the pair is dead.

> +
> +	clear_bit(TTY_IO_ERROR, &tty->flags);
> +	clear_bit(TTY_OTHER_CLOSED, &tty->link->flags);

Again, you only set TTY_IO_ERROR and TTY_OTHER_CLOSED if the pair is
dead and therefore cannot be reopened, so I think you don't need to
touch these bits at all.

> +	set_bit(TTY_THROTTLED, &tty->flags);
> +	return 0;
> +}
> +
> +static void vtty_close(struct tty_struct *tty, struct file *filp)
> +{
> +	struct tty_struct *peer = tty->link;
> +	vtty_map_t *map = tty->driver_data;
> +
> +	spin_lock(&map->close_lock);

This spinlock doesn't protect us from concurrent modifications of
peer->count from e.g. tty_reopen. What about taking tty_mutex instead?

On the second thought, this wouldn't help us if we patched mainstream
kernel, because tty_mutex is not held for tty->count modifications there
anymore. That said, if we simply substitute the spinlock with tty_mutex,
we will surely have problems sooner or later...

May be, we'd better move our ->count tweaking to the generic path, i.e.
tty_release, similarly to pty? We could distinguish vtty from pty by
checking TTY_EXTRA_REF. What do you think?

> +
> +	/*
> +	 * More active file descriptors are hooked on,
> +	 * wait for the last for closing.
> +	 */
> +	if (tty->count > 2) {
> +		spin_unlock(&map->close_lock);
> +		return;
> +	}
> +
> +	/*
> +	 * Here is a small trick we do with @count:
> +	 * the pair may be opened from inside of
> +	 * container (slave first) as well as from
> +	 * the node (master first). But if master
> +	 * is about to close and there is still an
> +	 * active file hooked on a slave peer we defer
> +	 * the real pair destruction until slave is
> +	 * closed (incrementing @count).
> +	 */
> +	if (tty->driver == vttys_driver) {
> +		if (peer->count == 1) {
> +			clear_bit(TTY_EXTRA_REF, &peer->flags);
> +			peer->count--;
> +			tty->count--;
> +		}
> +	} else {
> +		if (peer->count == 1) {
> +			clear_bit(TTY_EXTRA_REF, &tty->flags);
> +			tty->count--;
> +		} else
> +			peer->count++;
> +	}
> +
> +	if (tty->count == 1) {
> +		set_bit(TTY_IO_ERROR, &tty->flags);
> +		set_bit(TTY_OTHER_CLOSED, &peer->flags);

See my comment to vtty_open regarding setting these bits.

> +	}
> +
> +	spin_unlock(&map->close_lock);
> +
> +	wake_up_interruptible(&tty->read_wait);
> +	wake_up_interruptible(&tty->write_wait);
> +
> +	wake_up_interruptible(&peer->read_wait);
> +	wake_up_interruptible(&peer->write_wait);
> +}
> +
> +static void vtty_shutdown(struct tty_struct *tty)
> +{
> +	vtty_map_del(tty->driver_data, tty);
> +}
> +
> +static int vtty_write(struct tty_struct *tty, const unsigned char *buf, int count)

nit: this line is longer than 80 symbols

> +{
> +	struct tty_struct *peer = tty->link;
> +
> +	if (tty->stopped)
> +		return 0;
> +
> +	if (count > 0) {
> +		count = tty_insert_flip_string(peer->port, buf, count);
> +		if (count) {
> +			tty_flip_buffer_push(peer->port);
> +			tty_wakeup(tty);
> +		} else {
> +			/*
> +			 * Flush the slave reader if noone
> +			 * is actually hooked on. Otherwise
> +			 * wait until reader fetch all data.
> +			 */
> +			if (peer->count < 2)
> +				tty_perform_flush(peer, TCIFLUSH);
> +		}
> +	}
> +
> +	return count;
> +}
> +
> +static int vtty_write_room(struct tty_struct *tty)
> +{
> +	struct tty_struct *peer = tty->link;
> +
> +	if (tty->stopped)
> +		return 0;
> +
> +	if (peer->count < 2)
> +		return 2048;
> +
> +	return pty_space(peer);
> +}
> +
> +static const struct tty_operations vtty_ops = {
> +	.lookup		= vtty_lookup,
> +	.install	= vtty_install,
> +	.open		= vtty_open,
> +	.close		= vtty_close,
> +	.shutdown	= vtty_shutdown,
> +	.cleanup	= pty_cleanup,
> +	.write		= vtty_write,
> +	.write_room	= vtty_write_room,
> +	.set_termios	= pty_set_termios,
> +	.unthrottle	= pty_unthrottle,

> +	.remove		= pty_unix98_remove,

nit: I wouldn't reuse this one; it's empty, so we it shouldn't be very
difficult to implement our own vtty_remove; the benefit is that we can
drop dependency from UNIX98_PTYS

> +};
> +
> +struct tty_driver *vtty_console_driver(int *index)
> +{
> +	*index = 0;
> +	return vttys_driver;
> +}
> +
> +struct tty_driver *vtty_driver(dev_t dev, int *index)
> +{
> +	if (MAJOR(dev) == TTY_MAJOR &&
> +	    MINOR(dev) < MAX_NR_VTTY_CONSOLES) {
> +		*index = MINOR(dev);
> +		return vttys_driver;
> +	}
> +	return NULL;
> +}
> +
> +static void ve_vtty_fini(void *data)
> +{
> +	struct ve_struct *ve = data;
> +	vtty_map_t *map;
> +	int i;
> +
> +	mutex_lock(&tty_mutex);
> +	map = vtty_map_lookup(ve->veid);
> +	if (map) {
> +		for (i = 0; i < MAX_NR_VTTY_CONSOLES; i++) {
> +			struct tty_struct *tty = map->vttys[i];
> +			if (!tty)
> +				continue;
> +			tty->driver_data = tty->link->driver_data = NULL;
> +		}
> +		vtty_map_free(map);
> +	}
> +	mutex_unlock(&tty_mutex);
> +}
> +
> +static struct ve_hook vtty_hook = {
> +	.fini           = ve_vtty_fini,
> +	.priority       = HOOK_PRIO_DEFAULT,
> +	.owner          = THIS_MODULE,
> +};
> +
> +static int __init vtty_init(void)
> +{
> +#define VTTY_DRIVER_ALLOC_FLAGS			\
> +	(TTY_DRIVER_REAL_RAW		|	\
> +	 TTY_DRIVER_RESET_TERMIOS	|	\
> +	 TTY_DRIVER_DYNAMIC_DEV		|	\
> +	 TTY_DRIVER_INSTALLED		|	\
> +	 TTY_DRIVER_DEVPTS_MEM)
> +
> +	vttym_driver = tty_alloc_driver(MAX_NR_VTTY_CONSOLES, VTTY_DRIVER_ALLOC_FLAGS);

nit: this line is longer than 80 symbols

> +	if (IS_ERR(vttym_driver))
> +		panic(pr_fmt("Can't allocate master vtty driver\n"));
> +
> +	vttys_driver = tty_alloc_driver(MAX_NR_VTTY_CONSOLES, VTTY_DRIVER_ALLOC_FLAGS);

ditto

> +	if (IS_ERR(vttys_driver))
> +		panic(pr_fmt("Can't allocate slave vtty driver\n"));
> +
> +	vttym_driver->driver_name		= "vtty_master";
> +	vttym_driver->name			= "vttym";
> +	vttym_driver->name_base			= 0;
> +	vttym_driver->major			= 0;
> +	vttym_driver->minor_start		= 0;
> +	vttym_driver->type			= TTY_DRIVER_TYPE_PTY;
> +	vttym_driver->subtype			= PTY_TYPE_MASTER;
> +	vttym_driver->init_termios		= tty_std_termios;
> +	vttym_driver->init_termios.c_iflag	= 0;
> +	vttym_driver->init_termios.c_oflag	= 0;
> +
> +	/* 38400 boud rate, 8 bit char size, enable receiver */
> +	vttym_driver->init_termios.c_cflag	= B38400 | CS8 | CREAD;
> +	vttym_driver->init_termios.c_lflag	= 0;
> +	vttym_driver->init_termios.c_ispeed	= 38400;
> +	vttym_driver->init_termios.c_ospeed	= 38400;
> +	tty_set_operations(vttym_driver, &vtty_ops);
> +
> +	vttys_driver->driver_name		= "vtty_slave";
> +	vttys_driver->name			= "vttys";
> +	vttys_driver->name_base			= 0;
> +	vttys_driver->major			= 0;
> +	vttys_driver->minor_start		= 0;
> +	vttys_driver->type			= TTY_DRIVER_TYPE_PTY;
> +	vttys_driver->subtype			= PTY_TYPE_SLAVE;
> +	vttys_driver->init_termios		= tty_std_termios;
> +	vttys_driver->init_termios.c_iflag	= 0;
> +	vttys_driver->init_termios.c_oflag	= 0;
> +	vttys_driver->init_termios.c_cflag	= B38400 | CS8 | CREAD;
> +	vttys_driver->init_termios.c_lflag	= 0;
> +	vttys_driver->init_termios.c_ispeed	= 38400;
> +	vttys_driver->init_termios.c_ospeed	= 38400;
> +	tty_set_operations(vttys_driver, &vtty_ops);
> +
> +	if (tty_register_driver(vttym_driver))
> +		panic(pr_fmt("Can't register master vtty driver\n"));
> +
> +	if (tty_register_driver(vttys_driver))
> +		panic(pr_fmt("Can't register slave vtty driver\n"));
> +
> +	ve_hook_register(VE_SS_CHAIN, &vtty_hook);
> +	tty_default_fops(&vtty_fops);
> +	return 0;
> +}
> +
> +int vtty_open_master(envid_t veid, int idx)
> +{
> +	struct tty_struct *tty;
> +	struct file *file;
> +	char devname[64];
> +	int fd, ret;
> +
> +	if (!vtty_match_index(idx))
> +		return -EIO;
> +
> +	fd = get_unused_fd_flags(0);
> +	if (fd < 0)
> +		return fd;
> +
> +	snprintf(devname, sizeof(devname), "v%utty%d", veid, idx);
> +	file = anon_inode_getfile(devname, &vtty_fops, NULL, O_RDWR);
> +	if (IS_ERR(file)) {
> +		ret = PTR_ERR(file);
> +		goto err_put_unused_fd;
> +	}
> +	nonseekable_open(NULL, file);
> +
> +	ret = tty_alloc_file(file);
> +	if (ret)
> +		goto err_fput;
> +
> +	/*
> +	 * Opening comes from ve0 context so
> +	 * setup VE's context until master fetched.
> +	 * This is done under @tty_mutex so noone
> +	 * else would access it while we're holding
> +	 * the lock.
> +	 */
> +	mutex_lock(&tty_mutex);
> +	vtty_set_context(veid);
> +
> +	tty = vtty_lookup(vttym_driver, NULL, idx);
> +	if (!tty ||
> +	    (test_bit(TTY_CLOSING, &tty->flags) ||
> +	     test_bit(TTY_CLOSING, &tty->link->flags))) {
> +		/*
> +		 * The previous connection is about to
> +		 * be closed so drop it from the map and
> +		 * allocate a new one.
> +		 */
> +		if (tty)
> +			vtty_map_del(tty->driver_data, tty);
> +		tty = tty_init_dev(vttys_driver, idx);

So here you install a new tty slave in place of the closing one, right?
If ->shutdown is called for the old tty after this point, you'll get
empty map. This has to be fixed. Checking that we're actually clearing
our own tty in vtty_map_del should do the trick.

> +		if (IS_ERR(tty))
> +			goto err_install;
> +		tty->count--;
> +		tty_unlock(tty);
> +		tty = tty->link;
> +	}
> +
> +	/* One master at a time */
> +	if (tty->count > 1) {
> +		ret = -EBUSY;
> +		goto err_install;
> +	}
> +
> +	vtty_drop_context();
> +
> +	WARN_ON(!test_bit(TTY_LDISC, &tty->flags));
> +
> +	tty_add_file(tty, file);
> +	tty->count++;
> +	fd_install(fd, file);
> +	WARN_ON(vtty_open(tty, file));
> +
> +	mutex_unlock(&tty_mutex);
> +	ret = fd;
> +
> +out:
> +	return ret;
> +
> +err_install:
> +	vtty_drop_context();
> +	mutex_unlock(&tty_mutex);
> +	tty_free_file(file);
> +err_fput:
> +	file->f_op = NULL;
> +	fput(file);
> +err_put_unused_fd:
> +	put_unused_fd(fd);
> +	goto out;
> +}



More information about the Devel mailing list