[CRIU] [PATCH 1/2] tty: Write unread pty buffers on post dump stage

Pavel Emelyanov xemul at virtuozzo.com
Thu Apr 21 06:48:43 PDT 2016


On 04/19/2016 10:37 PM, Cyrill Gorcunov wrote:
> When unread data present on peers we currently simply ignore
> it but actually we can try to fetch it in non(that)destructive
> way.
> 
> For this sake we collect tty peers into a separate list
> (currently only _paired_ ptys are supported, which means
> that both ends must belong the container. The reason for
> that it due to we can't guarantee anything on external
> peers which can continue generating data during a dumping
> procedure).
> 
> Once they are collected at the post dump stage we read the
> data from peers and write it into the image. In case if
> some error happened we write the data back into
> appropriate pty.
> 
> The same applies to restore stage -- we simply link
> peers together and when appropriate peer get opened
> we restore its neighbour content.
> 
> Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
> ---
>  criu/cr-dump.c               |   2 +
>  criu/cr-restore.c            |   1 +
>  criu/image-desc.c            |   1 +
>  criu/include/image-desc.h    |   1 +
>  criu/include/magic.h         |   1 +
>  criu/include/protobuf-desc.h |   1 +
>  criu/include/tty.h           |   2 +
>  criu/tty.c                   | 292 ++++++++++++++++++++++++++++++++++++++++++-
>  images/tty.proto             |   5 +
>  lib/py/images/images.py      |   1 +
>  10 files changed, 305 insertions(+), 2 deletions(-)
> 
> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
> index 5ac9fd041e4e..754db6d93478 100644
> --- a/criu/cr-dump.c
> +++ b/criu/cr-dump.c
> @@ -1545,6 +1545,8 @@ static int cr_dump_finish(int ret)
>  {
>  	int post_dump_ret = 0;
>  
> +	tty_post_dump(ret);
> +

We already have tty_verify_active_pairs() and dump_verify_tty_sids()
scattered over the cr_dump_tasks(). Can we reduce this mess, instead
of increasing one?

>  	if (disconnect_from_page_server())
>  		ret = -1;
>  
> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> index 34db7f7a360d..36ff5f736a22 100644
> --- a/criu/cr-restore.c
> +++ b/criu/cr-restore.c
> @@ -178,6 +178,7 @@ static struct collect_image_info *cinfos[] = {
>  	&fanotify_mark_cinfo,
>  	&tty_info_cinfo,
>  	&tty_cinfo,
> +	&tty_cdata,
>  	&tunfile_cinfo,
>  	&ext_file_cinfo,
>  	&timerfd_cinfo,
> diff --git a/criu/image-desc.c b/criu/image-desc.c
> index 2949c592b17f..2b31354f29da 100644
> --- a/criu/image-desc.c
> +++ b/criu/image-desc.c
> @@ -82,6 +82,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = {
>  	FD_ENTRY(BINFMT_MISC,	"binfmt-misc-%d"),
>  	FD_ENTRY(TTY_FILES,	"tty"),
>  	FD_ENTRY(TTY_INFO,	"tty-info"),
> +	FD_ENTRY_F(TTY_DATA,	"tty-data", O_NOBUF),
>  	FD_ENTRY(FILE_LOCKS,	"filelocks"),
>  	FD_ENTRY(RLIMIT,	"rlimit-%d"),
>  	FD_ENTRY_F(PAGES,	"pages-%u", O_NOBUF),
> diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h
> index e39db39ad5de..7e75ede80b82 100644
> --- a/criu/include/image-desc.h
> +++ b/criu/include/image-desc.h
> @@ -69,6 +69,7 @@ enum {
>  	CR_FD_FIFO_DATA,
>  	CR_FD_TTY_FILES,
>  	CR_FD_TTY_INFO,
> +	CR_FD_TTY_DATA,
>  	CR_FD_REMAP_FPATH,
>  	CR_FD_EVENTFD_FILE,
>  	CR_FD_EVENTPOLL_FILE,
> diff --git a/criu/include/magic.h b/criu/include/magic.h
> index 8fb6b18d45ca..a458c62e632a 100644
> --- a/criu/include/magic.h
> +++ b/criu/include/magic.h
> @@ -76,6 +76,7 @@
>  #define NETNS_MAGIC		0x55933752 /* Dolgoprudny */
>  #define TTY_FILES_MAGIC		0x59433025 /* Pushkin */
>  #define TTY_INFO_MAGIC		0x59453036 /* Kolpino */
> +#define TTY_DATA_MAGIC		0x59413026 /* Pavlovsk */
>  #define FILE_LOCKS_MAGIC	0x54323616 /* Kaluga */
>  #define RLIMIT_MAGIC		0x57113925 /* Rostov */
>  #define FANOTIFY_FILE_MAGIC	0x55096122 /* Chelyabinsk */
> diff --git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h
> index a851f1273f11..4cff5db24bdb 100644
> --- a/criu/include/protobuf-desc.h
> +++ b/criu/include/protobuf-desc.h
> @@ -58,6 +58,7 @@ enum {
>  	PB_NETNS,
>  	PB_BINFMT_MISC,		/* 50 */
>  	PB_AUTOFS,
> +	PB_TTY_DATA,
>  
>  	/* PB_AUTOGEN_STOP */
>  
> diff --git a/criu/include/tty.h b/criu/include/tty.h
> index 48f743eb293b..7ed0895e7168 100644
> --- a/criu/include/tty.h
> +++ b/criu/include/tty.h
> @@ -18,9 +18,11 @@ static inline int is_tty(dev_t rdev, dev_t dev)
>  	return get_tty_driver(rdev, dev) != NULL;
>  }
>  
> +extern int tty_post_dump(int ret);
>  extern int dump_verify_tty_sids(void);
>  extern struct collect_image_info tty_info_cinfo;
>  extern struct collect_image_info tty_cinfo;
> +extern struct collect_image_info tty_cdata;
>  extern int prepare_shared_tty(void);
>  
>  extern int tty_verify_active_pairs(void);
> diff --git a/criu/tty.c b/criu/tty.c
> index 68d7ba3d133b..2a94ae573ec3 100644
> --- a/criu/tty.c
> +++ b/criu/tty.c
> @@ -78,6 +78,11 @@ struct tty_info_entry {
>  	TtyInfoEntry			*tie;
>  };
>  
> +struct tty_data_entry {
> +	struct list_head		list;
> +	TtyDataEntry			*tde;
> +};
> +
>  struct tty_info {
>  	struct list_head		list;
>  	struct file_desc		d;
> @@ -94,6 +99,8 @@ struct tty_info {
>  	bool				inherit;
>  
>  	struct tty_info			*ctl_tty;
> +	struct tty_info			*link;
> +	struct tty_data_entry		*tty_data;
>  };
>  
>  struct tty_dump_info {
> @@ -104,8 +111,16 @@ struct tty_dump_info {
>  	pid_t				pgrp;
>  	int				fd;
>  	struct tty_driver		*driver;
> +
> +	int				index;
> +	int				lfd;
> +	int				flags;
> +	struct tty_dump_info		*link;
> +	void				*tty_data;
> +	size_t				tty_data_size;
>  };
>  
> +static LIST_HEAD(all_tty_data_entries);
>  static LIST_HEAD(all_tty_info_entries);
>  static LIST_HEAD(all_ttys);
>  
> @@ -823,6 +838,19 @@ static int pty_open_slaves(struct tty_info *info)
>  			goto err;
>  		}
>  
> +		if (slave->link && slave->link->tty_data) {
> +			ProtobufCBinaryData bd = slave->link->tty_data->tde->data;
> +			int retval;
> +
> +			pr_debug("restore queued data on slave %#x (%zu bytes)\n",
> +				 info->tfe->id, (size_t)bd.len);
> +
> +			retval = write(fd, bd.data, bd.len);
> +			if (retval != bd.len)
> +				pr_err("Restored %d bytes while %zu expected\n",
> +				       retval, (size_t)bd.len);
> +			slave->link->tty_data = NULL;

What for?

> +		}
>  		close(fd);
>  		fd = -1;
>  	}
> @@ -963,6 +991,20 @@ static int pty_open_ptmx(struct tty_info *info)
>  	if (pty_open_slaves(info))
>  		goto err;
>  
> +	if (info->link && info->link->tty_data) {
> +		ProtobufCBinaryData bd = info->link->tty_data->tde->data;
> +		int ret;
> +
> +		pr_debug("restore queued data on master %#x (%zu bytes)\n",
> +			 info->tfe->id, (size_t)bd.len);
> +
> +		ret = write(master, bd.data, bd.len);
> +		if (ret != bd.len)
> +			pr_err("Restored %d bytes while %zu expected\n",
> +			       ret, (size_t)bd.len);
> +		info->link->tty_data = NULL;

Factor this out with the above hunk.

> +	}
> +
>  	if (info->tie->locked)
>  		lock_pty(master);
>  
> @@ -1222,11 +1264,50 @@ static int tty_setup_orphan_slavery(void)
>  	return 0;
>  }
>  
> +static struct tty_data_entry *tty_lookup_data(struct tty_info *info)
> +{
> +	struct tty_data_entry *td;
> +
> +	list_for_each_entry(td, &all_tty_data_entries, list) {
> +		if (td->tde->tty_id == info->tie->id)
> +			return td;
> +	}
> +
> +	return NULL;
> +}
> +
>  static int tty_setup_slavery(void * unused)
>  {
>  	struct tty_info *info, *peer, *m;
>  
>  	/*
> +	 * Setup links for PTY terminal pairs.
> +	 */
> +	list_for_each_entry(info, &all_ttys, list) {
> +		if (!is_pty(info->driver) || info->link)
> +			continue;
> +		peer = info;
> +		list_for_each_entry_continue(peer, &all_ttys, list) {
> +			if (!is_pty(peer->driver) || peer->link)
> +				continue;
> +			if (peer->tie->pty->index == info->tie->pty->index) {
> +				info->link = peer;
> +				peer->link = info;
> +
> +				pr_debug("Link PTYs (%#x)\n", info->tfe->id);
> +
> +				info->tty_data = tty_lookup_data(info);
> +				peer->tty_data = tty_lookup_data(peer);

Can we better lookup for tty_info in data collecting routine?

> +
> +				pr_debug("queued data %s %p %s %p\n",
> +					info->driver->name, info->tty_data,
> +					peer->driver->name, peer->tty_data);
> +				break;
> +			}
> +		}
> +	}
> +
> +	/*
>  	 * The image may carry several terminals opened
>  	 * belonging to the same session, so choose the
>  	 * leader which gonna be setting up the controlling
> @@ -1417,6 +1498,8 @@ static int collect_one_tty(void *obj, ProtobufCMessage *msg, struct cr_img *i)
>  	info->create = tty_is_master(info);
>  	info->inherit = false;
>  	info->ctl_tty = NULL;
> +	info->tty_data = NULL;
> +	info->link = NULL;
>  
>  	if (verify_info(info))
>  		return -1;
> @@ -1473,6 +1556,24 @@ struct collect_image_info tty_cinfo = {
>  	.collect	= collect_one_tty,
>  };
>  
> +static int collect_one_tty_data(void *obj, ProtobufCMessage *msg, struct cr_img *i)
> +{
> +	struct tty_data_entry *tdo = obj;
> +
> +	tdo->tde = pb_msg(msg, TtyDataEntry);
> +	list_add(&tdo->list, &all_tty_data_entries);
> +	pr_debug("Collected data for id %#x (size %zu bytes)\n",
> +		 tdo->tde->tty_id, (size_t)tdo->tde->data.len);
> +	return 0;
> +}
> +
> +struct collect_image_info tty_cdata = {
> +	.fd_type	= CR_FD_TTY_DATA,
> +	.pb_type	= PB_TTY_DATA,
> +	.priv_size	= sizeof(struct tty_data_entry),
> +	.collect	= collect_one_tty_data,
> +};
> +
>  /* Make sure the ttys we're dumping do belong our process tree */
>  int dump_verify_tty_sids(void)
>  {
> @@ -1518,7 +1619,6 @@ int dump_verify_tty_sids(void)
>  				}
>  			}
>  		}
> -		xfree(dinfo);
>  	}
>  
>  	return ret;
> @@ -1551,7 +1651,7 @@ static int dump_tty_info(int lfd, u32 id, const struct fd_parms *p, struct tty_d
>  	if (!pti)
>  		return -1;
>  
> -	dinfo = xmalloc(sizeof(*dinfo));
> +	dinfo = xzalloc(sizeof(*dinfo));
>  	if (!dinfo)
>  		return -1;
>  
> @@ -1561,6 +1661,20 @@ static int dump_tty_info(int lfd, u32 id, const struct fd_parms *p, struct tty_d
>  	dinfo->fd		= p->fd;
>  	dinfo->driver		= driver;
>  
> +	if (is_pty(driver)) {
> +		dinfo->lfd = dup(lfd);
> +		if (dinfo->lfd < 0) {
> +			pr_perror("Can't dup local fd on %x", id);
> +			xfree(dinfo);
> +			return -1;
> +		}
> +		dinfo->index	= index;
> +		dinfo->flags	= p->flags;
> +	} else {
> +		dinfo->index	= -1;
> +		dinfo->lfd	= -1;
> +	}
> +
>  	list_add_tail(&dinfo->list, &all_ttys);
>  
>  	info.id			= id;
> @@ -1698,6 +1812,180 @@ const struct fdtype_ops tty_dump_ops = {
>  	.dump	= dump_one_tty,
>  };
>  
> +static int tty_reblock(int id, int lfd, int flags)
> +{
> +	static const int fmask = O_RDWR | O_NONBLOCK;
> +
> +	if ((flags & fmask) != fmask) {
> +		if (fcntl(lfd, F_SETFL, flags)) {
> +			pr_perror("Can't revert mode back to %o on (%#x)\n", fmask, id);
> +			return -errno;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static int tty_unblock(int id, int lfd, int flags)
> +{
> +	static const int fmask = O_RDWR | O_NONBLOCK;
> +
> +	if ((flags & fmask) != fmask) {
> +		if (fcntl(lfd, F_SETFL, fmask)) {
> +			pr_perror("Can't change mode to %o on (%#x)\n", fmask, id);
> +			return -errno;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static char *tty_read_data(struct tty_dump_info *dinfo, size_t *ret_size)
> +{
> +	size_t off = 0, size = 16384;
> +	char *buf;
> +	int ret;
> +
> +	buf = xmalloc(size);
> +	if (!buf)
> +		return ERR_PTR(-ENOMEM);
> +
> +	ret = tty_unblock(dinfo->id, dinfo->lfd, dinfo->flags);
> +	if (ret) {
> +		xfree(buf);
> +		return ERR_PTR(ret);
> +	}
> +
> +	while (1) {
> +		ret = read(dinfo->lfd, &buf[off], size - off);
> +		if (ret == 0) {
> +			pr_debug("No more data on tty (%s %#x)\n",
> +				 dinfo->driver->name, dinfo->id);
> +			break;
> +		} else if (ret < 0) {
> +			if (errno == EAGAIN) {
> +				pr_debug("Not waiting data tty (%s %#x)\n",
> +					 dinfo->driver->name, dinfo->id);
> +				break;
> +			} else {
> +				ret = -errno;
> +				pr_perror("Can't read data from tty (%s %#x)",
> +					  dinfo->driver->name, dinfo->id);
> +				xfree(buf);
> +				return ERR_PTR(ret);
> +			}
> +		}
> +
> +		off += ret;
> +		pr_debug("Read %d bytes (%d) from tty (%s %#x)\n",
> +			 ret, (int)off, dinfo->driver->name, dinfo->id);
> +
> +		if (off >= size) {
> +			pr_err("The tty (%s %#x) queued data overrflow %zu bytes limit\n",
> +			       dinfo->driver->name, dinfo->id, size);
> +			off = size;
> +			break;
> +		}
> +	}
> +
> +	if (!off) {
> +		xfree(buf);
> +		buf = NULL;
> +	}
> +
> +	*ret_size = off;
> +	return buf;
> +}
> +
> +int tty_post_dump(int status)
> +{
> +	struct tty_dump_info *dinfo, *peer, *n;
> +	LIST_HEAD(all_ptys);
> +	int ret = 0;
> +
> +	/*
> +	 * Link PTY peers, and move one of linked
> +	 * into separate list.
> +	 */
> +	list_for_each_entry_safe(dinfo, n, &all_ttys, list) {
> +		if (!is_pty(dinfo->driver) || dinfo->link)
> +			continue;
> +		peer = dinfo;
> +		list_for_each_entry_continue(peer, &all_ttys, list) {
> +			if (!is_pty(peer->driver) || peer->link)
> +				continue;
> +
> +			if (peer->index == dinfo->index) {
> +				dinfo->link = peer;
> +				peer->link = dinfo;
> +				pr_debug("Link PTYs (%#x)\n", dinfo->id);
> +				list_move(&dinfo->list, &all_ptys);
> +			}
> +		}
> +	}

Can we do this linkage while dumping the ttys?

> +
> +	list_for_each_entry(dinfo, &all_ptys, list) {
> +		TtyDataEntry e;
> +
> +		dinfo->tty_data = tty_read_data(dinfo, &dinfo->tty_data_size);
> +		if (IS_ERR(dinfo->tty_data)) {
> +			ret = PTR_ERR(dinfo->tty_data);
> +			dinfo->tty_data = NULL;
> +			break;
> +		} else if (dinfo->tty_data) {
> +			tty_data_entry__init(&e);
> +			e.tty_id	= dinfo->id;
> +			e.data.data	= dinfo->tty_data;
> +			e.data.len	= dinfo->tty_data_size;
> +
> +			ret = pb_write_one(img_from_set(glob_imgset, CR_FD_TTY_DATA), &e, PB_TTY_DATA);
> +			if (ret)
> +				break;
> +		}
> +
> +		dinfo->link->tty_data = tty_read_data(dinfo->link, &dinfo->link->tty_data_size);
> +		if (IS_ERR(dinfo->link->tty_data)) {
> +			ret = PTR_ERR(dinfo->link->tty_data);
> +			dinfo->link->tty_data = NULL;
> +			break;
> +		} else if (dinfo->link->tty_data) {
> +			tty_data_entry__init(&e);
> +			e.tty_id	= dinfo->link->id;
> +			e.data.data	= dinfo->link->tty_data;
> +			e.data.len	= dinfo->link->tty_data_size;
> +
> +			ret = pb_write_one(img_from_set(glob_imgset, CR_FD_TTY_DATA), &e, PB_TTY_DATA);
> +			if (ret)
> +				break;
> +		}

Factor the code out.

> +	}
> +
> +	if (ret || opts.final_state == TASK_ALIVE) {

|| opts.final_state != TASK_DEAD

> +		list_for_each_entry(dinfo, &all_ptys, list) {
> +			if (dinfo->link->tty_data) {
> +				if (write(dinfo->link->lfd, dinfo->link->tty_data,
> +					  dinfo->link->tty_data_size) != dinfo->link->tty_data_size)
> +					pr_perror("Can't writedata data back to tty (%#x)\n",
> +						  dinfo->link->id);
> +				tty_reblock(dinfo->link->id, dinfo->link->lfd, dinfo->link->flags);
> +			}
> +		}
> +	}
> +
> +	list_for_each_entry_safe(dinfo, n, &all_ttys, list) {
> +		close_safe(&dinfo->lfd);
> +		xfree(dinfo->tty_data);
> +	}
> +
> +	list_for_each_entry_safe(dinfo, n, &all_ptys, list) {
> +		close_safe(&dinfo->lfd);
> +		xfree(dinfo->tty_data);
> +		xfree(dinfo);
> +	}
> +
> +	return ret;
> +}
> +
>  int tty_prep_fds(void)
>  {
>  	if (!opts.shell_job)
> diff --git a/images/tty.proto b/images/tty.proto
> index 4b5a70c20a88..a961b5e7339c 100644
> --- a/images/tty.proto
> +++ b/images/tty.proto
> @@ -33,6 +33,11 @@ enum TtyType {
>  	EXT_TTY		= 5;
>  }
>  
> +message tty_data_entry {
> +	required uint32			tty_id		= 1;
> +	required bytes			data		= 2;
> +}
> +
>  message tty_info_entry {
>  	required uint32			id		=  1;
>  
> diff --git a/lib/py/images/images.py b/lib/py/images/images.py
> index 0bc0a1fa3ea4..1f0d7085c0b0 100644
> --- a/lib/py/images/images.py
> +++ b/lib/py/images/images.py
> @@ -370,6 +370,7 @@ handlers = {
>  	'MNTS'			: entry_handler(mnt_entry),
>  	'TTY_FILES'		: entry_handler(tty_file_entry),
>  	'TTY_INFO'		: entry_handler(tty_info_entry),
> +	'TTY_DATA'		: entry_handler(tty_data_entry),
>  	'RLIMIT'		: entry_handler(rlimit_entry),
>  	'TUNFILE'		: entry_handler(tunfile_entry),
>  	'EXT_FILES'		: entry_handler(ext_file_entry),
> 



More information about the CRIU mailing list