[CRIU] [PATCH 1/2] tty: Write unread pty buffers on post dump stage

Cyrill Gorcunov gorcunov at openvz.org
Tue Apr 19 12:37:49 PDT 2016


When unread data present on peers we currently simply ignore
it but actually we can try to fetch it in non(that)destructive
way.

For this sake we collect tty peers into a separate list
(currently only _paired_ ptys are supported, which means
that both ends must belong the container. The reason for
that it due to we can't guarantee anything on external
peers which can continue generating data during a dumping
procedure).

Once they are collected at the post dump stage we read the
data from peers and write it into the image. In case if
some error happened we write the data back into
appropriate pty.

The same applies to restore stage -- we simply link
peers together and when appropriate peer get opened
we restore its neighbour content.

Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
 criu/cr-dump.c               |   2 +
 criu/cr-restore.c            |   1 +
 criu/image-desc.c            |   1 +
 criu/include/image-desc.h    |   1 +
 criu/include/magic.h         |   1 +
 criu/include/protobuf-desc.h |   1 +
 criu/include/tty.h           |   2 +
 criu/tty.c                   | 292 ++++++++++++++++++++++++++++++++++++++++++-
 images/tty.proto             |   5 +
 lib/py/images/images.py      |   1 +
 10 files changed, 305 insertions(+), 2 deletions(-)

diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index 5ac9fd041e4e..754db6d93478 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -1545,6 +1545,8 @@ static int cr_dump_finish(int ret)
 {
 	int post_dump_ret = 0;
 
+	tty_post_dump(ret);
+
 	if (disconnect_from_page_server())
 		ret = -1;
 
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 34db7f7a360d..36ff5f736a22 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -178,6 +178,7 @@ static struct collect_image_info *cinfos[] = {
 	&fanotify_mark_cinfo,
 	&tty_info_cinfo,
 	&tty_cinfo,
+	&tty_cdata,
 	&tunfile_cinfo,
 	&ext_file_cinfo,
 	&timerfd_cinfo,
diff --git a/criu/image-desc.c b/criu/image-desc.c
index 2949c592b17f..2b31354f29da 100644
--- a/criu/image-desc.c
+++ b/criu/image-desc.c
@@ -82,6 +82,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = {
 	FD_ENTRY(BINFMT_MISC,	"binfmt-misc-%d"),
 	FD_ENTRY(TTY_FILES,	"tty"),
 	FD_ENTRY(TTY_INFO,	"tty-info"),
+	FD_ENTRY_F(TTY_DATA,	"tty-data", O_NOBUF),
 	FD_ENTRY(FILE_LOCKS,	"filelocks"),
 	FD_ENTRY(RLIMIT,	"rlimit-%d"),
 	FD_ENTRY_F(PAGES,	"pages-%u", O_NOBUF),
diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h
index e39db39ad5de..7e75ede80b82 100644
--- a/criu/include/image-desc.h
+++ b/criu/include/image-desc.h
@@ -69,6 +69,7 @@ enum {
 	CR_FD_FIFO_DATA,
 	CR_FD_TTY_FILES,
 	CR_FD_TTY_INFO,
+	CR_FD_TTY_DATA,
 	CR_FD_REMAP_FPATH,
 	CR_FD_EVENTFD_FILE,
 	CR_FD_EVENTPOLL_FILE,
diff --git a/criu/include/magic.h b/criu/include/magic.h
index 8fb6b18d45ca..a458c62e632a 100644
--- a/criu/include/magic.h
+++ b/criu/include/magic.h
@@ -76,6 +76,7 @@
 #define NETNS_MAGIC		0x55933752 /* Dolgoprudny */
 #define TTY_FILES_MAGIC		0x59433025 /* Pushkin */
 #define TTY_INFO_MAGIC		0x59453036 /* Kolpino */
+#define TTY_DATA_MAGIC		0x59413026 /* Pavlovsk */
 #define FILE_LOCKS_MAGIC	0x54323616 /* Kaluga */
 #define RLIMIT_MAGIC		0x57113925 /* Rostov */
 #define FANOTIFY_FILE_MAGIC	0x55096122 /* Chelyabinsk */
diff --git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h
index a851f1273f11..4cff5db24bdb 100644
--- a/criu/include/protobuf-desc.h
+++ b/criu/include/protobuf-desc.h
@@ -58,6 +58,7 @@ enum {
 	PB_NETNS,
 	PB_BINFMT_MISC,		/* 50 */
 	PB_AUTOFS,
+	PB_TTY_DATA,
 
 	/* PB_AUTOGEN_STOP */
 
diff --git a/criu/include/tty.h b/criu/include/tty.h
index 48f743eb293b..7ed0895e7168 100644
--- a/criu/include/tty.h
+++ b/criu/include/tty.h
@@ -18,9 +18,11 @@ static inline int is_tty(dev_t rdev, dev_t dev)
 	return get_tty_driver(rdev, dev) != NULL;
 }
 
+extern int tty_post_dump(int ret);
 extern int dump_verify_tty_sids(void);
 extern struct collect_image_info tty_info_cinfo;
 extern struct collect_image_info tty_cinfo;
+extern struct collect_image_info tty_cdata;
 extern int prepare_shared_tty(void);
 
 extern int tty_verify_active_pairs(void);
diff --git a/criu/tty.c b/criu/tty.c
index 68d7ba3d133b..2a94ae573ec3 100644
--- a/criu/tty.c
+++ b/criu/tty.c
@@ -78,6 +78,11 @@ struct tty_info_entry {
 	TtyInfoEntry			*tie;
 };
 
+struct tty_data_entry {
+	struct list_head		list;
+	TtyDataEntry			*tde;
+};
+
 struct tty_info {
 	struct list_head		list;
 	struct file_desc		d;
@@ -94,6 +99,8 @@ struct tty_info {
 	bool				inherit;
 
 	struct tty_info			*ctl_tty;
+	struct tty_info			*link;
+	struct tty_data_entry		*tty_data;
 };
 
 struct tty_dump_info {
@@ -104,8 +111,16 @@ struct tty_dump_info {
 	pid_t				pgrp;
 	int				fd;
 	struct tty_driver		*driver;
+
+	int				index;
+	int				lfd;
+	int				flags;
+	struct tty_dump_info		*link;
+	void				*tty_data;
+	size_t				tty_data_size;
 };
 
+static LIST_HEAD(all_tty_data_entries);
 static LIST_HEAD(all_tty_info_entries);
 static LIST_HEAD(all_ttys);
 
@@ -823,6 +838,19 @@ static int pty_open_slaves(struct tty_info *info)
 			goto err;
 		}
 
+		if (slave->link && slave->link->tty_data) {
+			ProtobufCBinaryData bd = slave->link->tty_data->tde->data;
+			int retval;
+
+			pr_debug("restore queued data on slave %#x (%zu bytes)\n",
+				 info->tfe->id, (size_t)bd.len);
+
+			retval = write(fd, bd.data, bd.len);
+			if (retval != bd.len)
+				pr_err("Restored %d bytes while %zu expected\n",
+				       retval, (size_t)bd.len);
+			slave->link->tty_data = NULL;
+		}
 		close(fd);
 		fd = -1;
 	}
@@ -963,6 +991,20 @@ static int pty_open_ptmx(struct tty_info *info)
 	if (pty_open_slaves(info))
 		goto err;
 
+	if (info->link && info->link->tty_data) {
+		ProtobufCBinaryData bd = info->link->tty_data->tde->data;
+		int ret;
+
+		pr_debug("restore queued data on master %#x (%zu bytes)\n",
+			 info->tfe->id, (size_t)bd.len);
+
+		ret = write(master, bd.data, bd.len);
+		if (ret != bd.len)
+			pr_err("Restored %d bytes while %zu expected\n",
+			       ret, (size_t)bd.len);
+		info->link->tty_data = NULL;
+	}
+
 	if (info->tie->locked)
 		lock_pty(master);
 
@@ -1222,11 +1264,50 @@ static int tty_setup_orphan_slavery(void)
 	return 0;
 }
 
+static struct tty_data_entry *tty_lookup_data(struct tty_info *info)
+{
+	struct tty_data_entry *td;
+
+	list_for_each_entry(td, &all_tty_data_entries, list) {
+		if (td->tde->tty_id == info->tie->id)
+			return td;
+	}
+
+	return NULL;
+}
+
 static int tty_setup_slavery(void * unused)
 {
 	struct tty_info *info, *peer, *m;
 
 	/*
+	 * Setup links for PTY terminal pairs.
+	 */
+	list_for_each_entry(info, &all_ttys, list) {
+		if (!is_pty(info->driver) || info->link)
+			continue;
+		peer = info;
+		list_for_each_entry_continue(peer, &all_ttys, list) {
+			if (!is_pty(peer->driver) || peer->link)
+				continue;
+			if (peer->tie->pty->index == info->tie->pty->index) {
+				info->link = peer;
+				peer->link = info;
+
+				pr_debug("Link PTYs (%#x)\n", info->tfe->id);
+
+				info->tty_data = tty_lookup_data(info);
+				peer->tty_data = tty_lookup_data(peer);
+
+				pr_debug("queued data %s %p %s %p\n",
+					info->driver->name, info->tty_data,
+					peer->driver->name, peer->tty_data);
+				break;
+			}
+		}
+	}
+
+	/*
 	 * The image may carry several terminals opened
 	 * belonging to the same session, so choose the
 	 * leader which gonna be setting up the controlling
@@ -1417,6 +1498,8 @@ static int collect_one_tty(void *obj, ProtobufCMessage *msg, struct cr_img *i)
 	info->create = tty_is_master(info);
 	info->inherit = false;
 	info->ctl_tty = NULL;
+	info->tty_data = NULL;
+	info->link = NULL;
 
 	if (verify_info(info))
 		return -1;
@@ -1473,6 +1556,24 @@ struct collect_image_info tty_cinfo = {
 	.collect	= collect_one_tty,
 };
 
+static int collect_one_tty_data(void *obj, ProtobufCMessage *msg, struct cr_img *i)
+{
+	struct tty_data_entry *tdo = obj;
+
+	tdo->tde = pb_msg(msg, TtyDataEntry);
+	list_add(&tdo->list, &all_tty_data_entries);
+	pr_debug("Collected data for id %#x (size %zu bytes)\n",
+		 tdo->tde->tty_id, (size_t)tdo->tde->data.len);
+	return 0;
+}
+
+struct collect_image_info tty_cdata = {
+	.fd_type	= CR_FD_TTY_DATA,
+	.pb_type	= PB_TTY_DATA,
+	.priv_size	= sizeof(struct tty_data_entry),
+	.collect	= collect_one_tty_data,
+};
+
 /* Make sure the ttys we're dumping do belong our process tree */
 int dump_verify_tty_sids(void)
 {
@@ -1518,7 +1619,6 @@ int dump_verify_tty_sids(void)
 				}
 			}
 		}
-		xfree(dinfo);
 	}
 
 	return ret;
@@ -1551,7 +1651,7 @@ static int dump_tty_info(int lfd, u32 id, const struct fd_parms *p, struct tty_d
 	if (!pti)
 		return -1;
 
-	dinfo = xmalloc(sizeof(*dinfo));
+	dinfo = xzalloc(sizeof(*dinfo));
 	if (!dinfo)
 		return -1;
 
@@ -1561,6 +1661,20 @@ static int dump_tty_info(int lfd, u32 id, const struct fd_parms *p, struct tty_d
 	dinfo->fd		= p->fd;
 	dinfo->driver		= driver;
 
+	if (is_pty(driver)) {
+		dinfo->lfd = dup(lfd);
+		if (dinfo->lfd < 0) {
+			pr_perror("Can't dup local fd on %x", id);
+			xfree(dinfo);
+			return -1;
+		}
+		dinfo->index	= index;
+		dinfo->flags	= p->flags;
+	} else {
+		dinfo->index	= -1;
+		dinfo->lfd	= -1;
+	}
+
 	list_add_tail(&dinfo->list, &all_ttys);
 
 	info.id			= id;
@@ -1698,6 +1812,180 @@ const struct fdtype_ops tty_dump_ops = {
 	.dump	= dump_one_tty,
 };
 
+static int tty_reblock(int id, int lfd, int flags)
+{
+	static const int fmask = O_RDWR | O_NONBLOCK;
+
+	if ((flags & fmask) != fmask) {
+		if (fcntl(lfd, F_SETFL, flags)) {
+			pr_perror("Can't revert mode back to %o on (%#x)\n", fmask, id);
+			return -errno;
+		}
+	}
+
+	return 0;
+}
+
+static int tty_unblock(int id, int lfd, int flags)
+{
+	static const int fmask = O_RDWR | O_NONBLOCK;
+
+	if ((flags & fmask) != fmask) {
+		if (fcntl(lfd, F_SETFL, fmask)) {
+			pr_perror("Can't change mode to %o on (%#x)\n", fmask, id);
+			return -errno;
+		}
+	}
+
+	return 0;
+}
+
+static char *tty_read_data(struct tty_dump_info *dinfo, size_t *ret_size)
+{
+	size_t off = 0, size = 16384;
+	char *buf;
+	int ret;
+
+	buf = xmalloc(size);
+	if (!buf)
+		return ERR_PTR(-ENOMEM);
+
+	ret = tty_unblock(dinfo->id, dinfo->lfd, dinfo->flags);
+	if (ret) {
+		xfree(buf);
+		return ERR_PTR(ret);
+	}
+
+	while (1) {
+		ret = read(dinfo->lfd, &buf[off], size - off);
+		if (ret == 0) {
+			pr_debug("No more data on tty (%s %#x)\n",
+				 dinfo->driver->name, dinfo->id);
+			break;
+		} else if (ret < 0) {
+			if (errno == EAGAIN) {
+				pr_debug("Not waiting data tty (%s %#x)\n",
+					 dinfo->driver->name, dinfo->id);
+				break;
+			} else {
+				ret = -errno;
+				pr_perror("Can't read data from tty (%s %#x)",
+					  dinfo->driver->name, dinfo->id);
+				xfree(buf);
+				return ERR_PTR(ret);
+			}
+		}
+
+		off += ret;
+		pr_debug("Read %d bytes (%d) from tty (%s %#x)\n",
+			 ret, (int)off, dinfo->driver->name, dinfo->id);
+
+		if (off >= size) {
+			pr_err("The tty (%s %#x) queued data overrflow %zu bytes limit\n",
+			       dinfo->driver->name, dinfo->id, size);
+			off = size;
+			break;
+		}
+	}
+
+	if (!off) {
+		xfree(buf);
+		buf = NULL;
+	}
+
+	*ret_size = off;
+	return buf;
+}
+
+int tty_post_dump(int status)
+{
+	struct tty_dump_info *dinfo, *peer, *n;
+	LIST_HEAD(all_ptys);
+	int ret = 0;
+
+	/*
+	 * Link PTY peers, and move one of linked
+	 * into separate list.
+	 */
+	list_for_each_entry_safe(dinfo, n, &all_ttys, list) {
+		if (!is_pty(dinfo->driver) || dinfo->link)
+			continue;
+		peer = dinfo;
+		list_for_each_entry_continue(peer, &all_ttys, list) {
+			if (!is_pty(peer->driver) || peer->link)
+				continue;
+
+			if (peer->index == dinfo->index) {
+				dinfo->link = peer;
+				peer->link = dinfo;
+				pr_debug("Link PTYs (%#x)\n", dinfo->id);
+				list_move(&dinfo->list, &all_ptys);
+			}
+		}
+	}
+
+	list_for_each_entry(dinfo, &all_ptys, list) {
+		TtyDataEntry e;
+
+		dinfo->tty_data = tty_read_data(dinfo, &dinfo->tty_data_size);
+		if (IS_ERR(dinfo->tty_data)) {
+			ret = PTR_ERR(dinfo->tty_data);
+			dinfo->tty_data = NULL;
+			break;
+		} else if (dinfo->tty_data) {
+			tty_data_entry__init(&e);
+			e.tty_id	= dinfo->id;
+			e.data.data	= dinfo->tty_data;
+			e.data.len	= dinfo->tty_data_size;
+
+			ret = pb_write_one(img_from_set(glob_imgset, CR_FD_TTY_DATA), &e, PB_TTY_DATA);
+			if (ret)
+				break;
+		}
+
+		dinfo->link->tty_data = tty_read_data(dinfo->link, &dinfo->link->tty_data_size);
+		if (IS_ERR(dinfo->link->tty_data)) {
+			ret = PTR_ERR(dinfo->link->tty_data);
+			dinfo->link->tty_data = NULL;
+			break;
+		} else if (dinfo->link->tty_data) {
+			tty_data_entry__init(&e);
+			e.tty_id	= dinfo->link->id;
+			e.data.data	= dinfo->link->tty_data;
+			e.data.len	= dinfo->link->tty_data_size;
+
+			ret = pb_write_one(img_from_set(glob_imgset, CR_FD_TTY_DATA), &e, PB_TTY_DATA);
+			if (ret)
+				break;
+		}
+	}
+
+	if (ret || opts.final_state == TASK_ALIVE) {
+		list_for_each_entry(dinfo, &all_ptys, list) {
+			if (dinfo->link->tty_data) {
+				if (write(dinfo->link->lfd, dinfo->link->tty_data,
+					  dinfo->link->tty_data_size) != dinfo->link->tty_data_size)
+					pr_perror("Can't writedata data back to tty (%#x)\n",
+						  dinfo->link->id);
+				tty_reblock(dinfo->link->id, dinfo->link->lfd, dinfo->link->flags);
+			}
+		}
+	}
+
+	list_for_each_entry_safe(dinfo, n, &all_ttys, list) {
+		close_safe(&dinfo->lfd);
+		xfree(dinfo->tty_data);
+	}
+
+	list_for_each_entry_safe(dinfo, n, &all_ptys, list) {
+		close_safe(&dinfo->lfd);
+		xfree(dinfo->tty_data);
+		xfree(dinfo);
+	}
+
+	return ret;
+}
+
 int tty_prep_fds(void)
 {
 	if (!opts.shell_job)
diff --git a/images/tty.proto b/images/tty.proto
index 4b5a70c20a88..a961b5e7339c 100644
--- a/images/tty.proto
+++ b/images/tty.proto
@@ -33,6 +33,11 @@ enum TtyType {
 	EXT_TTY		= 5;
 }
 
+message tty_data_entry {
+	required uint32			tty_id		= 1;
+	required bytes			data		= 2;
+}
+
 message tty_info_entry {
 	required uint32			id		=  1;
 
diff --git a/lib/py/images/images.py b/lib/py/images/images.py
index 0bc0a1fa3ea4..1f0d7085c0b0 100644
--- a/lib/py/images/images.py
+++ b/lib/py/images/images.py
@@ -370,6 +370,7 @@ handlers = {
 	'MNTS'			: entry_handler(mnt_entry),
 	'TTY_FILES'		: entry_handler(tty_file_entry),
 	'TTY_INFO'		: entry_handler(tty_info_entry),
+	'TTY_DATA'		: entry_handler(tty_data_entry),
 	'RLIMIT'		: entry_handler(rlimit_entry),
 	'TUNFILE'		: entry_handler(tunfile_entry),
 	'EXT_FILES'		: entry_handler(ext_file_entry),
-- 
2.5.5



More information about the CRIU mailing list