[CRIU] [PATCH 1/2] tty: Write unread pty buffers on post dump stage

Cyrill Gorcunov gorcunov at openvz.org
Thu May 12 11:21:45 PDT 2016


When unread data is present on PTY peers we currently simply ignore it,
but actually we can try to fetch it in a not-that-destructive way.

For this purpose, at the end of the dump procedure (because fetching
queued data may go wrong and we will have to write it back,
which is heavy, and we need all ttys under our hands)
we walk over all collected TTYs and link the PTY peers whose
indices match. Note that, in order not to overload tty_dump_info,
we reuse the @list member for the new @all_ptys list.

Once the link is established we literally read the queued data and
flush it into the new tty-data.img. If something goes wrong at this
moment, we stop reading queued data but walk back over the already
fetched ones and write them back to restore the former state. The same
applies if the dump has been requested to leave the task alive.

On restore we link the peers back and write the queued data once
the peer is back alive.

Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
 criu/cr-dump.c               |   2 +
 criu/cr-restore.c            |   1 +
 criu/image-desc.c            |   1 +
 criu/include/image-desc.h    |   1 +
 criu/include/magic.h         |   1 +
 criu/include/protobuf-desc.h |   1 +
 criu/include/tty.h           |   2 +
 criu/tty.c                   | 307 ++++++++++++++++++++++++++++++++++++++++++-
 images/tty.proto             |   5 +
 lib/py/images/images.py      |   1 +
 10 files changed, 319 insertions(+), 3 deletions(-)

diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index 5ac9fd041e4e..dcb496ce94fc 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -1545,6 +1545,8 @@ static int cr_dump_finish(int ret)
 {
 	int post_dump_ret = 0;
 
+	tty_dump_queued_data(ret);
+
 	if (disconnect_from_page_server())
 		ret = -1;
 
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index f115c1f96700..ca4727ea62e2 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -176,6 +176,7 @@ static struct collect_image_info *cinfos[] = {
 	&inotify_mark_cinfo,
 	&fanotify_cinfo,
 	&fanotify_mark_cinfo,
+	&tty_cdata,
 	&tty_info_cinfo,
 	&tty_cinfo,
 	&tunfile_cinfo,
diff --git a/criu/image-desc.c b/criu/image-desc.c
index 2949c592b17f..2b31354f29da 100644
--- a/criu/image-desc.c
+++ b/criu/image-desc.c
@@ -82,6 +82,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = {
 	FD_ENTRY(BINFMT_MISC,	"binfmt-misc-%d"),
 	FD_ENTRY(TTY_FILES,	"tty"),
 	FD_ENTRY(TTY_INFO,	"tty-info"),
+	FD_ENTRY_F(TTY_DATA,	"tty-data", O_NOBUF),
 	FD_ENTRY(FILE_LOCKS,	"filelocks"),
 	FD_ENTRY(RLIMIT,	"rlimit-%d"),
 	FD_ENTRY_F(PAGES,	"pages-%u", O_NOBUF),
diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h
index e39db39ad5de..7e75ede80b82 100644
--- a/criu/include/image-desc.h
+++ b/criu/include/image-desc.h
@@ -69,6 +69,7 @@ enum {
 	CR_FD_FIFO_DATA,
 	CR_FD_TTY_FILES,
 	CR_FD_TTY_INFO,
+	CR_FD_TTY_DATA,
 	CR_FD_REMAP_FPATH,
 	CR_FD_EVENTFD_FILE,
 	CR_FD_EVENTPOLL_FILE,
diff --git a/criu/include/magic.h b/criu/include/magic.h
index 8fb6b18d45ca..a458c62e632a 100644
--- a/criu/include/magic.h
+++ b/criu/include/magic.h
@@ -76,6 +76,7 @@
 #define NETNS_MAGIC		0x55933752 /* Dolgoprudny */
 #define TTY_FILES_MAGIC		0x59433025 /* Pushkin */
 #define TTY_INFO_MAGIC		0x59453036 /* Kolpino */
+#define TTY_DATA_MAGIC		0x59413026 /* Pavlovsk */
 #define FILE_LOCKS_MAGIC	0x54323616 /* Kaluga */
 #define RLIMIT_MAGIC		0x57113925 /* Rostov */
 #define FANOTIFY_FILE_MAGIC	0x55096122 /* Chelyabinsk */
diff --git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h
index a851f1273f11..4cff5db24bdb 100644
--- a/criu/include/protobuf-desc.h
+++ b/criu/include/protobuf-desc.h
@@ -58,6 +58,7 @@ enum {
 	PB_NETNS,
 	PB_BINFMT_MISC,		/* 50 */
 	PB_AUTOFS,
+	PB_TTY_DATA,
 
 	/* PB_AUTOGEN_STOP */
 
diff --git a/criu/include/tty.h b/criu/include/tty.h
index 24841d055b87..383481df2d04 100644
--- a/criu/include/tty.h
+++ b/criu/include/tty.h
@@ -22,9 +22,11 @@ static inline int is_tty(dev_t rdev, dev_t dev)
 	return get_tty_driver(rdev, dev) != NULL;
 }
 
+extern int tty_dump_queued_data(int ret);
 extern int dump_verify_tty_sids(void);
 extern struct collect_image_info tty_info_cinfo;
 extern struct collect_image_info tty_cinfo;
+extern struct collect_image_info tty_cdata;
 extern int prepare_shared_tty(void);
 
 extern int tty_verify_active_pairs(void);
diff --git a/criu/tty.c b/criu/tty.c
index c6e8e32b7013..af7ed94d399b 100644
--- a/criu/tty.c
+++ b/criu/tty.c
@@ -78,6 +78,11 @@ struct tty_info_entry {
 	TtyInfoEntry			*tie;
 };
 
+struct tty_data_entry {				/* one record collected from the tty-data image */
+	struct list_head		list;	/* node in all_tty_data_entries */
+	TtyDataEntry			*tde;	/* decoded protobuf message of the record */
+};
+
 struct tty_info {
 	struct list_head		list;
 	struct file_desc		d;
@@ -94,6 +99,8 @@ struct tty_info {
 	bool				inherit;
 
 	struct tty_info			*ctl_tty;
+	struct tty_info			*link;
+	struct tty_data_entry		*tty_data;
 };
 
 struct tty_dump_info {
@@ -104,8 +111,16 @@ struct tty_dump_info {
 	pid_t				pgrp;
 	int				fd;
 	struct tty_driver		*driver;
+
+	int				index;
+	int				lfd;
+	int				flags;
+	struct tty_dump_info		*link;
+	void				*tty_data;
+	size_t				tty_data_size;
 };
 
+static LIST_HEAD(all_tty_data_entries);
 static LIST_HEAD(all_tty_info_entries);
 static LIST_HEAD(all_ttys);
 
@@ -800,6 +815,22 @@ static int restore_tty_params(int fd, struct tty_info *info)
 	return userns_call(do_restore_tty_parms, UNS_ASYNC, &p, sizeof(p), fd);
 }
 
+/*
+ * Replay the data which had been queued on a PTY at dump time.
+ *
+ * @info is the tty whose saved data is to be replayed; when it is
+ * NULL or carries no saved data nothing is done. @fd is the
+ * descriptor the data is written into (callers pass the link of
+ * the tty @fd belongs to as @info).
+ *
+ * This is a best-effort operation: a failed or short write is
+ * logged but is not reported to the caller.
+ */
+static void pty_restore_queued_data(struct tty_info *info, int fd)
+{
+	ProtobufCBinaryData bd;
+	ssize_t retval;
+
+	if (!info || !info->tty_data)
+		return;
+
+	bd = info->tty_data->tde->data;
+
+	pr_debug("restore queued data on %#x (%zu bytes)\n",
+		 info->tfe->id, (size_t)bd.len);
+
+	retval = write(fd, bd.data, bd.len);
+	if (retval < 0)
+		/* Report errno instead of a bogus "-1 bytes" short write */
+		pr_perror("Can't restore queued data on %#x", info->tfe->id);
+	else if ((size_t)retval != bd.len)
+		pr_err("Restored %zd bytes while %zu expected\n",
+		       retval, (size_t)bd.len);
+}
+
 static int pty_open_slaves(struct tty_info *info)
 {
 	int sock = -1, fd = -1, ret = -1;
@@ -834,6 +865,7 @@ static int pty_open_slaves(struct tty_info *info)
 			goto err;
 		}
 
+		pty_restore_queued_data(slave->link, fd);
 		close(fd);
 		fd = -1;
 	}
@@ -974,6 +1006,8 @@ static int pty_open_ptmx(struct tty_info *info)
 	if (pty_open_slaves(info))
 		goto err;
 
+	pty_restore_queued_data(info->link, master);
+
 	if (info->tie->locked)
 		lock_pty(master);
 
@@ -1238,6 +1272,26 @@ static int tty_setup_slavery(void * unused)
 	struct tty_info *info, *peer, *m;
 
 	/*
+	 * Setup links for PTY terminal pairs.
+	 */
+	list_for_each_entry(info, &all_ttys, list) {
+		if (!is_pty(info->driver) || info->link)
+			continue;
+		peer = info;
+		list_for_each_entry_continue(peer, &all_ttys, list) {
+			if (!is_pty(peer->driver) || peer->link)
+				continue;
+			if (peer->tie->pty->index == info->tie->pty->index) {
+				info->link = peer;
+				peer->link = info;
+
+				pr_debug("Link PTYs (%#x)\n", info->tfe->id);
+				break;
+			}
+		}
+	}
+
+	/*
 	 * The image may carry several terminals opened
 	 * belonging to the same session, so choose the
 	 * leader which gonna be setting up the controlling
@@ -1407,6 +1461,18 @@ struct collect_image_info tty_info_cinfo = {
 	.collect	= collect_one_tty_info_entry,
 };
 
+/*
+ * Find the queued-data record collected for @info: the one whose
+ * tty_id equals the id of the tty info entry @info refers to.
+ * Returns NULL when no such record has been collected.
+ */
+static struct tty_data_entry *tty_lookup_data(struct tty_info *info)
+{
+	struct tty_data_entry *entry, *found = NULL;
+
+	list_for_each_entry(entry, &all_tty_data_entries, list) {
+		if (entry->tde->tty_id != info->tie->id)
+			continue;
+
+		found = entry;
+		break;
+	}
+
+	return found;
+}
+
 static int collect_one_tty(void *obj, ProtobufCMessage *msg, struct cr_img *i)
 {
 	struct tty_info *info = obj;
@@ -1429,6 +1495,8 @@ static int collect_one_tty(void *obj, ProtobufCMessage *msg, struct cr_img *i)
 	info->create = tty_is_master(info);
 	info->inherit = false;
 	info->ctl_tty = NULL;
+	info->tty_data = is_pty(info->driver) ? tty_lookup_data(info) : NULL;
+	info->link = NULL;
 
 	if (verify_info(info))
 		return -1;
@@ -1463,7 +1531,11 @@ static int collect_one_tty(void *obj, ProtobufCMessage *msg, struct cr_img *i)
 	if (is_pty(info->driver) && info->tie->termios)
 		tty_test_and_set(info->tfe->tty_info_id, tty_active_pairs);
 
-	pr_info("Collected tty ID %#x (%s)\n", info->tfe->id, info->driver->name);
+	pr_info("Collected tty ID %#x (%s) queued data %zu bytes\n",
+		info->tfe->id, info->driver->name,
+		info->tty_data ?
+		(size_t)info->tty_data->tde->data.len :
+		(size_t)0);
 
 	if (list_empty(&all_ttys))
 		/*
@@ -1485,6 +1557,24 @@ struct collect_image_info tty_cinfo = {
 	.collect	= collect_one_tty,
 };
 
+/*
+ * Collect callback for tty-data records: remember the decoded
+ * message in the entry and put the entry on all_tty_data_entries.
+ * Always returns 0.
+ */
+static int collect_one_tty_data(void *obj, ProtobufCMessage *msg, struct cr_img *i)
+{
+	struct tty_data_entry *entry = obj;
+	TtyDataEntry *tde = pb_msg(msg, TtyDataEntry);
+
+	entry->tde = tde;
+	list_add(&entry->list, &all_tty_data_entries);
+
+	pr_debug("Collected data for id %#x (size %zu bytes)\n",
+		 tde->tty_id, (size_t)tde->data.len);
+
+	return 0;
+}
+
+/* Collector description for the queued tty data image (CR_FD_TTY_DATA). */
+struct collect_image_info tty_cdata = {
+	.collect	= collect_one_tty_data,
+	.priv_size	= sizeof(struct tty_data_entry),
+	.pb_type	= PB_TTY_DATA,
+	.fd_type	= CR_FD_TTY_DATA,
+};
+
 /* Make sure the ttys we're dumping do belong our process tree */
 int dump_verify_tty_sids(void)
 {
@@ -1530,7 +1620,6 @@ int dump_verify_tty_sids(void)
 				}
 			}
 		}
-		xfree(dinfo);
 	}
 
 	return ret;
@@ -1563,7 +1652,7 @@ static int dump_tty_info(int lfd, u32 id, const struct fd_parms *p, struct tty_d
 	if (!pti)
 		return -1;
 
-	dinfo = xmalloc(sizeof(*dinfo));
+	dinfo = xzalloc(sizeof(*dinfo));
 	if (!dinfo)
 		return -1;
 
@@ -1573,6 +1662,20 @@ static int dump_tty_info(int lfd, u32 id, const struct fd_parms *p, struct tty_d
 	dinfo->fd		= p->fd;
 	dinfo->driver		= driver;
 
+	if (is_pty(driver)) {
+		dinfo->lfd = dup(lfd);
+		if (dinfo->lfd < 0) {
+			pr_perror("Can't dup local fd on %x", id);
+			xfree(dinfo);
+			return -1;
+		}
+		dinfo->index	= index;
+		dinfo->flags	= p->flags;
+	} else {
+		dinfo->index	= -1;
+		dinfo->lfd	= -1;
+	}
+
 	list_add_tail(&dinfo->list, &all_ttys);
 
 	info.id			= id;
@@ -1710,6 +1813,204 @@ const struct fdtype_ops tty_dump_ops = {
 	.dump	= dump_one_tty,
 };
 
+/*
+ * Put the original file status @flags back on @lfd, undoing what
+ * tty_unblock() did. Descriptors which already had both O_RDWR and
+ * O_NONBLOCK set in @flags were not touched by tty_unblock() and
+ * are left alone here as well.
+ *
+ * @id is used for error reporting only.
+ *
+ * Returns 0 on success or a negative errno value if fcntl() fails.
+ */
+static int tty_reblock(int id, int lfd, int flags)
+{
+	static const int fmask = O_RDWR | O_NONBLOCK;
+	int ret;
+
+	if ((flags & fmask) == fmask)
+		return 0;
+
+	if (fcntl(lfd, F_SETFL, flags)) {
+		/* Save errno before logging can clobber it */
+		ret = -errno;
+		pr_perror("Can't revert mode back to %o on (%#x)", flags, id);
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Switch @lfd to O_RDWR | O_NONBLOCK status flags so that reading
+ * from it does not block when no data is queued. Nothing is done if
+ * @flags (the original status flags of the descriptor) already
+ * carry both bits.
+ *
+ * @id is used for error reporting only.
+ *
+ * Returns 0 on success or a negative errno value if fcntl() fails.
+ */
+static int tty_unblock(int id, int lfd, int flags)
+{
+	static const int fmask = O_RDWR | O_NONBLOCK;
+	int ret;
+
+	if ((flags & fmask) == fmask)
+		return 0;
+
+	if (fcntl(lfd, F_SETFL, fmask)) {
+		/* Save errno before logging can clobber it */
+		ret = -errno;
+		pr_perror("Can't change mode to %o on (%#x)", fmask, id);
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Fetch the data queued on @dinfo and write it into the
+ * CR_FD_TTY_DATA image.
+ *
+ * The descriptor is switched to non-blocking mode first and then
+ * read until there is no more data, or until the 16K buffer is
+ * full (the rest is left in the queue and an error is logged).
+ *
+ * Whenever at least one byte has been fetched the buffer is handed
+ * over to @dinfo (->tty_data, ->tty_data_size), even if a later
+ * read fails, so the caller is able to write the data back and is
+ * responsible for freeing it.
+ *
+ * Returns 0 on success, a negative errno value if allocation,
+ * unblocking or reading fails, or the result of pb_write_one().
+ */
+static int tty_do_dump_queued_data(struct tty_dump_info *dinfo)
+{
+	TtyDataEntry e = TTY_DATA_ENTRY__INIT;
+	size_t off = 0, size = 16384;
+	char *buf;
+	int ret;
+
+	buf = xmalloc(size);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = tty_unblock(dinfo->id, dinfo->lfd, dinfo->flags);
+	if (ret)
+		goto free_buf;
+
+	while (1) {
+		ret = read(dinfo->lfd, &buf[off], size - off);
+		if (ret == 0) {
+			pr_debug("No more data on tty (%s %#x)\n",
+				 dinfo->driver->name, dinfo->id);
+			break;
+		} else if (ret < 0) {
+			/* Interrupted before anything was read, just retry */
+			if (errno == EINTR)
+				continue;
+
+			if (errno == EAGAIN) {
+				pr_debug("Not waiting data tty (%s %#x)\n",
+					 dinfo->driver->name, dinfo->id);
+				break;
+			}
+
+			ret = -errno;
+			pr_perror("Can't read data from tty (%s %#x)",
+				  dinfo->driver->name, dinfo->id);
+
+			/*
+			 * Don't drop what has been fetched already,
+			 * the caller has to write it back.
+			 */
+			if (off) {
+				dinfo->tty_data = buf;
+				dinfo->tty_data_size = off;
+				return ret;
+			}
+			goto free_buf;
+		}
+
+		off += ret;
+		pr_debug("Read %d bytes (%d) from tty (%s %#x)\n",
+			 ret, (int)off, dinfo->driver->name, dinfo->id);
+
+		if (off >= size) {
+			pr_err("The tty (%s %#x) queued data overflow %zu bytes limit\n",
+			       dinfo->driver->name, dinfo->id, size);
+			off = size;
+			break;
+		}
+	}
+
+	if (!off) {
+		ret = 0;
+		goto free_buf;
+	}
+
+	/* From now on the buffer belongs to @dinfo */
+	dinfo->tty_data = buf;
+	dinfo->tty_data_size = off;
+
+	e.tty_id	= dinfo->id;
+	e.data.data	= (void *)buf;
+	e.data.len	= off;
+
+	return pb_write_one(img_from_set(glob_imgset, CR_FD_TTY_DATA),
+			    &e, PB_TTY_DATA);
+
+free_buf:
+	xfree(buf);
+	return ret;
+}
+
+/*
+ * Write the data fetched from @dinfo (if any) back into the queue
+ * it has been read from, which is done by writing it into the peer.
+ * Failures are logged only.
+ */
+static void tty_writeback_one(struct tty_dump_info *dinfo)
+{
+	ssize_t ret;
+
+	if (!dinfo->tty_data)
+		return;
+
+	ret = write(dinfo->link->lfd, dinfo->tty_data, dinfo->tty_data_size);
+	if (ret < 0)
+		pr_perror("Can't writeback to tty (%#x)", dinfo->id);
+	else if ((size_t)ret != dinfo->tty_data_size)
+		pr_err("Wrote back %zd bytes while %zu expected to tty (%#x)\n",
+		       ret, dinfo->tty_data_size, dinfo->id);
+}
+
+/*
+ * Restore the former state of a linked PTY pair: write back the
+ * data fetched from either side and then revert the file status
+ * flags of both descriptors.
+ *
+ * The flags are reverted on both sides regardless of whether any
+ * data has been fetched, since tty_do_dump_queued_data() switches
+ * the descriptor to non-blocking mode before it knows if there is
+ * anything to read. Both writes are done before the flags are
+ * reverted so none of them is issued on a blocking descriptor
+ * which was non-blocking a moment ago.
+ *
+ * @dinfo must be linked (->link set).
+ */
+static void tty_do_writeback_queued_data(struct tty_dump_info *dinfo)
+{
+	struct tty_dump_info *peer = dinfo->link;
+
+	tty_writeback_one(dinfo);
+	tty_writeback_one(peer);
+
+	tty_reblock(dinfo->id, dinfo->lfd, dinfo->flags);
+	tty_reblock(peer->id, peer->lfd, peer->flags);
+}
+
+/* Close the local descriptor, free queued data and the entry itself. */
+static void tty_dinfo_release(struct tty_dump_info *dinfo)
+{
+	if (dinfo->lfd >= 0)
+		close_safe(&dinfo->lfd);
+	xfree(dinfo->tty_data);
+	list_del(&dinfo->list);
+	xfree(dinfo);
+}
+
+/*
+ * Dumping queued data must be done at the very end of the
+ * checkpoint procedure -- it's a tail optimization: we try
+ * to defer this procedure until everything else has passed
+ * successfully, because it is time consuming on its own and
+ * might require writing data back to the former peers in
+ * case something goes wrong.
+ *
+ * Moreover, when we gather PTY peers into their own list we
+ * do it in a destructive way -- the former @all_ttys list
+ * gets modified (one of the peers gets moved from @all_ttys
+ * to the @all_ptys list) because otherwise we would have to
+ * add one more entry into tty_dump_info, thus we simply reuse
+ * the @list entry for our own needs.
+ *
+ * @status is the result of the dump so far: if it is non-zero
+ * the dump has failed already and the queues are left untouched,
+ * only the collected entries are released.
+ *
+ * All the tty_dump_info entries are freed on return.
+ *
+ * Returns 0 on success or the error of the fetch stage.
+ */
+int tty_dump_queued_data(int status)
+{
+	struct tty_dump_info *dinfo, *peer, *n;
+	LIST_HEAD(all_ptys);
+	int ret = 0;
+
+	/*
+	 * Link PTY peers, and move one of linked
+	 * into separate list.
+	 */
+	list_for_each_entry_safe(dinfo, n, &all_ttys, list) {
+		if (!is_pty(dinfo->driver) || dinfo->link)
+			continue;
+
+		peer = dinfo;
+		list_for_each_entry_continue(peer, &all_ttys, list) {
+			if (!is_pty(peer->driver) || peer->link)
+				continue;
+			if (peer->index != dinfo->index)
+				continue;
+
+			dinfo->link = peer;
+			peer->link = dinfo;
+			pr_debug("Link PTYs (%#x)\n", dinfo->id);
+
+			list_move(&dinfo->list, &all_ptys);
+
+			/* A pair is complete, don't relink @dinfo */
+			break;
+		}
+	}
+
+	/*
+	 * Once linked fetch the queued data if present. There
+	 * is no point in draining the queues if the dump has
+	 * failed already.
+	 */
+	if (!status) {
+		list_for_each_entry(dinfo, &all_ptys, list) {
+			ret = tty_do_dump_queued_data(dinfo);
+			if (ret)
+				break;
+			ret = tty_do_dump_queued_data(dinfo->link);
+			if (ret)
+				break;
+		}
+
+		if (ret || opts.final_state != TASK_DEAD) {
+			list_for_each_entry(dinfo, &all_ptys, list)
+				tty_do_writeback_queued_data(dinfo);
+		}
+	}
+
+	/*
+	 * The peers of the entries on @all_ptys are still on
+	 * @all_ttys and are released together with the rest,
+	 * including descriptors of PTYs which got no link.
+	 */
+	list_for_each_entry_safe(dinfo, n, &all_ptys, list)
+		tty_dinfo_release(dinfo);
+
+	list_for_each_entry_safe(dinfo, n, &all_ttys, list)
+		tty_dinfo_release(dinfo);
+
+	return ret;
+}
+
 int tty_prep_fds(void)
 {
 	if (!opts.shell_job)
diff --git a/images/tty.proto b/images/tty.proto
index 0b444b2439e8..f3d55f6f0816 100644
--- a/images/tty.proto
+++ b/images/tty.proto
@@ -34,6 +34,11 @@ enum TtyType {
 	SERIAL		= 6;
 }
 
+message tty_data_entry {	// data left unread in a tty queue at dump time
+	required uint32			tty_id		= 1;	// id of the tty the data has been read from
+	required bytes			data		= 2;	// the queued bytes as read
+}
+
 message tty_info_entry {
 	required uint32			id		=  1;
 
diff --git a/lib/py/images/images.py b/lib/py/images/images.py
index 0bc0a1fa3ea4..1f0d7085c0b0 100644
--- a/lib/py/images/images.py
+++ b/lib/py/images/images.py
@@ -370,6 +370,7 @@ handlers = {
 	'MNTS'			: entry_handler(mnt_entry),
 	'TTY_FILES'		: entry_handler(tty_file_entry),
 	'TTY_INFO'		: entry_handler(tty_info_entry),
+	'TTY_DATA'		: entry_handler(tty_data_entry),
 	'RLIMIT'		: entry_handler(rlimit_entry),
 	'TUNFILE'		: entry_handler(tunfile_entry),
 	'EXT_FILES'		: entry_handler(ext_file_entry),
-- 
2.5.5



More information about the CRIU mailing list