[CRIU] [PATCH 1/2] tty: Write unread pty buffers on post dump stage
Pavel Emelyanov
xemul at virtuozzo.com
Thu Apr 21 06:48:43 PDT 2016
On 04/19/2016 10:37 PM, Cyrill Gorcunov wrote:
> When unread data present on peers we currently simply ignore
> it but actually we can try to fetch it in non(that)destructive
> way.
>
> For this sake we collect tty peers into a separate list
> (currently only _paired_ ptys are supported, which means
> that both ends must belong the container. The reason for
> that it due to we can't guarantee anything on external
> peers which can continue generating data during a dumping
> procedure).
>
> Once they are collected at the post dump stage we read the
> data from peers and write it into the image. In case if
> some error happened we write the data back into
> appropriate pty.
>
> The same applies to restore stage -- we simply link
> peers together and when appropriate peer get opened
> we restore its neighbour content.
>
> Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
> ---
> criu/cr-dump.c | 2 +
> criu/cr-restore.c | 1 +
> criu/image-desc.c | 1 +
> criu/include/image-desc.h | 1 +
> criu/include/magic.h | 1 +
> criu/include/protobuf-desc.h | 1 +
> criu/include/tty.h | 2 +
> criu/tty.c | 292 ++++++++++++++++++++++++++++++++++++++++++-
> images/tty.proto | 5 +
> lib/py/images/images.py | 1 +
> 10 files changed, 305 insertions(+), 2 deletions(-)
>
> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
> index 5ac9fd041e4e..754db6d93478 100644
> --- a/criu/cr-dump.c
> +++ b/criu/cr-dump.c
> @@ -1545,6 +1545,8 @@ static int cr_dump_finish(int ret)
> {
> int post_dump_ret = 0;
>
> + tty_post_dump(ret);
> +
We already have tty_verify_active_pairs() and dump_verify_tty_sids()
scattered over the cr_dump_tasks(). Can we reduce this mess, instead
of increasing one?
> if (disconnect_from_page_server())
> ret = -1;
>
> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> index 34db7f7a360d..36ff5f736a22 100644
> --- a/criu/cr-restore.c
> +++ b/criu/cr-restore.c
> @@ -178,6 +178,7 @@ static struct collect_image_info *cinfos[] = {
> &fanotify_mark_cinfo,
> &tty_info_cinfo,
> &tty_cinfo,
> + &tty_cdata,
> &tunfile_cinfo,
> &ext_file_cinfo,
> &timerfd_cinfo,
> diff --git a/criu/image-desc.c b/criu/image-desc.c
> index 2949c592b17f..2b31354f29da 100644
> --- a/criu/image-desc.c
> +++ b/criu/image-desc.c
> @@ -82,6 +82,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = {
> FD_ENTRY(BINFMT_MISC, "binfmt-misc-%d"),
> FD_ENTRY(TTY_FILES, "tty"),
> FD_ENTRY(TTY_INFO, "tty-info"),
> + FD_ENTRY_F(TTY_DATA, "tty-data", O_NOBUF),
> FD_ENTRY(FILE_LOCKS, "filelocks"),
> FD_ENTRY(RLIMIT, "rlimit-%d"),
> FD_ENTRY_F(PAGES, "pages-%u", O_NOBUF),
> diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h
> index e39db39ad5de..7e75ede80b82 100644
> --- a/criu/include/image-desc.h
> +++ b/criu/include/image-desc.h
> @@ -69,6 +69,7 @@ enum {
> CR_FD_FIFO_DATA,
> CR_FD_TTY_FILES,
> CR_FD_TTY_INFO,
> + CR_FD_TTY_DATA,
> CR_FD_REMAP_FPATH,
> CR_FD_EVENTFD_FILE,
> CR_FD_EVENTPOLL_FILE,
> diff --git a/criu/include/magic.h b/criu/include/magic.h
> index 8fb6b18d45ca..a458c62e632a 100644
> --- a/criu/include/magic.h
> +++ b/criu/include/magic.h
> @@ -76,6 +76,7 @@
> #define NETNS_MAGIC 0x55933752 /* Dolgoprudny */
> #define TTY_FILES_MAGIC 0x59433025 /* Pushkin */
> #define TTY_INFO_MAGIC 0x59453036 /* Kolpino */
> +#define TTY_DATA_MAGIC 0x59413026 /* Pavlovsk */
> #define FILE_LOCKS_MAGIC 0x54323616 /* Kaluga */
> #define RLIMIT_MAGIC 0x57113925 /* Rostov */
> #define FANOTIFY_FILE_MAGIC 0x55096122 /* Chelyabinsk */
> diff --git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h
> index a851f1273f11..4cff5db24bdb 100644
> --- a/criu/include/protobuf-desc.h
> +++ b/criu/include/protobuf-desc.h
> @@ -58,6 +58,7 @@ enum {
> PB_NETNS,
> PB_BINFMT_MISC, /* 50 */
> PB_AUTOFS,
> + PB_TTY_DATA,
>
> /* PB_AUTOGEN_STOP */
>
> diff --git a/criu/include/tty.h b/criu/include/tty.h
> index 48f743eb293b..7ed0895e7168 100644
> --- a/criu/include/tty.h
> +++ b/criu/include/tty.h
> @@ -18,9 +18,11 @@ static inline int is_tty(dev_t rdev, dev_t dev)
> return get_tty_driver(rdev, dev) != NULL;
> }
>
> +extern int tty_post_dump(int ret);
> extern int dump_verify_tty_sids(void);
> extern struct collect_image_info tty_info_cinfo;
> extern struct collect_image_info tty_cinfo;
> +extern struct collect_image_info tty_cdata;
> extern int prepare_shared_tty(void);
>
> extern int tty_verify_active_pairs(void);
> diff --git a/criu/tty.c b/criu/tty.c
> index 68d7ba3d133b..2a94ae573ec3 100644
> --- a/criu/tty.c
> +++ b/criu/tty.c
> @@ -78,6 +78,11 @@ struct tty_info_entry {
> TtyInfoEntry *tie;
> };
>
> +struct tty_data_entry {
> + struct list_head list;
> + TtyDataEntry *tde;
> +};
> +
> struct tty_info {
> struct list_head list;
> struct file_desc d;
> @@ -94,6 +99,8 @@ struct tty_info {
> bool inherit;
>
> struct tty_info *ctl_tty;
> + struct tty_info *link;
> + struct tty_data_entry *tty_data;
> };
>
> struct tty_dump_info {
> @@ -104,8 +111,16 @@ struct tty_dump_info {
> pid_t pgrp;
> int fd;
> struct tty_driver *driver;
> +
> + int index;
> + int lfd;
> + int flags;
> + struct tty_dump_info *link;
> + void *tty_data;
> + size_t tty_data_size;
> };
>
> +static LIST_HEAD(all_tty_data_entries);
> static LIST_HEAD(all_tty_info_entries);
> static LIST_HEAD(all_ttys);
>
> @@ -823,6 +838,19 @@ static int pty_open_slaves(struct tty_info *info)
> goto err;
> }
>
> + if (slave->link && slave->link->tty_data) {
> + ProtobufCBinaryData bd = slave->link->tty_data->tde->data;
> + int retval;
> +
> + pr_debug("restore queued data on slave %#x (%zu bytes)\n",
> + info->tfe->id, (size_t)bd.len);
> +
> + retval = write(fd, bd.data, bd.len);
> + if (retval != bd.len)
> + pr_err("Restored %d bytes while %zu expected\n",
> + retval, (size_t)bd.len);
> + slave->link->tty_data = NULL;
What for?
> + }
> close(fd);
> fd = -1;
> }
> @@ -963,6 +991,20 @@ static int pty_open_ptmx(struct tty_info *info)
> if (pty_open_slaves(info))
> goto err;
>
> + if (info->link && info->link->tty_data) {
> + ProtobufCBinaryData bd = info->link->tty_data->tde->data;
> + int ret;
> +
> + pr_debug("restore queued data on master %#x (%zu bytes)\n",
> + info->tfe->id, (size_t)bd.len);
> +
> + ret = write(master, bd.data, bd.len);
> + if (ret != bd.len)
> + pr_err("Restored %d bytes while %zu expected\n",
> + ret, (size_t)bd.len);
> + info->link->tty_data = NULL;
Factor this out with the above hunk.
> + }
> +
> if (info->tie->locked)
> lock_pty(master);
>
> @@ -1222,11 +1264,50 @@ static int tty_setup_orphan_slavery(void)
> return 0;
> }
>
> +static struct tty_data_entry *tty_lookup_data(struct tty_info *info)
> +{
> + struct tty_data_entry *td;
> +
> + list_for_each_entry(td, &all_tty_data_entries, list) {
> + if (td->tde->tty_id == info->tie->id)
> + return td;
> + }
> +
> + return NULL;
> +}
> +
> static int tty_setup_slavery(void * unused)
> {
> struct tty_info *info, *peer, *m;
>
> /*
> + * Setup links for PTY terminal pairs.
> + */
> + list_for_each_entry(info, &all_ttys, list) {
> + if (!is_pty(info->driver) || info->link)
> + continue;
> + peer = info;
> + list_for_each_entry_continue(peer, &all_ttys, list) {
> + if (!is_pty(peer->driver) || peer->link)
> + continue;
> + if (peer->tie->pty->index == info->tie->pty->index) {
> + info->link = peer;
> + peer->link = info;
> +
> + pr_debug("Link PTYs (%#x)\n", info->tfe->id);
> +
> + info->tty_data = tty_lookup_data(info);
> + peer->tty_data = tty_lookup_data(peer);
Can we better lookup for tty_info in data collecting routine?
> +
> + pr_debug("queued data %s %p %s %p\n",
> + info->driver->name, info->tty_data,
> + peer->driver->name, peer->tty_data);
> + break;
> + }
> + }
> + }
> +
> + /*
> * The image may carry several terminals opened
> * belonging to the same session, so choose the
> * leader which gonna be setting up the controlling
> @@ -1417,6 +1498,8 @@ static int collect_one_tty(void *obj, ProtobufCMessage *msg, struct cr_img *i)
> info->create = tty_is_master(info);
> info->inherit = false;
> info->ctl_tty = NULL;
> + info->tty_data = NULL;
> + info->link = NULL;
>
> if (verify_info(info))
> return -1;
> @@ -1473,6 +1556,24 @@ struct collect_image_info tty_cinfo = {
> .collect = collect_one_tty,
> };
>
> +static int collect_one_tty_data(void *obj, ProtobufCMessage *msg, struct cr_img *i)
> +{
> + struct tty_data_entry *tdo = obj;
> +
> + tdo->tde = pb_msg(msg, TtyDataEntry);
> + list_add(&tdo->list, &all_tty_data_entries);
> + pr_debug("Collected data for id %#x (size %zu bytes)\n",
> + tdo->tde->tty_id, (size_t)tdo->tde->data.len);
> + return 0;
> +}
> +
> +struct collect_image_info tty_cdata = {
> + .fd_type = CR_FD_TTY_DATA,
> + .pb_type = PB_TTY_DATA,
> + .priv_size = sizeof(struct tty_data_entry),
> + .collect = collect_one_tty_data,
> +};
> +
> /* Make sure the ttys we're dumping do belong our process tree */
> int dump_verify_tty_sids(void)
> {
> @@ -1518,7 +1619,6 @@ int dump_verify_tty_sids(void)
> }
> }
> }
> - xfree(dinfo);
> }
>
> return ret;
> @@ -1551,7 +1651,7 @@ static int dump_tty_info(int lfd, u32 id, const struct fd_parms *p, struct tty_d
> if (!pti)
> return -1;
>
> - dinfo = xmalloc(sizeof(*dinfo));
> + dinfo = xzalloc(sizeof(*dinfo));
> if (!dinfo)
> return -1;
>
> @@ -1561,6 +1661,20 @@ static int dump_tty_info(int lfd, u32 id, const struct fd_parms *p, struct tty_d
> dinfo->fd = p->fd;
> dinfo->driver = driver;
>
> + if (is_pty(driver)) {
> + dinfo->lfd = dup(lfd);
> + if (dinfo->lfd < 0) {
> + pr_perror("Can't dup local fd on %x", id);
> + xfree(dinfo);
> + return -1;
> + }
> + dinfo->index = index;
> + dinfo->flags = p->flags;
> + } else {
> + dinfo->index = -1;
> + dinfo->lfd = -1;
> + }
> +
> list_add_tail(&dinfo->list, &all_ttys);
>
> info.id = id;
> @@ -1698,6 +1812,180 @@ const struct fdtype_ops tty_dump_ops = {
> .dump = dump_one_tty,
> };
>
> +static int tty_reblock(int id, int lfd, int flags)
> +{
> + static const int fmask = O_RDWR | O_NONBLOCK;
> +
> + if ((flags & fmask) != fmask) {
> + if (fcntl(lfd, F_SETFL, flags)) {
> + pr_perror("Can't revert mode back to %o on (%#x)\n", fmask, id);
> + return -errno;
> + }
> + }
> +
> + return 0;
> +}
> +
> +static int tty_unblock(int id, int lfd, int flags)
> +{
> + static const int fmask = O_RDWR | O_NONBLOCK;
> +
> + if ((flags & fmask) != fmask) {
> + if (fcntl(lfd, F_SETFL, fmask)) {
> + pr_perror("Can't change mode to %o on (%#x)\n", fmask, id);
> + return -errno;
> + }
> + }
> +
> + return 0;
> +}
> +
> +static char *tty_read_data(struct tty_dump_info *dinfo, size_t *ret_size)
> +{
> + size_t off = 0, size = 16384;
> + char *buf;
> + int ret;
> +
> + buf = xmalloc(size);
> + if (!buf)
> + return ERR_PTR(-ENOMEM);
> +
> + ret = tty_unblock(dinfo->id, dinfo->lfd, dinfo->flags);
> + if (ret) {
> + xfree(buf);
> + return ERR_PTR(ret);
> + }
> +
> + while (1) {
> + ret = read(dinfo->lfd, &buf[off], size - off);
> + if (ret == 0) {
> + pr_debug("No more data on tty (%s %#x)\n",
> + dinfo->driver->name, dinfo->id);
> + break;
> + } else if (ret < 0) {
> + if (errno == EAGAIN) {
> + pr_debug("Not waiting data tty (%s %#x)\n",
> + dinfo->driver->name, dinfo->id);
> + break;
> + } else {
> + ret = -errno;
> + pr_perror("Can't read data from tty (%s %#x)",
> + dinfo->driver->name, dinfo->id);
> + xfree(buf);
> + return ERR_PTR(ret);
> + }
> + }
> +
> + off += ret;
> + pr_debug("Read %d bytes (%d) from tty (%s %#x)\n",
> + ret, (int)off, dinfo->driver->name, dinfo->id);
> +
> + if (off >= size) {
> + pr_err("The tty (%s %#x) queued data overrflow %zu bytes limit\n",
> + dinfo->driver->name, dinfo->id, size);
> + off = size;
> + break;
> + }
> + }
> +
> + if (!off) {
> + xfree(buf);
> + buf = NULL;
> + }
> +
> + *ret_size = off;
> + return buf;
> +}
> +
> +int tty_post_dump(int status)
> +{
> + struct tty_dump_info *dinfo, *peer, *n;
> + LIST_HEAD(all_ptys);
> + int ret = 0;
> +
> + /*
> + * Link PTY peers, and move one of linked
> + * into separate list.
> + */
> + list_for_each_entry_safe(dinfo, n, &all_ttys, list) {
> + if (!is_pty(dinfo->driver) || dinfo->link)
> + continue;
> + peer = dinfo;
> + list_for_each_entry_continue(peer, &all_ttys, list) {
> + if (!is_pty(peer->driver) || peer->link)
> + continue;
> +
> + if (peer->index == dinfo->index) {
> + dinfo->link = peer;
> + peer->link = dinfo;
> + pr_debug("Link PTYs (%#x)\n", dinfo->id);
> + list_move(&dinfo->list, &all_ptys);
> + }
> + }
> + }
Can we do this linkage while dumping the ttys?
> +
> + list_for_each_entry(dinfo, &all_ptys, list) {
> + TtyDataEntry e;
> +
> + dinfo->tty_data = tty_read_data(dinfo, &dinfo->tty_data_size);
> + if (IS_ERR(dinfo->tty_data)) {
> + ret = PTR_ERR(dinfo->tty_data);
> + dinfo->tty_data = NULL;
> + break;
> + } else if (dinfo->tty_data) {
> + tty_data_entry__init(&e);
> + e.tty_id = dinfo->id;
> + e.data.data = dinfo->tty_data;
> + e.data.len = dinfo->tty_data_size;
> +
> + ret = pb_write_one(img_from_set(glob_imgset, CR_FD_TTY_DATA), &e, PB_TTY_DATA);
> + if (ret)
> + break;
> + }
> +
> + dinfo->link->tty_data = tty_read_data(dinfo->link, &dinfo->link->tty_data_size);
> + if (IS_ERR(dinfo->link->tty_data)) {
> + ret = PTR_ERR(dinfo->link->tty_data);
> + dinfo->link->tty_data = NULL;
> + break;
> + } else if (dinfo->link->tty_data) {
> + tty_data_entry__init(&e);
> + e.tty_id = dinfo->link->id;
> + e.data.data = dinfo->link->tty_data;
> + e.data.len = dinfo->link->tty_data_size;
> +
> + ret = pb_write_one(img_from_set(glob_imgset, CR_FD_TTY_DATA), &e, PB_TTY_DATA);
> + if (ret)
> + break;
> + }
Factor the code out.
> + }
> +
> + if (ret || opts.final_state == TASK_ALIVE) {
|| opts.final_state != TASK_DEAD
> + list_for_each_entry(dinfo, &all_ptys, list) {
> + if (dinfo->link->tty_data) {
> + if (write(dinfo->link->lfd, dinfo->link->tty_data,
> + dinfo->link->tty_data_size) != dinfo->link->tty_data_size)
> + pr_perror("Can't writedata data back to tty (%#x)\n",
> + dinfo->link->id);
> + tty_reblock(dinfo->link->id, dinfo->link->lfd, dinfo->link->flags);
> + }
> + }
> + }
> +
> + list_for_each_entry_safe(dinfo, n, &all_ttys, list) {
> + close_safe(&dinfo->lfd);
> + xfree(dinfo->tty_data);
> + }
> +
> + list_for_each_entry_safe(dinfo, n, &all_ptys, list) {
> + close_safe(&dinfo->lfd);
> + xfree(dinfo->tty_data);
> + xfree(dinfo);
> + }
> +
> + return ret;
> +}
> +
> int tty_prep_fds(void)
> {
> if (!opts.shell_job)
> diff --git a/images/tty.proto b/images/tty.proto
> index 4b5a70c20a88..a961b5e7339c 100644
> --- a/images/tty.proto
> +++ b/images/tty.proto
> @@ -33,6 +33,11 @@ enum TtyType {
> EXT_TTY = 5;
> }
>
> +message tty_data_entry {
> + required uint32 tty_id = 1;
> + required bytes data = 2;
> +}
> +
> message tty_info_entry {
> required uint32 id = 1;
>
> diff --git a/lib/py/images/images.py b/lib/py/images/images.py
> index 0bc0a1fa3ea4..1f0d7085c0b0 100644
> --- a/lib/py/images/images.py
> +++ b/lib/py/images/images.py
> @@ -370,6 +370,7 @@ handlers = {
> 'MNTS' : entry_handler(mnt_entry),
> 'TTY_FILES' : entry_handler(tty_file_entry),
> 'TTY_INFO' : entry_handler(tty_info_entry),
> + 'TTY_DATA' : entry_handler(tty_data_entry),
> 'RLIMIT' : entry_handler(rlimit_entry),
> 'TUNFILE' : entry_handler(tunfile_entry),
> 'EXT_FILES' : entry_handler(ext_file_entry),
>
More information about the CRIU
mailing list