[CRIU] [PATCH 18/18 v2] SCM: Dump and restore SCM_RIGHTs
Kirill Tkhai
ktkhai at virtuozzo.com
Wed Aug 9 21:17:51 MSK 2017
On 13.07.2017 14:24, Pavel Emelyanov wrote:
> Most of the pieces have already been described in the previous patches :)
> so here's the summary.
>
> * Dump:
>
> When receiving a message, also receive any SCM-s (already there) and when
> an SCM_RIGHTS one is met -- go ahead and just dump the received descriptors
> using regular code, but taking current as the victim task.
>
> Few words about file paths resolution -- since we do dump path-ed files
> by receiving them from victim's parasite, such files sent via sockets
> should still work OK, as we still receive them, just from another socket.
>
> Several problems here:
>
> 1. Unix sockets sent via unix sockets form knots. Not supported.
> 2. Eventpolls sent via unix might themselves poll unix sockets. Knots
> again. Not supported either.
>
> * Restore:
>
> On restore we need to make unix socket wait for the soon-to-be-scm-sent
> descriptors to get restored, so we need to find them, then put a dependency.
> After that, the fake fdinfo entry is attached to the respective file
> descs; once sent, the respective descriptors are closed.
>
> https://github.com/xemul/criu/issues/251
>
> v2: Addressed comments from Kirill
>
> * Moved prepare_scms before adding fake fles (with comment)
> * Add scm-only fles as fake, thus removing close_scm_fds
> * Try hard finding any suitable fle to use as scm one when
> queuing them for unix socket scm list, only allocate a new
> one if really needed
>
> Signed-off-by: Pavel Emelyanov <xemul at virtuozzo.com>
I skipped this one earlier; I thought the v2 here just meant the v2 series.
Reviewed-by: Kirill Tkhai <ktkhai at virtuozzo.com>
> ---
> criu/cr-restore.c | 23 ++++++++
> criu/include/sockets.h | 2 +
> criu/sk-queue.c | 140 ++++++++++++++++++++++++++++++++++++++++++++-
> criu/sk-unix.c | 152 ++++++++++++++++++++++++++++++++++++++++++++++++-
> images/sk-packet.proto | 6 ++
> 5 files changed, 318 insertions(+), 5 deletions(-)
>
> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> index e14fa06..e11d724 100644
> --- a/criu/cr-restore.c
> +++ b/criu/cr-restore.c
> @@ -359,6 +359,29 @@ static int root_prepare_shared(void)
> if (ret)
> goto err;
>
> + /*
> + * This should be called with all packets collected AND all
> + * fdescs and fles prepared BUT post-prep-s not run.
> + *
> + * Also, add_fake_fds_masters() should go afterwards
> + *
> + * 1)It may add a master file there, and this master must be
> + * resolved in add_fake_fds_masters(). Otherwise the task,
> + * which is the owner of this just added master, may not have
> + * rights to create the master (imagine, scm file is a socket
> + * of a net_ns, which can't be assigned by the task);
> + *
> + * 2)Another case -- there was not a task, which has
> + * permissions to create a socket, and you added it in
> + * prepare_scms(). In this case, we mustn't add one more fle
> + * in add_fake_fds_masters() -- and if this function is
> + * called after prepare_scms(), it won't add anything. This
> + * will reduce number of fake files, we add.
> + */
> + ret = prepare_scms();
> + if (ret)
> + goto err;
> +
> /* This func may add new files, so it must be called before post prepare */
> ret = add_fake_fds_masters();
> if (ret)
> diff --git a/criu/include/sockets.h b/criu/include/sockets.h
> index 3fa8017..1bd5c67 100644
> --- a/criu/include/sockets.h
> +++ b/criu/include/sockets.h
> @@ -38,6 +38,8 @@ extern int collect_sockets(struct ns_id *);
> extern struct collect_image_info inet_sk_cinfo;
> extern struct collect_image_info unix_sk_cinfo;
> extern int fix_external_unix_sockets(void);
> +extern int prepare_scms(void);
> +extern int unix_note_scm_rights(int id_for, uint32_t *file_ids, int *fds, int n_ids);
>
> extern struct collect_image_info netlink_sk_cinfo;
>
> diff --git a/criu/sk-queue.c b/criu/sk-queue.c
> index 77e203e..f3ebd6c 100644
> --- a/criu/sk-queue.c
> +++ b/criu/sk-queue.c
> @@ -18,9 +18,9 @@
> #include "util.h"
> #include "util-pie.h"
> #include "sockets.h"
> -
> +#include "xmalloc.h"
> #include "sk-queue.h"
> -
> +#include "files.h"
> #include "protobuf.h"
> #include "images/sk-packet.pb-c.h"
>
> @@ -28,6 +28,8 @@ struct sk_packet {
> struct list_head list;
> SkPacketEntry *entry;
> char *data;
> + unsigned scm_len;
> + int *scm;
> };
>
> static LIST_HEAD(packets_list);
> @@ -37,12 +39,22 @@ static int collect_one_packet(void *obj, ProtobufCMessage *msg, struct cr_img *i
> struct sk_packet *pkt = obj;
>
> pkt->entry = pb_msg(msg, SkPacketEntry);
> -
> + pkt->scm = NULL;
> pkt->data = xmalloc(pkt->entry->length);
> if (pkt->data ==NULL)
> return -1;
>
> /*
> + * See dump_packet_cmsg() -- only SCM_RIGHTS are supported and
> + * only 1 of that kind is possible, thus not more than 1 SCMs
> + * on a packet.
> + */
> + if (pkt->entry->n_scm > 1) {
> + pr_err("More than 1 SCM is not possible\n");
> + return -1;
> + }
> +
> + /*
> * NOTE: packet must be added to the tail. Otherwise sequence
> * will be broken.
> */
> @@ -64,6 +76,50 @@ struct collect_image_info sk_queues_cinfo = {
> .collect = collect_one_packet,
> };
>
> +static int dump_scm_rights(struct cmsghdr *ch, SkPacketEntry *pe)
> +{
> + int nr_fds, *fds, i;
> + void *buf;
> + ScmEntry *scme;
> +
> + nr_fds = (ch->cmsg_len - sizeof(*ch)) / sizeof(int);
> + fds = (int *)CMSG_DATA(ch);
> +
> + buf = xmalloc(sizeof(ScmEntry) + nr_fds * sizeof(uint32_t));
> + if (!buf)
> + return -1;
> +
> + scme = xptr_pull(&buf, ScmEntry);
> + scm_entry__init(scme);
> + scme->type = SCM_RIGHTS;
> + scme->n_rights = nr_fds;
> + scme->rights = xptr_pull_s(&buf, nr_fds * sizeof(uint32_t));
> +
> + for (i = 0; i < nr_fds; i++) {
> + int ftyp;
> +
> + if (dump_my_file(fds[i], &scme->rights[i], &ftyp))
> + return -1;
> +
> + /*
> + * Unix sent over Unix or Epoll with some other sh*t
> + * sent over unix (maybe with this very unix polled)
> + * are tricky and not supported for now. (XXX -- todo)
> + */
> + if (ftyp == FD_TYPES__UNIXSK || ftyp == FD_TYPES__EVENTPOLL) {
> + pr_err("Can't dump send %d (unix/epoll) fd\n", ftyp);
> + return -1;
> + }
> + }
> +
> + i = pe->n_scm++;
> + if (xrealloc_safe(&pe->scm, pe->n_scm * sizeof(ScmEntry*)))
> + return -1;
> +
> + pe->scm[i] = scme;
> + return 0;
> +}
> +
> /*
> * Maximum size of the control messages. XXX -- is there any
> * way to get this value out of the kernel?
> @@ -73,8 +129,26 @@ struct collect_image_info sk_queues_cinfo = {
> static int dump_packet_cmsg(struct msghdr *mh, SkPacketEntry *pe)
> {
> struct cmsghdr *ch;
> + int n_rights = 0;
>
> for (ch = CMSG_FIRSTHDR(mh); ch; ch = CMSG_NXTHDR(mh, ch)) {
> + if (ch->cmsg_type == SCM_RIGHTS) {
> + if (n_rights) {
> + /*
> + * Even if user is sending more than one cmsg with
> + * rights, kernel merges them all together on recv.
> + */
> + pr_err("Unexpected 2nd SCM_RIGHTS from the kernel\n");
> + return -1;
> + }
> +
> + if (dump_scm_rights(ch, pe))
> + return -1;
> +
> + n_rights++;
> + continue;
> + }
> +
> pr_err("Control messages in queue, not supported\n");
> return -1;
> }
> @@ -82,6 +156,18 @@ static int dump_packet_cmsg(struct msghdr *mh, SkPacketEntry *pe)
> return 0;
> }
>
> +static void release_cmsg(SkPacketEntry *pe)
> +{
> + int i;
> +
> + for (i = 0; i < pe->n_scm; i++)
> + xfree(pe->scm[i]);
> + xfree(pe->scm);
> +
> + pe->n_scm = 0;
> + pe->scm = NULL;
> +}
> +
> int dump_sk_queue(int sock_fd, int sock_id)
> {
> SkPacketEntry pe = SK_PACKET_ENTRY__INIT;
> @@ -181,6 +267,9 @@ int dump_sk_queue(int sock_fd, int sock_id)
> ret = -EIO;
> goto err_set_sock;
> }
> +
> + if (pe.scm)
> + release_cmsg(&pe);
> }
> ret = 0;
>
> @@ -209,6 +298,11 @@ static int send_one_pkt(int fd, struct sk_packet *pkt)
> iov.iov_base = pkt->data;
> iov.iov_len = entry->length;
>
> + if (pkt->scm != NULL) {
> + mh.msg_controllen = pkt->scm_len;
> + mh.msg_control = pkt->scm;
> + }
> +
> /*
> * Don't try to use sendfile here, because it use sendpage() and
> * all data are split on pages and a new skb is allocated for
> @@ -264,3 +358,43 @@ int restore_sk_queue(int fd, unsigned int peer_id)
> out:
> return ret;
> }
> +
> +int prepare_scms(void)
> +{
> + struct sk_packet *pkt;
> +
> + pr_info("Preparing SCMs\n");
> + list_for_each_entry(pkt, &packets_list, list) {
> + SkPacketEntry *pe = pkt->entry;
> + ScmEntry *se;
> + struct cmsghdr *ch;
> +
> + if (!pe->n_scm)
> + continue;
> +
> + se = pe->scm[0]; /* Only 1 SCM is possible */
> +
> + if (se->type == SCM_RIGHTS) {
> + pkt->scm_len = CMSG_SPACE(se->n_rights * sizeof(int));
> + pkt->scm = xmalloc(pkt->scm_len);
> + if (!pkt->scm)
> + return -1;
> +
> + ch = (struct cmsghdr *)pkt->scm; /* FIXME -- via msghdr */
> + ch->cmsg_level = SOL_SOCKET;
> + ch->cmsg_type = SCM_RIGHTS;
> + ch->cmsg_len = CMSG_LEN(se->n_rights * sizeof(int));
> +
> + if (unix_note_scm_rights(pe->id_for, se->rights,
> + (int *)CMSG_DATA(ch), se->n_rights))
> + return -1;
> +
> + continue;
> + }
> +
> + pr_err("Unsupported scm %d in image\n", se->type);
> + return -1;
> + }
> +
> + return 0;
> +}
> diff --git a/criu/sk-unix.c b/criu/sk-unix.c
> index 42ce1bb..3963a4a 100644
> --- a/criu/sk-unix.c
> +++ b/criu/sk-unix.c
> @@ -798,6 +798,7 @@ struct unix_sk_info {
> struct file_desc d;
> struct list_head connected; /* List of sockets, connected to me */
> struct list_head node; /* To link in peer's connected list */
> + struct list_head scm_fles;
>
> /*
> * For DGRAM sockets with queues, we should only restore the queue
> @@ -809,6 +810,11 @@ struct unix_sk_info {
> u8 listen:1;
> };
>
> +struct scm_fle {
> + struct list_head l;
> + struct fdinfo_list_entry *fle;
> +};
> +
> #define USK_PAIR_MASTER 0x1
> #define USK_PAIR_SLAVE 0x2
>
> @@ -824,6 +830,141 @@ static struct unix_sk_info *find_unix_sk_by_ino(int ino)
> return NULL;
> }
>
> +static struct unix_sk_info *find_queuer_for(int id)
> +{
> + struct unix_sk_info *ui;
> +
> + list_for_each_entry(ui, &unix_sockets, list) {
> + if (ui->queuer == id)
> + return ui;
> + }
> +
> + return NULL;
> +}
> +
> +static struct fdinfo_list_entry *get_fle_for_scm(struct file_desc *tgt,
> + struct pstree_item *owner)
> +{
> + struct fdinfo_list_entry *fle;
> + FdinfoEntry *e = NULL;
> + int fd;
> +
> + list_for_each_entry(fle, &tgt->fd_info_head, desc_list) {
> + if (fle->task == owner)
> + /*
> + * Owner already has this file in its fdtable.
> + * Just use one.
> + */
> + return fle;
> +
> + e = fle->fe; /* keep any for further reference */
> + }
> +
> + /*
> + * Some other task restores this file. Pretend that
> + * we're another user of it.
> + */
> + fd = find_unused_fd(owner, -1);
> + pr_info("`- will add SCM-only %d fd\n", fd);
> +
> + if (e != NULL) {
> + e = dup_fdinfo(e, fd, 0);
> + if (!e) {
> + pr_err("Can't duplicate fdinfo for scm\n");
> + return NULL;
> + }
> + } else {
> + /*
> + * This can happen if the file in question is
> + * sent over the socket and closed. In this case
> + * we need to ... invent a new one!
> + */
> +
> + e = xmalloc(sizeof(*e));
> + if (!e)
> + return NULL;
> +
> + fdinfo_entry__init(e);
> + e->id = tgt->id;
> + e->type = tgt->ops->type;
> + e->fd = fd;
> + e->flags = 0;
> + }
> +
> + /*
> + * Make this fle fake, so that files collecting engine
> + * closes them at the end.
> + */
> + return collect_fd_to(vpid(owner), e, rsti(owner), tgt, true);
> +}
> +
> +int unix_note_scm_rights(int id_for, uint32_t *file_ids, int *fds, int n_ids)
> +{
> + struct unix_sk_info *ui;
> + struct pstree_item *owner;
> + int i;
> +
> + ui = find_queuer_for(id_for);
> + if (!ui) {
> + pr_err("Can't find sender for %d\n", id_for);
> + return -1;
> + }
> +
> + pr_info("Found queuer for %d -> %d\n", id_for, ui->ue->id);
> + /*
> + * This is the task that will restore this socket
> + */
> + owner = file_master(&ui->d)->task;
> +
> + pr_info("-> will set up deps\n");
> + /*
> + * The ui will send data to the rights receiver. Add a fake fle
> + * for the file and a dependency.
> + */
> + for (i = 0; i < n_ids; i++) {
> + struct file_desc *tgt;
> + struct scm_fle *sfle;
> +
> + tgt = find_file_desc_raw(FD_TYPES__UND, file_ids[i]);
> + if (!tgt) {
> + pr_err("Can't find fdesc to send\n");
> + return -1;
> + }
> +
> + pr_info("scm: add file %d -> %d\n", tgt->id, vpid(owner));
> + sfle = xmalloc(sizeof(*sfle));
> + if (!sfle)
> + return -1;
> +
> + sfle->fle = get_fle_for_scm(tgt, owner);
> + if (!sfle->fle) {
> + pr_err("Can't request new fle for scm\n");
> + return -1;
> + }
> +
> + list_add_tail(&sfle->l, &ui->scm_fles);
> + fds[i] = sfle->fle->fe->fd;
> + }
> +
> + return 0;
> +}
> +
> +static int chk_restored_scms(struct unix_sk_info *ui)
> +{
> + struct scm_fle *sf, *n;
> +
> + list_for_each_entry_safe(sf, n, &ui->scm_fles, l) {
> + if (sf->fle->stage < FLE_OPEN)
> + return 1;
> +
> + /* Optimization for the next pass */
> + list_del(&sf->l);
> + xfree(sf);
> + }
> +
> + return 0;
> +}
> +
> static int wake_connected_sockets(struct unix_sk_info *ui)
> {
> struct fdinfo_list_entry *fle;
> @@ -1322,12 +1463,18 @@ static int open_unix_sk(struct file_desc *d, int *new_fd)
> struct unix_sk_info *ui;
> int ret;
>
> + ui = container_of(d, struct unix_sk_info, d);
> +
> + /* FIXME -- only queue restore may be postponed */
> + if (chk_restored_scms(ui)) {
> + pr_info("scm: Wait for tgt to restore\n");
> + return 1;
> + }
> +
> fle = file_master(d);
> if (fle->stage >= FLE_OPEN)
> return post_open_unix_sk(d, fle->fe->fd);
>
> - ui = container_of(d, struct unix_sk_info, d);
> -
> if (inherited_fd(d, new_fd)) {
> ui->ue->uflags |= USK_INHERIT;
> ret = *new_fd >= 0 ? 0 : -1;
> @@ -1440,6 +1587,7 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i)
> ui->listen = 0;
> INIT_LIST_HEAD(&ui->connected);
> INIT_LIST_HEAD(&ui->node);
> + INIT_LIST_HEAD(&ui->scm_fles);
> ui->flags = 0;
> fixup_sock_net_ns_id(&ui->ue->ns_id, &ui->ue->has_ns_id);
>
> diff --git a/images/sk-packet.proto b/images/sk-packet.proto
> index 27b48e4..009b461 100644
> --- a/images/sk-packet.proto
> +++ b/images/sk-packet.proto
> @@ -1,8 +1,14 @@
> syntax = "proto2";
>
> +message scm_entry {
> + required uint32 type = 1;
> + repeated uint32 rights = 2;
> +}
> +
> message sk_packet_entry {
> required uint32 id_for = 1;
> required uint32 length = 2;
> // optional bytes addr = 3;
> // optional sk_ucred_entry ucred = 128;
> + repeated scm_entry scm = 4;
> }
>
More information about the CRIU
mailing list