[CRIU] [PATCH 18/18 v2] SCM: Dump and restore SCM_RIGHTs
Pavel Emelyanov
xemul at virtuozzo.com
Thu Jul 13 14:24:29 MSK 2017
Most of the pieces have already been described in the previous patches :)
so here's the summary.
* Dump:
When receiving a message, also receive any SCMs (that part is already there),
and when an SCM_RIGHTS one is met, go ahead and dump the received descriptors
using the regular code, but taking the current task as the victim.
A few words about file path resolution -- since we already dump path-ed files
by receiving them from the victim's parasite, such files sent via sockets
should still work OK, as we still receive them, just from another socket.
Several problems here:
1. Unix sockets sent via unix sockets form knots. Not supported.
2. Eventpolls sent via unix sockets might themselves poll unix sockets. Knots
again. Not supported either.
* Restore:
On restore we need to make unix socket wait for the soon-to-be-scm-sent
descriptors to get restored, so we need to find them, then put a dependency.
After that, the fake fdinfo entry is attached to the respective file
descs; once sent, the respective descriptors are closed.
https://github.com/xemul/criu/issues/251
v2: Addressed comments from Kirill
* Moved prepare_scms before adding fake fles (with comment)
* Add scm-only fles as fake, thus removing close_scm_fds
* Try hard finding any suitable fle to use as scm one when
queuing them for unix socket scm list, only allocate a new
one if really needed
Signed-off-by: Pavel Emelyanov <xemul at virtuozzo.com>
---
criu/cr-restore.c | 23 ++++++++
criu/include/sockets.h | 2 +
criu/sk-queue.c | 140 ++++++++++++++++++++++++++++++++++++++++++++-
criu/sk-unix.c | 152 ++++++++++++++++++++++++++++++++++++++++++++++++-
images/sk-packet.proto | 6 ++
5 files changed, 318 insertions(+), 5 deletions(-)
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index e14fa06..e11d724 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -359,6 +359,29 @@ static int root_prepare_shared(void)
if (ret)
goto err;
+ /*
+ * This should be called with all packets collected AND all
+ * fdescs and fles prepared BUT post-prep-s not run.
+ *
+ * Also, add_fake_fds_masters() should go afterwards
+ *
+ * 1)It may add a master file there, and this master must be
+ * resolved in add_fake_fds_masters(). Otherwise the task,
+ * which is the owner of this just added master, may not have
+ * rights to create the master (imagine, scm file is a socket
+ * of a net_ns, which can't be assigned by the task);
+ *
+ * 2)Another case -- there was no task which has
+ * permissions to create a socket, and you added it in
+ * prepare_scms(). In this case, we mustn't add one more fle
+ * in add_fake_fds_masters() -- and if this function is
+ * called after prepare_scms(), it won't add anything. This
+ * will reduce number of fake files, we add.
+ */
+ ret = prepare_scms();
+ if (ret)
+ goto err;
+
/* This func may add new files, so it must be called before post prepare */
ret = add_fake_fds_masters();
if (ret)
diff --git a/criu/include/sockets.h b/criu/include/sockets.h
index 3fa8017..1bd5c67 100644
--- a/criu/include/sockets.h
+++ b/criu/include/sockets.h
@@ -38,6 +38,8 @@ extern int collect_sockets(struct ns_id *);
extern struct collect_image_info inet_sk_cinfo;
extern struct collect_image_info unix_sk_cinfo;
extern int fix_external_unix_sockets(void);
+extern int prepare_scms(void);
+extern int unix_note_scm_rights(int id_for, uint32_t *file_ids, int *fds, int n_ids);
extern struct collect_image_info netlink_sk_cinfo;
diff --git a/criu/sk-queue.c b/criu/sk-queue.c
index 77e203e..f3ebd6c 100644
--- a/criu/sk-queue.c
+++ b/criu/sk-queue.c
@@ -18,9 +18,9 @@
#include "util.h"
#include "util-pie.h"
#include "sockets.h"
-
+#include "xmalloc.h"
#include "sk-queue.h"
-
+#include "files.h"
#include "protobuf.h"
#include "images/sk-packet.pb-c.h"
@@ -28,6 +28,8 @@ struct sk_packet {
struct list_head list;
SkPacketEntry *entry;
char *data;
+ unsigned scm_len;
+ int *scm;
};
static LIST_HEAD(packets_list);
@@ -37,12 +39,22 @@ static int collect_one_packet(void *obj, ProtobufCMessage *msg, struct cr_img *i
struct sk_packet *pkt = obj;
pkt->entry = pb_msg(msg, SkPacketEntry);
-
+ pkt->scm = NULL;
pkt->data = xmalloc(pkt->entry->length);
if (pkt->data ==NULL)
return -1;
/*
+ * See dump_packet_cmsg() -- only SCM_RIGHTS are supported and
+ * only 1 of that kind is possible, thus not more than 1 SCMs
+ * on a packet.
+ */
+ if (pkt->entry->n_scm > 1) {
+ pr_err("More than 1 SCM is not possible\n");
+ return -1;
+ }
+
+ /*
* NOTE: packet must be added to the tail. Otherwise sequence
* will be broken.
*/
@@ -64,6 +76,50 @@ struct collect_image_info sk_queues_cinfo = {
.collect = collect_one_packet,
};
+/*
+ * Dump the descriptors carried by one SCM_RIGHTS control message.
+ *
+ * @ch: the received SCM_RIGHTS cmsg; its payload is an array of fds
+ * @pe: packet entry being built; on success one ScmEntry is appended
+ *      to pe->scm and pe->n_scm is incremented
+ *
+ * Every fd is dumped with dump_my_file(), which fills in the file ID
+ * stored in the image and reports the file type. Unix sockets and
+ * eventpolls are rejected.
+ *
+ * Returns 0 on success, -1 on error. On error @pe is left untouched
+ * and the ScmEntry allocated here is released.
+ */
+static int dump_scm_rights(struct cmsghdr *ch, SkPacketEntry *pe)
+{
+	int nr_fds, *fds, i;
+	void *buf;
+	ScmEntry *scme;
+
+	/* Payload size is cmsg_len minus the (aligned) header size */
+	nr_fds = (ch->cmsg_len - CMSG_LEN(0)) / sizeof(int);
+	fds = (int *)CMSG_DATA(ch);
+
+	/* One allocation holds both the entry and its rights[] array */
+	buf = xmalloc(sizeof(ScmEntry) + nr_fds * sizeof(uint32_t));
+	if (!buf)
+		return -1;
+
+	scme = xptr_pull(&buf, ScmEntry);
+	scm_entry__init(scme);
+	scme->type = SCM_RIGHTS;
+	scme->n_rights = nr_fds;
+	scme->rights = xptr_pull_s(&buf, nr_fds * sizeof(uint32_t));
+
+	for (i = 0; i < nr_fds; i++) {
+		int ftyp;
+
+		if (dump_my_file(fds[i], &scme->rights[i], &ftyp))
+			goto err;
+
+		/*
+		 * Unix sent over Unix or Epoll with some other sh*t
+		 * sent over unix (maybe with this very unix polled)
+		 * are tricky and not supported for now. (XXX -- todo)
+		 */
+		if (ftyp == FD_TYPES__UNIXSK || ftyp == FD_TYPES__EVENTPOLL) {
+			pr_err("Can't dump send %d (unix/epoll) fd\n", ftyp);
+			goto err;
+		}
+	}
+
+	/*
+	 * Grow the array first and bump the counter only once the slot
+	 * exists, so that n_scm never exceeds the array release_cmsg()
+	 * walks.
+	 */
+	if (xrealloc_safe(&pe->scm, (pe->n_scm + 1) * sizeof(ScmEntry*)))
+		goto err;
+
+	pe->scm[pe->n_scm++] = scme;
+	return 0;
+
+err:
+	/* scme is the start of the allocation (same pointer release_cmsg() frees) */
+	xfree(scme);
+	return -1;
+}
+
/*
* Maximum size of the control messages. XXX -- is there any
* way to get this value out of the kernel?
@@ -73,8 +129,26 @@ struct collect_image_info sk_queues_cinfo = {
static int dump_packet_cmsg(struct msghdr *mh, SkPacketEntry *pe)
{
struct cmsghdr *ch;
+ int n_rights = 0;
for (ch = CMSG_FIRSTHDR(mh); ch; ch = CMSG_NXTHDR(mh, ch)) {
+ if (ch->cmsg_type == SCM_RIGHTS) {
+ if (n_rights) {
+ /*
+ * Even if the user sends more than one cmsg with
+ * rights, the kernel merges them all together on recv.
+ */
+ pr_err("Unexpected 2nd SCM_RIGHTS from the kernel\n");
+ return -1;
+ }
+
+ if (dump_scm_rights(ch, pe))
+ return -1;
+
+ n_rights++;
+ continue;
+ }
+
pr_err("Control messages in queue, not supported\n");
return -1;
}
@@ -82,6 +156,18 @@ static int dump_packet_cmsg(struct msghdr *mh, SkPacketEntry *pe)
return 0;
}
+/*
+ * Free every ScmEntry attached to @pe, then the pointer array itself,
+ * and reset @pe so that it carries no SCMs.
+ */
+static void release_cmsg(SkPacketEntry *pe)
+{
+	int idx = 0;
+
+	while (idx < pe->n_scm) {
+		xfree(pe->scm[idx]);
+		idx++;
+	}
+
+	xfree(pe->scm);
+	pe->scm = NULL;
+	pe->n_scm = 0;
+}
+
int dump_sk_queue(int sock_fd, int sock_id)
{
SkPacketEntry pe = SK_PACKET_ENTRY__INIT;
@@ -181,6 +267,9 @@ int dump_sk_queue(int sock_fd, int sock_id)
ret = -EIO;
goto err_set_sock;
}
+
+ if (pe.scm)
+ release_cmsg(&pe);
}
ret = 0;
@@ -209,6 +298,11 @@ static int send_one_pkt(int fd, struct sk_packet *pkt)
iov.iov_base = pkt->data;
iov.iov_len = entry->length;
+ if (pkt->scm != NULL) {
+ mh.msg_controllen = pkt->scm_len;
+ mh.msg_control = pkt->scm;
+ }
+
/*
* Don't try to use sendfile here, because it use sendpage() and
* all data are split on pages and a new skb is allocated for
@@ -264,3 +358,43 @@ int restore_sk_queue(int fd, unsigned int peer_id)
out:
return ret;
}
+
+/*
+ * Walk all collected packets and, for each one that carries an SCM,
+ * build the control-message buffer that send_one_pkt() will attach.
+ *
+ * Only SCM_RIGHTS is handled: the cmsg header is filled in here and the
+ * fd array inside it is populated by unix_note_scm_rights().
+ *
+ * Returns 0 on success, -1 on allocation failure, on an unsupported
+ * SCM type or when unix_note_scm_rights() fails.
+ */
+int prepare_scms(void)
+{
+	struct sk_packet *pkt;
+
+	pr_info("Preparing SCMs\n");
+	list_for_each_entry(pkt, &packets_list, list) {
+		SkPacketEntry *pe = pkt->entry;
+		struct cmsghdr *hdr;
+		ScmEntry *scm;
+		size_t payload;
+
+		if (pe->n_scm == 0)
+			continue;
+
+		/* Only 1 SCM is possible */
+		scm = pe->scm[0];
+		if (scm->type != SCM_RIGHTS) {
+			pr_err("Unsupported scm %d in image\n", scm->type);
+			return -1;
+		}
+
+		payload = scm->n_rights * sizeof(int);
+
+		pkt->scm_len = CMSG_SPACE(payload);
+		pkt->scm = xmalloc(pkt->scm_len);
+		if (pkt->scm == NULL)
+			return -1;
+
+		/* FIXME -- via msghdr */
+		hdr = (struct cmsghdr *)pkt->scm;
+		hdr->cmsg_level = SOL_SOCKET;
+		hdr->cmsg_type = SCM_RIGHTS;
+		hdr->cmsg_len = CMSG_LEN(payload);
+
+		if (unix_note_scm_rights(pe->id_for, scm->rights,
+					 (int *)CMSG_DATA(hdr), scm->n_rights))
+			return -1;
+	}
+
+	return 0;
+}
diff --git a/criu/sk-unix.c b/criu/sk-unix.c
index 42ce1bb..3963a4a 100644
--- a/criu/sk-unix.c
+++ b/criu/sk-unix.c
@@ -798,6 +798,7 @@ struct unix_sk_info {
struct file_desc d;
struct list_head connected; /* List of sockets, connected to me */
struct list_head node; /* To link in peer's connected list */
+ struct list_head scm_fles;
/*
* For DGRAM sockets with queues, we should only restore the queue
@@ -809,6 +810,11 @@ struct unix_sk_info {
u8 listen:1;
};
+/*
+ * One file that a unix socket is going to send as SCM_RIGHTS.
+ * Entries are created in unix_note_scm_rights() and dropped in
+ * chk_restored_scms() once the file reaches the FLE_OPEN stage.
+ */
+struct scm_fle {
+ struct list_head l; /* link in unix_sk_info::scm_fles */
+ struct fdinfo_list_entry *fle; /* fle of the file to be sent */
+};
+
#define USK_PAIR_MASTER 0x1
#define USK_PAIR_SLAVE 0x2
@@ -824,6 +830,141 @@ static struct unix_sk_info *find_unix_sk_by_ino(int ino)
return NULL;
}
+/*
+ * Look through unix_sockets for the socket whose ->queuer equals @id.
+ * Returns the first match or NULL when there is none.
+ */
+static struct unix_sk_info *find_queuer_for(int id)
+{
+	struct unix_sk_info *sk, *found = NULL;
+
+	list_for_each_entry(sk, &unix_sockets, list) {
+		if (sk->queuer != id)
+			continue;
+
+		found = sk;
+		break;
+	}
+
+	return found;
+}
+
+/*
+ * Get an fle of file @tgt that belongs to task @owner.
+ *
+ * If @owner is already on @tgt's fd_info_head list, that fle is
+ * returned as is. Otherwise an unused fd number is picked in @owner,
+ * an FdinfoEntry is prepared for it (a duplicate of some other task's
+ * entry when one exists, a freshly initialized one otherwise) and the
+ * result of collect_fd_to() for it is returned.
+ *
+ * Returns NULL when the entry can't be duplicated or allocated.
+ *
+ * NOTE(review): the return value of find_unused_fd() is not checked,
+ * and the entry prepared here is not released if collect_fd_to()
+ * fails -- whether that helper takes ownership is not visible from
+ * this code; confirm.
+ */
+static struct fdinfo_list_entry *get_fle_for_scm(struct file_desc *tgt,
+ struct pstree_item *owner)
+{
+ struct fdinfo_list_entry *fle;
+ FdinfoEntry *e = NULL;
+ int fd;
+
+ list_for_each_entry(fle, &tgt->fd_info_head, desc_list) {
+ if (fle->task == owner)
+ /*
+ * Owner already has this file in its fdtable.
+ * Just use one.
+ */
+ return fle;
+
+ e = fle->fe; /* keep any for further reference */
+ }
+
+ /*
+ * Some other task restores this file. Pretend that
+ * we're another user of it.
+ */
+ fd = find_unused_fd(owner, -1);
+ pr_info("`- will add SCM-only %d fd\n", fd);
+
+ if (e != NULL) {
+ /* Reuse another task's entry as a template, but with our fd */
+ e = dup_fdinfo(e, fd, 0);
+ if (!e) {
+ pr_err("Can't duplicate fdinfo for scm\n");
+ return NULL;
+ }
+ } else {
+ /*
+ * This can happen if the file in question is
+ * sent over the socket and closed. In this case
+ * we need to ... invent a new one!
+ */
+
+ e = xmalloc(sizeof(*e));
+ if (!e)
+ return NULL;
+
+ fdinfo_entry__init(e);
+ e->id = tgt->id;
+ e->type = tgt->ops->type;
+ e->fd = fd;
+ e->flags = 0;
+ }
+
+ /*
+ * Make this fle fake, so that files collecting engine
+ * closes them at the end.
+ */
+ return collect_fd_to(vpid(owner), e, rsti(owner), tgt, true);
+}
+
+/*
+ * Note the files that one SCM_RIGHTS message will carry.
+ *
+ * @id_for:   ID the packet is queued for; matched against ->queuer of
+ *            the known unix sockets to find the socket that will send it
+ * @file_ids: IDs of the files to send, @n_ids entries
+ * @fds:      output array of @n_ids ints, filled with the fd numbers
+ *            under which the sending task will see those files
+ * @n_ids:    number of files
+ *
+ * For every file an fle in the sending task is found or created (see
+ * get_fle_for_scm()) and queued on the socket's scm_fles list, which
+ * chk_restored_scms() later polls.
+ *
+ * Returns 0 on success, -1 when the sender or a file can't be found or
+ * on allocation failure.
+ */
+int unix_note_scm_rights(int id_for, uint32_t *file_ids, int *fds, int n_ids)
+{
+	struct unix_sk_info *ui;
+	struct pstree_item *owner;
+	int i;
+
+	ui = find_queuer_for(id_for);
+	if (!ui) {
+		pr_err("Can't find sender for %d\n", id_for);
+		return -1;
+	}
+
+	pr_info("Found queuer for %d -> %d\n", id_for, ui->ue->id);
+	/*
+	 * This is the task that will restore this socket
+	 */
+	owner = file_master(&ui->d)->task;
+
+	pr_info("-> will set up deps\n");
+	/*
+	 * The ui will send data to the rights receiver. Add a fake fle
+	 * for the file and a dependency.
+	 */
+	for (i = 0; i < n_ids; i++) {
+		struct file_desc *tgt;
+		struct scm_fle *sfle;
+
+		tgt = find_file_desc_raw(FD_TYPES__UND, file_ids[i]);
+		if (!tgt) {
+			pr_err("Can't find fdesc to send\n");
+			return -1;
+		}
+
+		pr_info("scm: add file %d -> %d\n", tgt->id, vpid(owner));
+		sfle = xmalloc(sizeof(*sfle));
+		if (!sfle)
+			return -1;
+
+		sfle->fle = get_fle_for_scm(tgt, owner);
+		if (!sfle->fle) {
+			pr_err("Can't request new fle for scm\n");
+			/* Not linked into ui->scm_fles yet, so nobody else frees it */
+			xfree(sfle);
+			return -1;
+		}
+
+		list_add_tail(&sfle->l, &ui->scm_fles);
+		fds[i] = sfle->fle->fe->fd;
+	}
+
+	return 0;
+}
+
+/*
+ * Check whether every file queued on @ui's scm_fles list has reached
+ * the FLE_OPEN stage.
+ *
+ * Returns 1 as soon as a not-yet-open file is met, 0 when none is left.
+ * Entries found open are unlinked and freed on the way, so the next
+ * call does not look at them again.
+ */
+static int chk_restored_scms(struct unix_sk_info *ui)
+{
+	struct scm_fle *cur, *next;
+
+	list_for_each_entry_safe(cur, next, &ui->scm_fles, l) {
+		if (cur->fle->stage >= FLE_OPEN) {
+			/* Optimization for the next pass */
+			list_del(&cur->l);
+			xfree(cur);
+			continue;
+		}
+
+		return 1;
+	}
+
+	return 0;
+}
+
static int wake_connected_sockets(struct unix_sk_info *ui)
{
struct fdinfo_list_entry *fle;
@@ -1322,12 +1463,18 @@ static int open_unix_sk(struct file_desc *d, int *new_fd)
struct unix_sk_info *ui;
int ret;
+ ui = container_of(d, struct unix_sk_info, d);
+
+ /* FIXME -- only queue restore may be postponed */
+ if (chk_restored_scms(ui)) {
+ pr_info("scm: Wait for tgt to restore\n");
+ return 1;
+ }
+
fle = file_master(d);
if (fle->stage >= FLE_OPEN)
return post_open_unix_sk(d, fle->fe->fd);
- ui = container_of(d, struct unix_sk_info, d);
-
if (inherited_fd(d, new_fd)) {
ui->ue->uflags |= USK_INHERIT;
ret = *new_fd >= 0 ? 0 : -1;
@@ -1440,6 +1587,7 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i)
ui->listen = 0;
INIT_LIST_HEAD(&ui->connected);
INIT_LIST_HEAD(&ui->node);
+ INIT_LIST_HEAD(&ui->scm_fles);
ui->flags = 0;
fixup_sock_net_ns_id(&ui->ue->ns_id, &ui->ue->has_ns_id);
diff --git a/images/sk-packet.proto b/images/sk-packet.proto
index 27b48e4..009b461 100644
--- a/images/sk-packet.proto
+++ b/images/sk-packet.proto
@@ -1,8 +1,14 @@
syntax = "proto2";
+// One control message (SCM) stored together with a queued packet.
+message scm_entry {
+ // cmsg type; the dump code in this patch only writes SCM_RIGHTS
+ required uint32 type = 1;
+ // for SCM_RIGHTS: IDs of the dumped files that were in flight
+ repeated uint32 rights = 2;
+}
+
message sk_packet_entry {
required uint32 id_for = 1;
required uint32 length = 2;
// optional bytes addr = 3;
// optional sk_ucred_entry ucred = 128;
+ repeated scm_entry scm = 4;
}
--
2.1.4
More information about the CRIU
mailing list