[CRIU] [PATCH 16/16] SCM: Dump and restore SCM_RIGHTs

Pavel Emelyanov xemul at virtuozzo.com
Thu Jul 6 19:18:35 MSK 2017


Most of the pieces has already been described in the previous patches :)
so here's the summary.

* Dump:

When receiving a message, also receive any SCM-s (already there) and when
SCM_RIGHTs one is met -- go ahead and just dump received descriptors using
regular code, but taking current as the victim task.

Few words about file paths resolution -- since we do dump path-ed files
by receiving them from victim's parasite, such files sent via sockets
should still work OK, as we still receive them, just from another socket.

Several problems here:

1. Unix sockets sent via unix sockets form loops. Not supported.
2. Eventpolls sent via unix might themseves poll unix sockets. Loops
   again. Not supported either.

* Restore:

On restore we need to make unix socket wait for the soon-to-be-scm-sent
descriptors to get restored, so we need to find them, then put a dependency.
After that, the fake fdinfo entry is attached to the respective file
descs, when sent the respective descriptors are closed.

For now more than one cmsg header in the message is not resotred,
because kernel merges all SCM_RIGHTS parts when receiving. Thus this
check is also put on dump path.

https://github.com/xemul/criu/issues/251

Signed-off-by: Pavel Emelyanov <xemul at virtuozzo.com>
---
 criu/cr-restore.c      |   8 +++
 criu/include/sockets.h |   2 +
 criu/sk-queue.c        | 157 ++++++++++++++++++++++++++++++++++++++++++++++++-
 criu/sk-unix.c         | 127 ++++++++++++++++++++++++++++++++++++++-
 images/sk-packet.proto |   6 ++
 5 files changed, 295 insertions(+), 5 deletions(-)

diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 965770f..7c9a18e 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -364,6 +364,14 @@ static int root_prepare_shared(void)
 	if (ret)
 		goto err;
 
+	/*
+	 * This should be called with all packets collected AND all
+	 * fdescs and fles prepared BUT post-prep-s not run.
+	 */
+	ret = prepare_scms();
+	if (ret)
+		goto err;
+
 	ret = run_post_prepare();
 	if (ret)
 		goto err;
diff --git a/criu/include/sockets.h b/criu/include/sockets.h
index 3fa8017..1bd5c67 100644
--- a/criu/include/sockets.h
+++ b/criu/include/sockets.h
@@ -38,6 +38,8 @@ extern int collect_sockets(struct ns_id *);
 extern struct collect_image_info inet_sk_cinfo;
 extern struct collect_image_info unix_sk_cinfo;
 extern int fix_external_unix_sockets(void);
+extern int prepare_scms(void);
+extern int unix_note_scm_rights(int id_for, uint32_t *file_ids, int *fds, int n_ids);
 
 extern struct collect_image_info netlink_sk_cinfo;
 
diff --git a/criu/sk-queue.c b/criu/sk-queue.c
index 77e203e..953db66 100644
--- a/criu/sk-queue.c
+++ b/criu/sk-queue.c
@@ -18,9 +18,9 @@
 #include "util.h"
 #include "util-pie.h"
 #include "sockets.h"
-
+#include "xmalloc.h"
 #include "sk-queue.h"
-
+#include "files.h"
 #include "protobuf.h"
 #include "images/sk-packet.pb-c.h"
 
@@ -28,6 +28,8 @@ struct sk_packet {
 	struct list_head	list;
 	SkPacketEntry		*entry;
 	char        		*data;
+	unsigned		scm_len;
+	int			*scm;
 };
 
 static LIST_HEAD(packets_list);
@@ -37,12 +39,22 @@ static int collect_one_packet(void *obj, ProtobufCMessage *msg, struct cr_img *i
 	struct sk_packet *pkt = obj;
 
 	pkt->entry = pb_msg(msg, SkPacketEntry);
-
+	pkt->scm = NULL;
 	pkt->data = xmalloc(pkt->entry->length);
 	if (pkt->data ==NULL)
 		return -1;
 
 	/*
+	 * See dump_packet_cmsg() -- only SCM_RIGHTS are supported and
+	 * only 1 of that kind is possible, thus not more than 1 SCMs
+	 * on a packet.
+	 */
+	if (pkt->entry->n_scm > 1) {
+		pr_err("More than 1 SCM is not possible\n");
+		return -1;
+	}
+
+	/*
 	 * NOTE: packet must be added to the tail. Otherwise sequence
 	 * will be broken.
 	 */
@@ -64,6 +76,50 @@ struct collect_image_info sk_queues_cinfo = {
 	.collect = collect_one_packet,
 };
 
+static int dump_scm_rights(struct cmsghdr *ch, SkPacketEntry *pe)
+{
+	int nr_fds, *fds, i;
+	void *buf;
+	ScmEntry *scme;
+
+	nr_fds = (ch->cmsg_len - sizeof(*ch)) / sizeof(int);
+	fds = (int *)CMSG_DATA(ch);
+
+	buf = xmalloc(sizeof(ScmEntry) + nr_fds * sizeof(uint32_t));
+	if (!buf)
+		return -1;
+
+	scme = xptr_pull(&buf, ScmEntry);
+	scm_entry__init(scme);
+	scme->type = SCM_RIGHTS;
+	scme->n_rights = nr_fds;
+	scme->rights = xptr_pull_s(&buf, nr_fds * sizeof(uint32_t));
+
+	for (i = 0; i < nr_fds; i++) {
+		int ftyp;
+
+		if (dump_my_file(fds[i], &scme->rights[i], &ftyp))
+			return -1;
+
+		/*
+		 * Unix sent over Unix or Epoll with some other sh*t
+		 * sent over unix (maybe with this very unix polled)
+		 * are tricky and not supported for now. (XXX -- todo)
+		 */
+		if (ftyp == FD_TYPES__UNIXSK || ftyp == FD_TYPES__EVENTPOLL) {
+			pr_err("Can't dump send %d (unix/epoll) fd\n", ftyp);
+			return -1;
+		}
+	}
+
+	i = pe->n_scm++;
+	if (xrealloc_safe(&pe->scm, pe->n_scm * sizeof(ScmEntry*)))
+		return -1;
+
+	pe->scm[i] = scme;
+	return 0;
+}
+
 /*
  * Maximum size of the control messages. XXX -- is there any
  * way to get this value out of the kernel?
@@ -73,8 +129,26 @@ struct collect_image_info sk_queues_cinfo = {
 static int dump_packet_cmsg(struct msghdr *mh, SkPacketEntry *pe)
 {
 	struct cmsghdr *ch;
+	int n_rights = 0;
 
 	for (ch = CMSG_FIRSTHDR(mh); ch; ch = CMSG_NXTHDR(mh, ch)) {
+		if (ch->cmsg_type == SCM_RIGHTS) {
+			if (n_rights) {
+				/*
+				 * Even if user is sending more than one cmsg with
+				 * rights, kernel merges them alltogether on recv.
+				 */
+				pr_err("Unexpected 2nd SCM_RIGHTS from the kernel\n");
+				return -1;
+			}
+
+			if (dump_scm_rights(ch, pe))
+				return -1;
+
+			n_rights++;
+			continue;
+		}
+
 		pr_err("Control messages in queue, not supported\n");
 		return -1;
 	}
@@ -82,6 +156,18 @@ static int dump_packet_cmsg(struct msghdr *mh, SkPacketEntry *pe)
 	return 0;
 }
 
+static void release_cmsg(SkPacketEntry *pe)
+{
+	int i;
+
+	for (i = 0; i < pe->n_scm; i++)
+		xfree(pe->scm[i]);
+	xfree(pe->scm);
+
+	pe->n_scm = 0;
+	pe->scm = NULL;
+}
+
 int dump_sk_queue(int sock_fd, int sock_id)
 {
 	SkPacketEntry pe = SK_PACKET_ENTRY__INIT;
@@ -181,6 +267,9 @@ int dump_sk_queue(int sock_fd, int sock_id)
 			ret = -EIO;
 			goto err_set_sock;
 		}
+
+		if (pe.scm)
+			release_cmsg(&pe);
 	}
 	ret = 0;
 
@@ -197,6 +286,20 @@ err_brk:
 	return ret;
 }
 
+static void close_scm_fds(struct sk_packet *pkt)
+{
+	int i, *fds, n_fds;
+	struct cmsghdr *ch = (struct cmsghdr *)pkt->scm;
+
+	fds = (int *)CMSG_DATA(ch);
+	n_fds = (ch->cmsg_len - sizeof(struct cmsghdr)) / sizeof(int);
+
+	for (i = 0; i < n_fds; i++) {
+		if (close(fds[i]))
+			pr_warn("scm: Error closing sent fd\n");
+	}
+}
+
 static int send_one_pkt(int fd, struct sk_packet *pkt)
 {
 	int ret;
@@ -209,6 +312,11 @@ static int send_one_pkt(int fd, struct sk_packet *pkt)
 	iov.iov_base = pkt->data;
 	iov.iov_len = entry->length;
 
+	if (pkt->scm != NULL) {
+		mh.msg_controllen = pkt->scm_len;
+		mh.msg_control = pkt->scm;
+	}
+
 	/*
 	 * Don't try to use sendfile here, because it use sendpage() and
 	 * all data are split on pages and a new skb is allocated for
@@ -229,6 +337,9 @@ static int send_one_pkt(int fd, struct sk_packet *pkt)
 		return -1;
 	}
 
+	if (pkt->scm != NULL)
+		close_scm_fds(pkt);
+
 	return 0;
 }
 
@@ -264,3 +375,43 @@ int restore_sk_queue(int fd, unsigned int peer_id)
 out:
 	return ret;
 }
+
+int prepare_scms(void)
+{
+	struct sk_packet *pkt;
+
+	pr_info("Preparing SCMs\n");
+	list_for_each_entry(pkt, &packets_list, list) {
+		SkPacketEntry *pe = pkt->entry;
+		ScmEntry *se;
+		struct cmsghdr *ch;
+
+		if (!pe->n_scm)
+			continue;
+
+		se = pe->scm[0]; /* Only 1 SCM is possible */
+
+		if (se->type == SCM_RIGHTS) {
+			pkt->scm_len = CMSG_SPACE(se->n_rights * sizeof(int));
+			pkt->scm = xmalloc(pkt->scm_len);
+			if (!pkt->scm)
+				return -1;
+
+			ch = (struct cmsghdr *)pkt->scm; /* FIXME -- via msghdr */
+			ch->cmsg_level = SOL_SOCKET;
+			ch->cmsg_type = SCM_RIGHTS;
+			ch->cmsg_len = CMSG_LEN(se->n_rights * sizeof(int));
+
+			if (unix_note_scm_rights(pe->id_for, se->rights,
+						(int *)CMSG_DATA(ch), se->n_rights))
+				return -1;
+
+			continue;
+		}
+
+		pr_err("Unsupported scm %d in image\n", se->type);
+		return -1;
+	}
+
+	return 0;
+}
diff --git a/criu/sk-unix.c b/criu/sk-unix.c
index f4ad618..6a9a4ca 100644
--- a/criu/sk-unix.c
+++ b/criu/sk-unix.c
@@ -798,6 +798,7 @@ struct unix_sk_info {
 	struct file_desc d;
 	struct list_head connected; /* List of sockets, connected to me */
 	struct list_head node; /* To link in peer's connected list  */
+	struct list_head scm_fles;
 
 	/*
 	 * For DGRAM sockets with queues, we should only restore the queue
@@ -809,6 +810,11 @@ struct unix_sk_info {
 	u8 listen:1;
 };
 
+struct scm_fle {
+	struct list_head l;
+	struct fdinfo_list_entry *fle;
+};
+
 #define USK_PAIR_MASTER		0x1
 #define USK_PAIR_SLAVE		0x2
 
@@ -824,6 +830,116 @@ static struct unix_sk_info *find_unix_sk_by_ino(int ino)
 	return NULL;
 }
 
+static struct unix_sk_info *find_queuer_for(int id)
+{
+	struct unix_sk_info *ui;
+
+	list_for_each_entry(ui, &unix_sockets, list) {
+		if (ui->queuer == id)
+			return ui;
+	}
+
+	return NULL;
+}
+
+int unix_note_scm_rights(int id_for, uint32_t *file_ids, int *fds, int n_ids)
+{
+	struct unix_sk_info *ui;
+	struct pstree_item *owner;
+	int i;
+
+	ui = find_queuer_for(id_for);
+	if (!ui) {
+		pr_err("Can't find sender for %d\n", id_for);
+		return -1;
+	}
+
+	pr_info("Found queuer for %d -> %d\n", id_for, ui->ue->id);
+	/*
+	 * This is the task that will restore this socket
+	 */
+	owner = file_master(&ui->d)->task;
+
+	pr_info("-> will set up deps\n");
+	/*
+	 * The ui will send data to the rights receiver. Add a fake fle
+	 * for the file and a dependency.
+	 */
+	for (i = 0; i < n_ids; i++) {
+		struct file_desc *tgt;
+		struct fdinfo_list_entry *fle;
+		struct scm_fle *sfle;
+		FdinfoEntry *e;
+		int fd;
+
+		tgt = find_file_desc_raw(FD_TYPES__UND, file_ids[i]);
+		if (!tgt) {
+			pr_err("Can't find fdesc to send\n");
+			return -1;
+		}
+
+		pr_info("`- Found %d file\n", file_ids[i]);
+		fd = find_unused_fd(owner, -1);
+
+		fle = try_file_master(tgt);
+		if (fle) {
+			e = dup_fdinfo(fle->fe, fd, 0);
+			if (!e) {
+				pr_err("Can't duplicate fdinfo for scm\n");
+				return -1;
+			}
+		} else {
+			/*
+			 * This can happen if the file in question is
+			 * sent over the socket and closed. In this case
+			 * we need to ... invent a new one!
+			 */
+
+			e = xmalloc(sizeof(*e));
+			if (!e)
+				return -1;
+
+			fdinfo_entry__init(e);
+			e->id = tgt->id;
+			e->type = tgt->ops->type;
+			e->fd = fd;
+			e->flags = 0;
+		}
+
+		pr_info("scm: add %d -> %d.fd[%d]\n", tgt->id, vpid(owner), fd);
+		sfle = xmalloc(sizeof(*sfle));
+		if (!sfle)
+			return -1;
+
+		sfle->fle = collect_fd_to(vpid(owner), e, rsti(owner), tgt, false);
+		if (!sfle->fle) {
+			pr_err("Can't request new fle for scm\n");
+			return -1;
+		}
+
+		list_add_tail(&sfle->l, &ui->scm_fles);
+		fds[i] = fd;
+	}
+
+	return 0;
+}
+
+static int chk_restored_scms(struct unix_sk_info *ui)
+{
+	struct scm_fle *sf, *n;
+
+	list_for_each_entry_safe(sf, n, &ui->scm_fles, l) {
+		if (sf->fle->stage != FLE_RESTORED)
+			return 1;
+
+		/* Optimization for the next pass */
+		list_del(&sf->l);
+		xfree(sf);
+	}
+
+	return 0;
+}
+
 static int wake_connected_sockets(struct unix_sk_info *ui)
 {
 	struct fdinfo_list_entry *fle;
@@ -1322,12 +1438,18 @@ static int open_unix_sk(struct file_desc *d, int *new_fd)
 	struct unix_sk_info *ui;
 	int ret;
 
+	ui = container_of(d, struct unix_sk_info, d);
+
+	/* FIXME -- only queue restore may be postponed */
+	if (chk_restored_scms(ui)) {
+		pr_info("scm: Wait for tgt to restore\n");
+		return 1;
+	}
+
 	fle = file_master(d);
 	if (fle->stage >= FLE_OPEN)
 		return post_open_unix_sk(d, fle->fe->fd);
 
-	ui = container_of(d, struct unix_sk_info, d);
-
 	if (inherited_fd(d, new_fd)) {
 		ui->ue->uflags |= USK_INHERIT;
 		ret = *new_fd >= 0 ? 0 : -1;
@@ -1440,6 +1562,7 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i)
 	ui->listen = 0;
 	INIT_LIST_HEAD(&ui->connected);
 	INIT_LIST_HEAD(&ui->node);
+	INIT_LIST_HEAD(&ui->scm_fles);
 	ui->flags = 0;
 	fixup_sock_net_ns_id(&ui->ue->ns_id, &ui->ue->has_ns_id);
 
diff --git a/images/sk-packet.proto b/images/sk-packet.proto
index 27b48e4..009b461 100644
--- a/images/sk-packet.proto
+++ b/images/sk-packet.proto
@@ -1,8 +1,14 @@
 syntax = "proto2";
 
+message scm_entry {
+	required uint32			type		= 1;
+	repeated uint32			rights		= 2;
+}
+
 message sk_packet_entry {
 	required uint32		id_for		= 1;
 	required uint32		length		= 2;
 	// optional bytes		addr	= 3;
 	// optional sk_ucred_entry	ucred	= 128;
+	repeated scm_entry	scm		= 4;
 }
-- 
2.1.4



More information about the CRIU mailing list