[CRIU] [PATCH 18/18 v2] SCM: Dump and restore SCM_RIGHTs

Pavel Emelyanov xemul at virtuozzo.com
Thu Jul 13 14:24:29 MSK 2017


Most of the pieces have already been described in the previous patches :)
so here's the summary.

* Dump:

When receiving a message, also receive any SCM-s (already there) and when
an SCM_RIGHTS one is met -- go ahead and just dump the received descriptors
using the regular code, but taking current as the victim task.

A few words about file path resolution -- since we do dump path-ed files
by receiving them from victim's parasite, such files sent via sockets
should still work OK, as we still receive them, just from another socket.

Several problems here:

1. Unix sockets sent via unix sockets form knots. Not supported.
2. Eventpolls sent via unix might themselves poll unix sockets. Knots
   again. Not supported either.

* Restore:

On restore we need to make the unix socket wait for the soon-to-be-scm-sent
descriptors to get restored, so we need to find them, then add a dependency.
After that, a fake fdinfo entry is attached to the respective file
descs; once sent, the respective descriptors are closed.

https://github.com/xemul/criu/issues/251

v2: Addressed comments from Kirill

* Moved prepare_scms before adding fake fles (with comment)
* Add scm-only fles as fake, thus removing close_scm_fds
* Try hard to find any suitable fle to use as the scm one when
  queuing them for the unix socket scm list, only allocate a new
  one if really needed

Signed-off-by: Pavel Emelyanov <xemul at virtuozzo.com>
---
 criu/cr-restore.c      |  23 ++++++++
 criu/include/sockets.h |   2 +
 criu/sk-queue.c        | 140 ++++++++++++++++++++++++++++++++++++++++++++-
 criu/sk-unix.c         | 152 ++++++++++++++++++++++++++++++++++++++++++++++++-
 images/sk-packet.proto |   6 ++
 5 files changed, 318 insertions(+), 5 deletions(-)

diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index e14fa06..e11d724 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -359,6 +359,29 @@ static int root_prepare_shared(void)
 	if (ret)
 		goto err;
 
+	/*
+	 * This should be called with all packets collected AND all
+	 * fdescs and fles prepared BUT post-prep-s not run.
+	 *
+	 * Also, add_fake_fds_masters() should go afterwards
+	 *
+	 * 1) It may add a master file there, and this master must be
+	 * resolved in add_fake_fds_masters(). Otherwise the task,
+	 * which is the owner of this just added master, may not have
+	 * rights to create the master (imagine, scm file is a socket
+	 * of a net_ns, which can't be assigned by the task);
+	 *
+	 * 2) Another case -- there was no task which has
+	 * permissions to create a socket, and you added it in
+	 * prepare_scms(). In this case, we mustn't add one more fle
+	 * in add_fake_fds_masters() -- and if this function is
+	 * called after prepare_scms(), it won't add anything. This
+	 * will reduce the number of fake files we add.
+	 */
+	ret = prepare_scms();
+	if (ret)
+		goto err;
+
 	/* This func may add new files, so it must be called before post prepare */
 	ret = add_fake_fds_masters();
 	if (ret)
diff --git a/criu/include/sockets.h b/criu/include/sockets.h
index 3fa8017..1bd5c67 100644
--- a/criu/include/sockets.h
+++ b/criu/include/sockets.h
@@ -38,6 +38,8 @@ extern int collect_sockets(struct ns_id *);
 extern struct collect_image_info inet_sk_cinfo;
 extern struct collect_image_info unix_sk_cinfo;
 extern int fix_external_unix_sockets(void);
+extern int prepare_scms(void);
+extern int unix_note_scm_rights(int id_for, uint32_t *file_ids, int *fds, int n_ids);
 
 extern struct collect_image_info netlink_sk_cinfo;
 
diff --git a/criu/sk-queue.c b/criu/sk-queue.c
index 77e203e..f3ebd6c 100644
--- a/criu/sk-queue.c
+++ b/criu/sk-queue.c
@@ -18,9 +18,9 @@
 #include "util.h"
 #include "util-pie.h"
 #include "sockets.h"
-
+#include "xmalloc.h"
 #include "sk-queue.h"
-
+#include "files.h"
 #include "protobuf.h"
 #include "images/sk-packet.pb-c.h"
 
@@ -28,6 +28,8 @@ struct sk_packet {
 	struct list_head	list;
 	SkPacketEntry		*entry;
 	char        		*data;
+	unsigned		scm_len;
+	int			*scm;
 };
 
 static LIST_HEAD(packets_list);
@@ -37,12 +39,22 @@ static int collect_one_packet(void *obj, ProtobufCMessage *msg, struct cr_img *i
 	struct sk_packet *pkt = obj;
 
 	pkt->entry = pb_msg(msg, SkPacketEntry);
-
+	pkt->scm = NULL;
 	pkt->data = xmalloc(pkt->entry->length);
 	if (pkt->data ==NULL)
 		return -1;
 
 	/*
+	 * See dump_packet_cmsg() -- only SCM_RIGHTS are supported and
+	 * only 1 of that kind is possible, thus not more than 1 SCMs
+	 * on a packet.
+	 */
+	if (pkt->entry->n_scm > 1) {
+		pr_err("More than 1 SCM is not possible\n");
+		return -1;
+	}
+
+	/*
 	 * NOTE: packet must be added to the tail. Otherwise sequence
 	 * will be broken.
 	 */
@@ -64,6 +76,50 @@ struct collect_image_info sk_queues_cinfo = {
 	.collect = collect_one_packet,
 };
 
+static int dump_scm_rights(struct cmsghdr *ch, SkPacketEntry *pe)
+{
+	int nr_fds, *fds, i;
+	void *buf;
+	ScmEntry *scme;
+
+	nr_fds = (ch->cmsg_len - sizeof(*ch)) / sizeof(int);
+	fds = (int *)CMSG_DATA(ch);
+
+	buf = xmalloc(sizeof(ScmEntry) + nr_fds * sizeof(uint32_t));
+	if (!buf)
+		return -1;
+
+	scme = xptr_pull(&buf, ScmEntry);
+	scm_entry__init(scme);
+	scme->type = SCM_RIGHTS;
+	scme->n_rights = nr_fds;
+	scme->rights = xptr_pull_s(&buf, nr_fds * sizeof(uint32_t));
+
+	for (i = 0; i < nr_fds; i++) {
+		int ftyp;
+
+		if (dump_my_file(fds[i], &scme->rights[i], &ftyp))
+			return -1;
+
+		/*
+		 * Unix sent over Unix or Epoll with some other sh*t
+		 * sent over unix (maybe with this very unix polled)
+		 * are tricky and not supported for now. (XXX -- todo)
+		 */
+		if (ftyp == FD_TYPES__UNIXSK || ftyp == FD_TYPES__EVENTPOLL) {
+			pr_err("Can't dump send %d (unix/epoll) fd\n", ftyp);
+			return -1;
+		}
+	}
+
+	i = pe->n_scm++;
+	if (xrealloc_safe(&pe->scm, pe->n_scm * sizeof(ScmEntry*)))
+		return -1;
+
+	pe->scm[i] = scme;
+	return 0;
+}
+
 /*
  * Maximum size of the control messages. XXX -- is there any
  * way to get this value out of the kernel?
@@ -73,8 +129,26 @@ struct collect_image_info sk_queues_cinfo = {
 static int dump_packet_cmsg(struct msghdr *mh, SkPacketEntry *pe)
 {
 	struct cmsghdr *ch;
+	int n_rights = 0;
 
 	for (ch = CMSG_FIRSTHDR(mh); ch; ch = CMSG_NXTHDR(mh, ch)) {
+		if (ch->cmsg_type == SCM_RIGHTS) {
+			if (n_rights) {
+				/*
+				 * Even if the user is sending more than one cmsg with
+				 * rights, the kernel merges them all together on recv.
+				 */
+				pr_err("Unexpected 2nd SCM_RIGHTS from the kernel\n");
+				return -1;
+			}
+
+			if (dump_scm_rights(ch, pe))
+				return -1;
+
+			n_rights++;
+			continue;
+		}
+
 		pr_err("Control messages in queue, not supported\n");
 		return -1;
 	}
@@ -82,6 +156,18 @@ static int dump_packet_cmsg(struct msghdr *mh, SkPacketEntry *pe)
 	return 0;
 }
 
+static void release_cmsg(SkPacketEntry *pe)
+{
+	int i;
+
+	for (i = 0; i < pe->n_scm; i++)
+		xfree(pe->scm[i]);
+	xfree(pe->scm);
+
+	pe->n_scm = 0;
+	pe->scm = NULL;
+}
+
 int dump_sk_queue(int sock_fd, int sock_id)
 {
 	SkPacketEntry pe = SK_PACKET_ENTRY__INIT;
@@ -181,6 +267,9 @@ int dump_sk_queue(int sock_fd, int sock_id)
 			ret = -EIO;
 			goto err_set_sock;
 		}
+
+		if (pe.scm)
+			release_cmsg(&pe);
 	}
 	ret = 0;
 
@@ -209,6 +298,11 @@ static int send_one_pkt(int fd, struct sk_packet *pkt)
 	iov.iov_base = pkt->data;
 	iov.iov_len = entry->length;
 
+	if (pkt->scm != NULL) {
+		mh.msg_controllen = pkt->scm_len;
+		mh.msg_control = pkt->scm;
+	}
+
 	/*
 	 * Don't try to use sendfile here, because it use sendpage() and
 	 * all data are split on pages and a new skb is allocated for
@@ -264,3 +358,43 @@ int restore_sk_queue(int fd, unsigned int peer_id)
 out:
 	return ret;
 }
+
+int prepare_scms(void)
+{
+	struct sk_packet *pkt;
+
+	pr_info("Preparing SCMs\n");
+	list_for_each_entry(pkt, &packets_list, list) {
+		SkPacketEntry *pe = pkt->entry;
+		ScmEntry *se;
+		struct cmsghdr *ch;
+
+		if (!pe->n_scm)
+			continue;
+
+		se = pe->scm[0]; /* Only 1 SCM is possible */
+
+		if (se->type == SCM_RIGHTS) {
+			pkt->scm_len = CMSG_SPACE(se->n_rights * sizeof(int));
+			pkt->scm = xmalloc(pkt->scm_len);
+			if (!pkt->scm)
+				return -1;
+
+			ch = (struct cmsghdr *)pkt->scm; /* FIXME -- via msghdr */
+			ch->cmsg_level = SOL_SOCKET;
+			ch->cmsg_type = SCM_RIGHTS;
+			ch->cmsg_len = CMSG_LEN(se->n_rights * sizeof(int));
+
+			if (unix_note_scm_rights(pe->id_for, se->rights,
+						(int *)CMSG_DATA(ch), se->n_rights))
+				return -1;
+
+			continue;
+		}
+
+		pr_err("Unsupported scm %d in image\n", se->type);
+		return -1;
+	}
+
+	return 0;
+}
diff --git a/criu/sk-unix.c b/criu/sk-unix.c
index 42ce1bb..3963a4a 100644
--- a/criu/sk-unix.c
+++ b/criu/sk-unix.c
@@ -798,6 +798,7 @@ struct unix_sk_info {
 	struct file_desc d;
 	struct list_head connected; /* List of sockets, connected to me */
 	struct list_head node; /* To link in peer's connected list  */
+	struct list_head scm_fles;
 
 	/*
 	 * For DGRAM sockets with queues, we should only restore the queue
@@ -809,6 +810,11 @@ struct unix_sk_info {
 	u8 listen:1;
 };
 
+struct scm_fle {
+	struct list_head l;
+	struct fdinfo_list_entry *fle;
+};
+
 #define USK_PAIR_MASTER		0x1
 #define USK_PAIR_SLAVE		0x2
 
@@ -824,6 +830,141 @@ static struct unix_sk_info *find_unix_sk_by_ino(int ino)
 	return NULL;
 }
 
+static struct unix_sk_info *find_queuer_for(int id)
+{
+	struct unix_sk_info *ui;
+
+	list_for_each_entry(ui, &unix_sockets, list) {
+		if (ui->queuer == id)
+			return ui;
+	}
+
+	return NULL;
+}
+
+static struct fdinfo_list_entry *get_fle_for_scm(struct file_desc *tgt,
+		struct pstree_item *owner)
+{
+	struct fdinfo_list_entry *fle;
+	FdinfoEntry *e = NULL;
+	int fd;
+
+	list_for_each_entry(fle, &tgt->fd_info_head, desc_list) {
+		if (fle->task == owner)
+			/*
+			 * Owner already has this file in its fdtable.
+			 * Just use one.
+			 */
+			return fle;
+
+		e = fle->fe; /* keep any for further reference */
+	}
+
+	/*
+	 * Some other task restores this file. Pretend that
+	 * we're another user of it.
+	 */
+	fd = find_unused_fd(owner, -1);
+	pr_info("`- will add SCM-only %d fd\n", fd);
+
+	if (e != NULL) {
+		e = dup_fdinfo(e, fd, 0);
+		if (!e) {
+			pr_err("Can't duplicate fdinfo for scm\n");
+			return NULL;
+		}
+	} else {
+		/*
+		 * This can happen if the file in question is
+		 * sent over the socket and closed. In this case
+		 * we need to ... invent a new one!
+		 */
+
+		e = xmalloc(sizeof(*e));
+		if (!e)
+			return NULL;
+
+		fdinfo_entry__init(e);
+		e->id = tgt->id;
+		e->type = tgt->ops->type;
+		e->fd = fd;
+		e->flags = 0;
+	}
+
+	/*
+	 * Make this fle fake, so that files collecting engine
+	 * closes them at the end.
+	 */
+	return collect_fd_to(vpid(owner), e, rsti(owner), tgt, true);
+}
+
+int unix_note_scm_rights(int id_for, uint32_t *file_ids, int *fds, int n_ids)
+{
+	struct unix_sk_info *ui;
+	struct pstree_item *owner;
+	int i;
+
+	ui = find_queuer_for(id_for);
+	if (!ui) {
+		pr_err("Can't find sender for %d\n", id_for);
+		return -1;
+	}
+
+	pr_info("Found queuer for %d -> %d\n", id_for, ui->ue->id);
+	/*
+	 * This is the task that will restore this socket
+	 */
+	owner = file_master(&ui->d)->task;
+
+	pr_info("-> will set up deps\n");
+	/*
+	 * The ui will send data to the rights receiver. Add a fake fle
+	 * for the file and a dependency.
+	 */
+	for (i = 0; i < n_ids; i++) {
+		struct file_desc *tgt;
+		struct scm_fle *sfle;
+
+		tgt = find_file_desc_raw(FD_TYPES__UND, file_ids[i]);
+		if (!tgt) {
+			pr_err("Can't find fdesc to send\n");
+			return -1;
+		}
+
+		pr_info("scm: add file %d -> %d\n", tgt->id, vpid(owner));
+		sfle = xmalloc(sizeof(*sfle));
+		if (!sfle)
+			return -1;
+
+		sfle->fle = get_fle_for_scm(tgt, owner);
+		if (!sfle->fle) {
+			pr_err("Can't request new fle for scm\n");
+			return -1;
+		}
+
+		list_add_tail(&sfle->l, &ui->scm_fles);
+		fds[i] = sfle->fle->fe->fd;
+	}
+
+	return 0;
+}
+
+static int chk_restored_scms(struct unix_sk_info *ui)
+{
+	struct scm_fle *sf, *n;
+
+	list_for_each_entry_safe(sf, n, &ui->scm_fles, l) {
+		if (sf->fle->stage < FLE_OPEN)
+			return 1;
+
+		/* Optimization for the next pass */
+		list_del(&sf->l);
+		xfree(sf);
+	}
+
+	return 0;
+}
+
 static int wake_connected_sockets(struct unix_sk_info *ui)
 {
 	struct fdinfo_list_entry *fle;
@@ -1322,12 +1463,18 @@ static int open_unix_sk(struct file_desc *d, int *new_fd)
 	struct unix_sk_info *ui;
 	int ret;
 
+	ui = container_of(d, struct unix_sk_info, d);
+
+	/* FIXME -- only queue restore may be postponed */
+	if (chk_restored_scms(ui)) {
+		pr_info("scm: Wait for tgt to restore\n");
+		return 1;
+	}
+
 	fle = file_master(d);
 	if (fle->stage >= FLE_OPEN)
 		return post_open_unix_sk(d, fle->fe->fd);
 
-	ui = container_of(d, struct unix_sk_info, d);
-
 	if (inherited_fd(d, new_fd)) {
 		ui->ue->uflags |= USK_INHERIT;
 		ret = *new_fd >= 0 ? 0 : -1;
@@ -1440,6 +1587,7 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i)
 	ui->listen = 0;
 	INIT_LIST_HEAD(&ui->connected);
 	INIT_LIST_HEAD(&ui->node);
+	INIT_LIST_HEAD(&ui->scm_fles);
 	ui->flags = 0;
 	fixup_sock_net_ns_id(&ui->ue->ns_id, &ui->ue->has_ns_id);
 
diff --git a/images/sk-packet.proto b/images/sk-packet.proto
index 27b48e4..009b461 100644
--- a/images/sk-packet.proto
+++ b/images/sk-packet.proto
@@ -1,8 +1,14 @@
 syntax = "proto2";
 
+message scm_entry {
+	required uint32			type		= 1;
+	repeated uint32			rights		= 2;
+}
+
 message sk_packet_entry {
 	required uint32		id_for		= 1;
 	required uint32		length		= 2;
 	// optional bytes		addr	= 3;
 	// optional sk_ucred_entry	ucred	= 128;
+	repeated scm_entry	scm		= 4;
 }
-- 
2.1.4



More information about the CRIU mailing list