[CRIU] [PATCH RFC 30/30] files: Kill struct file_desc_ops::post_open

Kirill Tkhai ktkhai at virtuozzo.com
Tue Nov 1 07:35:09 PDT 2016


This makes opening fds asynchronous. Now we restore them in a poll style.
Every ->open callback operates like a poll: it returns ORV_AGAIN
if some events have not been received yet, or ORV_OK if restoring of the
fle has finished.

We iterate over the list of file descriptors and look at the return value
for each of them. If there was no progress in restoring the fles, we sleep
on the task_st futex. As all events are sent together with a change of
task_st, we will be woken up when there is some new work for us.

Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
 criu/autofs.c        |   25 ++++++++
 criu/eventpoll.c     |   11 +++-
 criu/files.c         |  153 ++++++++++++++++++++++----------------------------
 criu/include/files.h |    5 --
 criu/include/tty.h   |    2 +
 criu/sk-inet.c       |   14 +++--
 criu/sk-unix.c       |   15 ++++-
 criu/tty.c           |    8 +--
 8 files changed, 123 insertions(+), 110 deletions(-)

diff --git a/criu/autofs.c b/criu/autofs.c
index 3abf404..e616294 100644
--- a/criu/autofs.c
+++ b/criu/autofs.c
@@ -890,6 +890,29 @@ static int autofs_create_fle(struct pstree_item *task, FdinfoEntry *fe,
 	return 0;
 }
 
+static int autofs_open_pipefd(struct file_desc *d, int *new_fd)
+{
+	struct fdinfo_list_entry *fle = file_master(d);
+	int ret;
+
+	if (fle->stage < FLE_OPEN) {
+		ret = open_pipe(d, new_fd);
+		if (ret != ORV_OK)
+			return ret;
+		/*
+		 * We could go further and call post_open right now,
+		 * but common code wants to set flags for this fd.
+		 * Let's allow it to do that before the fd is closed.
+		 * The two lines below will be deleted once ghost files
+		 * are implemented.
+		 */
+		set_fds_event(fle->pid);
+		return ORV_AGAIN;
+	}
+
+	return autofs_post_open(d, fle->fe->fd);
+}
+
 static int autofs_create_pipe(struct pstree_item *task, autofs_info_t *i,
 			      struct fdinfo_list_entry *ple)
 {
@@ -906,7 +929,7 @@ static int autofs_create_pipe(struct pstree_item *task, autofs_info_t *i,
 	if (!ops)
 		return -1;
 	memcpy(ops, pi->d.ops, sizeof(*ops));
-	ops->post_open = autofs_post_open;
+	ops->open = autofs_open_pipefd;
 
 	pe = shmalloc(sizeof(*pe));
 	if (!pe)
diff --git a/criu/eventpoll.c b/criu/eventpoll.c
index 3def66c..36f52b9 100644
--- a/criu/eventpoll.c
+++ b/criu/eventpoll.c
@@ -114,6 +114,8 @@ const struct fdtype_ops eventpoll_dump_ops = {
 	.dump		= dump_one_eventpoll,
 };
 
+static int eventpoll_post_open(struct file_desc *d, int fd);
+
 static int eventpoll_open(struct file_desc *d, int *new_fd)
 {
 	struct fdinfo_list_entry *fle = file_master(d);
@@ -122,6 +124,9 @@ static int eventpoll_open(struct file_desc *d, int *new_fd)
 
 	info = container_of(d, struct eventpoll_file_info, d);
 
+	if (fle->stage >= FLE_OPEN)
+		goto post_open;
+
 	pr_info_eventpoll("Restore ", info->efe);
 
 	tmp = epoll_create(1);
@@ -140,7 +145,8 @@ static int eventpoll_open(struct file_desc *d, int *new_fd)
 	fle->stage = FLE_MAY_BE_POLLED;
 
 	*new_fd = tmp;
-	return ORV_OK;
+post_open:
+	return eventpoll_post_open(d, fle->fe->fd);
 err_close:
 	close(tmp);
 	return -1;
@@ -204,7 +210,7 @@ static int eventpoll_post_open(struct file_desc *d, int fd)
 
 	}
 
-	return 0;
+	return ORV_OK;
 }
 
 static void eventpoll_collect_fd(struct file_desc *d,
@@ -216,7 +222,6 @@ static void eventpoll_collect_fd(struct file_desc *d,
 static struct file_desc_ops desc_ops = {
 	.type = FD_TYPES__EVENTPOLL,
 	.open = eventpoll_open,
-	.post_open = eventpoll_post_open,
 	.collect_fd = eventpoll_collect_fd,
 };
 
diff --git a/criu/files.c b/criu/files.c
index 67d279d..8bc8d70 100644
--- a/criu/files.c
+++ b/criu/files.c
@@ -149,39 +149,6 @@ unsigned int find_unused_fd(struct list_head *head, int hint_fd)
 	return fd;
 }
 
-/*
- * A file may be shared between several file descriptors. E.g
- * when doing a fork() every fd of a forker and respective fds
- * of the child have such. Another way of getting shared files
- * is by dup()-ing them or sending them via unix sockets in
- * SCM_RIGHTS message.
- *
- * We restore this type of things in 3 steps (states[] below)
- *
- * 1. Prepare step.
- *    Select which task will create the file (open() one, or
- *    call any other syscall for than (socket, pipe, etc.). All
- *    the others, that share one, create unix sockets under the
- *    respective file descriptor (transport socket).
- * 2. Open step.
- *    The one who creates the file (the 'master') creates one,
- *    then creates one more unix socket (transport) and sends the
- *    created file over this socket to the other recipients.
- * 3. Receive step.
- *    Those, who wait for the file to appear, receive one via
- *    the transport socket, then close the socket and dup() the
- *    received file descriptor into its place.
- *
- * There's the 4th step in the states[] array -- the post_open
- * one. This one is not about file-sharing resolving, but about
- * doing something with a file using it's 'desired' fd. The
- * thing is that while going the 3-step process above, the file
- * may appear in variuos places in the task's fd table, and if
- * we want to do something with it's _final_ descriptor value,
- * we should wait for it to appear there. So the post_open is
- * called when the file is finally set into its place.
- */
-
 struct fdinfo_list_entry *file_master(struct file_desc *d)
 {
 	if (list_empty(&d->fd_info_head)) {
@@ -859,14 +826,7 @@ struct fd_open_state {
 	int (*cb)(int, struct fdinfo_list_entry *);
 };
 
-static int open_fd(int pid, struct fdinfo_list_entry *fle);
 static int receive_fd(int pid, struct fdinfo_list_entry *fle);
-static int post_open_fd(int pid, struct fdinfo_list_entry *fle);
-
-static struct fd_open_state states[] = {
-	{ "create",		open_fd,	},
-	{ "post_create",	post_open_fd,	},
-};
 
 static void transport_name_gen(struct sockaddr_un *addr, int *len, int pid)
 {
@@ -977,29 +937,6 @@ static int send_fd_to_self(int fd, struct fdinfo_list_entry *fle)
 	return set_fds_event(fle->pid);
 }
 
-static int post_open_fd(int pid, struct fdinfo_list_entry *fle)
-{
-	struct file_desc *d = fle->desc;
-
-	if (fle != file_master(d)) {
-		if (receive_fd(pid, fle) != ORV_OK) {
-			pr_err("Can't receive\n");
-			return -1;
-		}
-		if (!is_service_fd(fle->fe->fd, CTL_TTY_OFF))
-			goto out;
-	}
-
-	if (!d->ops->post_open)
-		goto out;
-	if (d->ops->post_open(d, fle->fe->fd))
-		return -1;
-out:
-	fle->stage = FLE_RESTORED;
-	return 0;
-}
-
-
 static int serve_out_fd(int pid, int fd, struct file_desc *d)
 {
 	int ret;
@@ -1024,16 +961,10 @@ static int serve_out_fd(int pid, int fd, struct file_desc *d)
 	return ret;
 }
 
-static int open_fd(int pid, struct fdinfo_list_entry *fle)
+static int setup_and_serve_out(struct fdinfo_list_entry *fle, int new_fd)
 {
 	struct file_desc *d = fle->desc;
-	int new_fd;
-
-	if (fle != file_master(d))
-		return 0;
-
-	if (d->ops->open(d, &new_fd) < 0)
-		return -1;
+	pid_t pid = fle->pid;
 
 	if (reopen_fd_as(fle->fe->fd, new_fd))
 		return -1;
@@ -1046,7 +977,37 @@ static int open_fd(int pid, struct fdinfo_list_entry *fle)
 	if (fle->stage < FLE_OPEN)
 		fle->stage = FLE_OPEN;
 
-	return serve_out_fd(pid, fle->fe->fd, d);
+	if (serve_out_fd(pid, fle->fe->fd, d))
+		return -1;
+	return 0;
+}
+
+static int open_fd(int pid, struct fdinfo_list_entry *fle)
+{
+	struct file_desc *d = fle->desc;
+	struct fdinfo_list_entry *flem;
+	int new_fd = -1, ret;
+
+	flem = file_master(d);
+	if (fle != flem) {
+		BUG_ON (fle->stage != FLE_INITIALIZED);
+		ret = receive_fd(pid, fle);
+		if (ret != ORV_OK)
+			return ret;
+
+		fle->stage = FLE_RESTORED;
+		return ORV_OK;
+	}
+
+	ret = d->ops->open(d, &new_fd);
+	if (ret != ORV_ERR && new_fd >= 0) {
+		if (setup_and_serve_out(fle, new_fd) < 0)
+			return ORV_ERR;
+	}
+
+	if (ret == ORV_OK)
+		fle->stage = FLE_RESTORED;
+	return ret;
 }
 
 static int receive_fd(int pid, struct fdinfo_list_entry *fle)
@@ -1070,25 +1031,43 @@ static int receive_fd(int pid, struct fdinfo_list_entry *fle)
 	return 0;
 }
 
-static int open_fdinfo(int pid, struct fdinfo_list_entry *fle, int state)
-{
-	pr_info("\tRestoring fd %d (state -> %s)\n",
-			fle->fe->fd, states[state].name);
-	return states[state].cb(pid, fle);
-}
-
 static int open_fdinfos(int pid, struct list_head *list)
 {
-	int state, ret = 0;
-	struct fdinfo_list_entry *fle;
+	struct fdinfo_list_entry *fle, *tmp, *service_fle = NULL;
+	LIST_HEAD(completed);
+	bool progress, again;
+	int st, ret = 0;
 
-	for (state = 0; state < ARRAY_SIZE(states); state++) {
-		list_for_each_entry(fle, list, ps_list) {
-			ret = open_fdinfo(pid, fle, state);
-			if (ret)
-				break;
+	do {
+		progress = again = false;
+
+		list_for_each_entry_safe(fle, tmp, list, ps_list) {
+			st = fle->stage;
+			BUG_ON(st == FLE_RESTORED);
+			ret = open_fd(pid, fle);
+			if (ret == ORV_ERR)
+				goto splice;
+			if (st != fle->stage || ret == ORV_OK)
+				progress = true;
+			if (ret == ORV_OK) {
+				list_del(&fle->ps_list);
+				list_add(&fle->ps_list, &completed);
+			}
+			if (ret == ORV_AGAIN)
+			       again = true;
+			if (fle->fe->fd == get_service_fd(CTL_TTY_OFF))
+				service_fle = fle;
 		}
-	}
+		if (!progress && again)
+			wait_fds_event();
+	} while (again || progress);
+
+	BUG_ON(!list_empty(list));
+splice:
+	list_splice(&completed, list);
+
+	if (ret == ORV_OK && service_fle)
+		ret = tty_restore_ctl_terminal(service_fle->desc, service_fle->fe->fd);
 
 	return ret;
 }
diff --git a/criu/include/files.h b/criu/include/files.h
index 51dae22..6512a07 100644
--- a/criu/include/files.h
+++ b/criu/include/files.h
@@ -115,11 +115,6 @@ struct file_desc_ops {
 	 */
 	int			(*open)(struct file_desc *d, int *new_fd);
 	/*
-	 * Called on a file when all files of that type are opened
-	 * and with the fd being the "restored" one.
-	 */
-	int			(*post_open)(struct file_desc *d, int fd);
-	/*
 	 * Called to collect a new fd before adding it on desc. Clients
 	 * may chose to collect it to some specific rst_info list. See
 	 * prepare_fds() for details.
diff --git a/criu/include/tty.h b/criu/include/tty.h
index 2ff32c0..6fa00db 100644
--- a/criu/include/tty.h
+++ b/criu/include/tty.h
@@ -32,6 +32,8 @@ extern int prepare_shared_tty(void);
 extern int tty_prep_fds(void);
 extern void tty_fini_fds(void);
 
+extern int tty_restore_ctl_terminal(struct file_desc *d, int fd);
+
 #define OPT_SHELL_JOB	"shell-job"
 
 #endif /* __CR_TTY_H__ */
diff --git a/criu/sk-inet.c b/criu/sk-inet.c
index 6352b2b..63084a0 100644
--- a/criu/sk-inet.c
+++ b/criu/sk-inet.c
@@ -453,7 +453,6 @@ static int post_open_inet_sk(struct file_desc *d, int sk);
 static struct file_desc_ops inet_desc_ops = {
 	.type = FD_TYPES__INETSK,
 	.open = open_inet_sk,
-	.post_open = post_open_inet_sk,
 };
 
 static inline int tcp_connection(InetSkEntry *ie)
@@ -548,14 +547,14 @@ static int post_open_inet_sk(struct file_desc *d, int sk)
 	if (ii->ie->opts->reuseaddr)
 		return 0;
 
-	while (atomic_read(&ii->port->users))
-		wait_fds_event();
+	if (atomic_read(&ii->port->users))
+		return ORV_AGAIN;
 
 	val = ii->ie->opts->reuseaddr;
 	if (restore_opt(sk, SOL_SOCKET, SO_REUSEADDR, &val))
 		return -1;
 
-	return 0;
+	return ORV_OK;
 }
 
 int restore_ip_opts(int sk, IpOptsEntry *ioe)
@@ -570,10 +569,14 @@ int restore_ip_opts(int sk, IpOptsEntry *ioe)
 
 static int open_inet_sk(struct file_desc *d, int *new_fd)
 {
+	struct fdinfo_list_entry *fle = file_master(d);
 	struct inet_sk_info *ii;
 	InetSkEntry *ie;
 	int sk, yes = 1;
 
+	if (fle->stage >= FLE_OPEN)
+		goto post_open;
+
 	ii = container_of(d, struct inet_sk_info, d);
 	ie = ii->ie;
 
@@ -667,7 +670,8 @@ static int open_inet_sk(struct file_desc *d, int *new_fd)
 		goto err;
 
 	*new_fd = sk;
-	return ORV_OK;
+post_open:
+	return post_open_inet_sk(d, fle->fe->fd);
 
 err:
 	close(sk);
diff --git a/criu/sk-unix.c b/criu/sk-unix.c
index b60ed54..be22a42 100644
--- a/criu/sk-unix.c
+++ b/criu/sk-unix.c
@@ -902,8 +902,8 @@ static int post_open_unix_sk(struct file_desc *d, int fd)
 
 	/* Skip external sockets */
 	if (!list_empty(&peer->d.fd_info_head))
-		while (peer_is_not_prepared(peer))
-			wait_fds_event();
+		if (peer_is_not_prepared(peer))
+			return ORV_AGAIN;
 
 	if (ui->ue->uflags & USK_INHERIT)
 		return 0;
@@ -1278,9 +1278,14 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd)
 
 static int open_unix_sk(struct file_desc *d, int *new_fd)
 {
+	struct fdinfo_list_entry *fle;
 	struct unix_sk_info *ui;
 	int ret;
 
+	fle = file_master(d);
+	if (fle->stage >= FLE_OPEN)
+		goto post_open;
+
 	ui = container_of(d, struct unix_sk_info, d);
 
 	if (inherited_fd(d, new_fd)) {
@@ -1293,7 +1298,10 @@ static int open_unix_sk(struct file_desc *d, int *new_fd)
 	else
 		ret = open_unixsk_standalone(ui, new_fd);
 
-	return ret;
+	if (ret != ORV_OK)
+		return ret;
+post_open:
+	return post_open_unix_sk(d, fle->fe->fd);
 }
 
 static char *socket_d_name(struct file_desc *d, char *buf, size_t s)
@@ -1314,7 +1322,6 @@ static char *socket_d_name(struct file_desc *d, char *buf, size_t s)
 static struct file_desc_ops unix_desc_ops = {
 	.type = FD_TYPES__UNIXSK,
 	.open = open_unix_sk,
-	.post_open = post_open_unix_sk,
 	.name = socket_d_name,
 };
 
diff --git a/criu/tty.c b/criu/tty.c
index 1bf772b..a745e41 100644
--- a/criu/tty.c
+++ b/criu/tty.c
@@ -659,7 +659,7 @@ static int tty_set_prgp(int fd, int group)
 	return 0;
 }
 
-static int tty_restore_ctl_terminal(struct file_desc *d, int fd)
+int tty_restore_ctl_terminal(struct file_desc *d, int fd)
 {
 	struct tty_info *info = container_of(d, struct tty_info, d);
 	struct tty_driver *driver = info->driver;
@@ -667,8 +667,7 @@ static int tty_restore_ctl_terminal(struct file_desc *d, int fd)
 	struct file_desc *slave_d;
 	int slave = -1, ret = -1, index = -1;
 
-	if (!is_service_fd(fd, CTL_TTY_OFF))
-		return 0;
+	BUG_ON(!is_service_fd(fd, CTL_TTY_OFF));
 
 	if (driver->type == TTY_TYPE__EXT_TTY) {
 		slave = -1;
@@ -708,7 +707,7 @@ static int tty_restore_ctl_terminal(struct file_desc *d, int fd)
 err:
 	pty_free_fake_reg(&fake);
 	close(fd);
-	return ret;
+	return ret ? ORV_ERR : ORV_OK;
 }
 
 static bool tty_is_master(struct tty_info *info)
@@ -1149,7 +1148,6 @@ static char *tty_d_name(struct file_desc *d, char *buf, size_t s)
 static struct file_desc_ops tty_desc_ops = {
 	.type		= FD_TYPES__TTY,
 	.open		= tty_open,
-	.post_open	= tty_restore_ctl_terminal,
 	.collect_fd	= tty_collect_fd,
 	.name		= tty_d_name,
 };



More information about the CRIU mailing list