[CRIU] [PATCH v5 26/33] files: Kill struct file_desc_ops::post_open

Kirill Tkhai ktkhai at virtuozzo.com
Mon Dec 26 06:29:16 PST 2016


This makes opening fds asynchronous. Now we restore them in a poll-like
style. Every ->open callback operates like a poll: it returns ORV_AGAIN
if some events have not been received yet, or ORV_OK if restoring of the
fle has finished.

We iterate over the list of file descriptors and look at the return value
for each of them. If there was no progress in restoring any fle, we sleep
on the task_st futex. As all events are sent together with changing
task_st, we will be woken up when there is some new work for us.

v5: Clear FDS_EVENT at the beginning of every cycle.
    Use 0, -1 and 1 for successful return, error and "again" request.

v4: unix: 1)standalone sockets return ORV_AGAIN after first open
            to reopen fd by generic code
          2)do not call post_open_unix_sk() for !standalone sockets
    inet, epoll: return ORV_AGAIN after first open to reopen fd by generic code
    autofs: iterate used list instead of fds list

Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
 criu/autofs.c        |   22 ++++++-
 criu/eventpoll.c     |    9 ++-
 criu/files.c         |  160 +++++++++++++++++++++++---------------------------
 criu/include/files.h |    5 --
 criu/include/tty.h   |    2 +
 criu/sk-inet.c       |   12 ++--
 criu/sk-unix.c       |   12 +++-
 criu/tty.c           |    8 +--
 8 files changed, 119 insertions(+), 111 deletions(-)

diff --git a/criu/autofs.c b/criu/autofs.c
index d9f518fdd..c8237dba9 100644
--- a/criu/autofs.c
+++ b/criu/autofs.c
@@ -832,7 +832,7 @@ static struct fdinfo_list_entry *find_fle_by_fd(struct list_head *head, int fd)
 {
 	struct fdinfo_list_entry *fle;
 
-	list_for_each_entry(fle, head, ps_list) {
+	list_for_each_entry(fle, head, used_list) {
 		if (fle->fe->fd == fd)
 			return fle;
 	}
@@ -848,7 +848,7 @@ static struct fdinfo_list_entry *autofs_pipe_le(struct pstree_item *master,
 	if (entry->has_read_fd)
 		pipe_fd = entry->read_fd;
 
-	ple = find_fle_by_fd(&rsti(master)->fds, pipe_fd);
+	ple = find_fle_by_fd(&rsti(master)->used, pipe_fd);
 	if (!ple) {
 		pr_err("Failed to find pipe fd %d in process %d\n",
 				pipe_fd, master->pid.virt);
@@ -887,6 +887,22 @@ static int autofs_create_fle(struct pstree_item *task, FdinfoEntry *fe,
 	return 0;
 }
 
+static int autofs_open_pipefd(struct file_desc *d, int *new_fd)
+{
+	struct fdinfo_list_entry *fle = file_master(d);
+	int ret;
+
+	if (fle->stage < FLE_OPEN) {
+		ret = open_pipe(d, new_fd);
+		if (ret != 0)
+			return ret;
+		set_fds_event(fle->pid);
+		return 1;
+	}
+
+	return autofs_post_open(d, fle->fe->fd);
+}
+
 static int autofs_create_pipe(struct pstree_item *task, autofs_info_t *i,
 			      struct fdinfo_list_entry *ple)
 {
@@ -903,7 +919,7 @@ static int autofs_create_pipe(struct pstree_item *task, autofs_info_t *i,
 	if (!ops)
 		return -1;
 	memcpy(ops, pi->d.ops, sizeof(*ops));
-	ops->post_open = autofs_post_open;
+	ops->open = autofs_open_pipefd;
 
 	pe = shmalloc(sizeof(*pe));
 	if (!pe)
diff --git a/criu/eventpoll.c b/criu/eventpoll.c
index aaea54a56..28d68f7e9 100644
--- a/criu/eventpoll.c
+++ b/criu/eventpoll.c
@@ -114,13 +114,19 @@ const struct fdtype_ops eventpoll_dump_ops = {
 	.dump		= dump_one_eventpoll,
 };
 
+static int eventpoll_post_open(struct file_desc *d, int fd);
+
 static int eventpoll_open(struct file_desc *d, int *new_fd)
 {
+	struct fdinfo_list_entry *fle = file_master(d);
 	struct eventpoll_file_info *info;
 	int tmp;
 
 	info = container_of(d, struct eventpoll_file_info, d);
 
+	if (fle->stage >= FLE_OPEN)
+		return eventpoll_post_open(d, fle->fe->fd);
+
 	pr_info_eventpoll("Restore ", info->efe);
 
 	tmp = epoll_create(1);
@@ -137,7 +143,7 @@ static int eventpoll_open(struct file_desc *d, int *new_fd)
 	}
 
 	*new_fd = tmp;
-	return 0;
+	return 1;
 err_close:
 	close(tmp);
 	return -1;
@@ -218,7 +224,6 @@ static void eventpoll_collect_fd(struct file_desc *d,
 static struct file_desc_ops desc_ops = {
 	.type = FD_TYPES__EVENTPOLL,
 	.open = eventpoll_open,
-	.post_open = eventpoll_post_open,
 	.collect_fd = eventpoll_collect_fd,
 };
 
diff --git a/criu/files.c b/criu/files.c
index 28090c476..c8a9cae99 100644
--- a/criu/files.c
+++ b/criu/files.c
@@ -178,38 +178,6 @@ void wait_fds_event(void)
 	futex_wait_if_cond(f, FDS_EVENT, &);
 	clear_fds_event();
 }
-/*
- * A file may be shared between several file descriptors. E.g
- * when doing a fork() every fd of a forker and respective fds
- * of the child have such. Another way of getting shared files
- * is by dup()-ing them or sending them via unix sockets in
- * SCM_RIGHTS message.
- *
- * We restore this type of things in 3 steps (states[] below)
- *
- * 1. Prepare step.
- *    Select which task will create the file (open() one, or
- *    call any other syscall for than (socket, pipe, etc.). All
- *    the others, that share one, create unix sockets under the
- *    respective file descriptor (transport socket).
- * 2. Open step.
- *    The one who creates the file (the 'master') creates one,
- *    then creates one more unix socket (transport) and sends the
- *    created file over this socket to the other recipients.
- * 3. Receive step.
- *    Those, who wait for the file to appear, receive one via
- *    the transport socket, then close the socket and dup() the
- *    received file descriptor into its place.
- *
- * There's the 4th step in the states[] array -- the post_open
- * one. This one is not about file-sharing resolving, but about
- * doing something with a file using it's 'desired' fd. The
- * thing is that while going the 3-step process above, the file
- * may appear in variuos places in the task's fd table, and if
- * we want to do something with it's _final_ descriptor value,
- * we should wait for it to appear there. So the post_open is
- * called when the file is finally set into its place.
- */
 
 struct fdinfo_list_entry *file_master(struct file_desc *d)
 {
@@ -888,14 +856,7 @@ struct fd_open_state {
 	int (*cb)(int, struct fdinfo_list_entry *);
 };
 
-static int open_fd(int pid, struct fdinfo_list_entry *fle);
 static int receive_fd(int pid, struct fdinfo_list_entry *fle);
-static int post_open_fd(int pid, struct fdinfo_list_entry *fle);
-
-static struct fd_open_state states[] = {
-	{ "create",		open_fd,	},
-	{ "post_create",	post_open_fd,	},
-};
 
 static void transport_name_gen(struct sockaddr_un *addr, int *len, int pid)
 {
@@ -1004,29 +965,6 @@ static int send_fd_to_self(int fd, struct fdinfo_list_entry *fle)
 	return 0;
 }
 
-static int post_open_fd(int pid, struct fdinfo_list_entry *fle)
-{
-	struct file_desc *d = fle->desc;
-
-	if (fle != file_master(d)) {
-		if (receive_fd(pid, fle) != 0) {
-			pr_err("Can't receive\n");
-			return -1;
-		}
-		if (!is_service_fd(fle->fe->fd, CTL_TTY_OFF))
-			goto out;
-	}
-
-	if (!d->ops->post_open)
-		goto out;
-	if (d->ops->post_open(d, fle->fe->fd))
-		return -1;
-out:
-	fle->stage = FLE_RESTORED;
-	return 0;
-}
-
-
 static int serve_out_fd(int pid, int fd, struct file_desc *d)
 {
 	int ret;
@@ -1051,16 +989,10 @@ static int serve_out_fd(int pid, int fd, struct file_desc *d)
 	return ret;
 }
 
-static int open_fd(int pid, struct fdinfo_list_entry *fle)
+static int setup_and_serve_out(struct fdinfo_list_entry *fle, int new_fd)
 {
 	struct file_desc *d = fle->desc;
-	int new_fd;
-
-	if (fle != file_master(d))
-		return 0;
-
-	if (d->ops->open(d, &new_fd) < 0)
-		return -1;
+	pid_t pid = fle->pid;
 
 	if (reopen_fd_as(fle->fe->fd, new_fd))
 		return -1;
@@ -1073,7 +1005,37 @@ static int open_fd(int pid, struct fdinfo_list_entry *fle)
 	if (fle->stage < FLE_OPEN)
 		fle->stage = FLE_OPEN;
 
-	return serve_out_fd(pid, fle->fe->fd, d);
+	if (serve_out_fd(pid, fle->fe->fd, d))
+		return -1;
+	return 0;
+}
+
+static int open_fd(int pid, struct fdinfo_list_entry *fle)
+{
+	struct file_desc *d = fle->desc;
+	struct fdinfo_list_entry *flem;
+	int new_fd = -1, ret;
+
+	flem = file_master(d);
+	if (fle != flem) {
+		BUG_ON (fle->stage != FLE_INITIALIZED);
+		ret = receive_fd(pid, fle);
+		if (ret != 0)
+			return ret;
+
+		fle->stage = FLE_RESTORED;
+		return 0;
+	}
+
+	ret = d->ops->open(d, &new_fd);
+	if (ret != -1 && new_fd >= 0) {
+		if (setup_and_serve_out(fle, new_fd) < 0)
+			return -1;
+	}
+
+	if (ret == 0)
+		fle->stage = FLE_RESTORED;
+	return ret;
 }
 
 static int receive_fd(int pid, struct fdinfo_list_entry *fle)
@@ -1097,25 +1059,49 @@ static int receive_fd(int pid, struct fdinfo_list_entry *fle)
 	return 0;
 }
 
-static int open_fdinfo(int pid, struct fdinfo_list_entry *fle, int state)
-{
-	pr_info("\tRestoring fd %d (state -> %s)\n",
-			fle->fe->fd, states[state].name);
-	return states[state].cb(pid, fle);
-}
-
 static int open_fdinfos(int pid, struct list_head *list)
 {
-	int state, ret = 0;
-	struct fdinfo_list_entry *fle;
-
-	for (state = 0; state < ARRAY_SIZE(states); state++) {
-		list_for_each_entry(fle, list, ps_list) {
-			ret = open_fdinfo(pid, fle, state);
-			if (ret)
-				break;
+	struct fdinfo_list_entry *fle, *tmp, *service_fle = NULL;
+	LIST_HEAD(completed);
+	bool progress, again;
+	int st, ret = 0;
+
+	do {
+		progress = again = false;
+		clear_fds_event();
+
+		list_for_each_entry_safe(fle, tmp, list, ps_list) {
+			st = fle->stage;
+			BUG_ON(st == FLE_RESTORED);
+			ret = open_fd(pid, fle);
+			if (ret == -1)
+				goto splice;
+			if (st != fle->stage || ret == 0)
+				progress = true;
+			if (ret == 0) {
+				/*
+				 * We delete restored items from fds list,
+				 * so open() methods may base on this feature
+				 * and reduce number of fles in their checks.
+				 */
+				list_del(&fle->ps_list);
+				list_add(&fle->ps_list, &completed);
+			}
+			if (ret == 1)
+			       again = true;
+			if (fle->fe->fd == get_service_fd(CTL_TTY_OFF))
+				service_fle = fle;
 		}
-	}
+		if (!progress && again)
+			wait_fds_event();
+	} while (again || progress);
+
+	BUG_ON(!list_empty(list));
+splice:
+	list_splice(&completed, list);
+
+	if (ret == 0 && service_fle)
+		ret = tty_restore_ctl_terminal(service_fle->desc, service_fle->fe->fd);
 
 	return ret;
 }
diff --git a/criu/include/files.h b/criu/include/files.h
index 093e077d0..7044033e7 100644
--- a/criu/include/files.h
+++ b/criu/include/files.h
@@ -107,11 +107,6 @@ struct file_desc_ops {
 	 */
 	int			(*open)(struct file_desc *d, int *new_fd);
 	/*
-	 * Called on a file when all files of that type are opened
-	 * and with the fd being the "restored" one.
-	 */
-	int			(*post_open)(struct file_desc *d, int fd);
-	/*
 	 * Called to collect a new fd before adding it on desc. Clients
 	 * may chose to collect it to some specific rst_info list. See
 	 * prepare_fds() for details.
diff --git a/criu/include/tty.h b/criu/include/tty.h
index 2ff32c070..6fa00db2c 100644
--- a/criu/include/tty.h
+++ b/criu/include/tty.h
@@ -32,6 +32,8 @@ extern int prepare_shared_tty(void);
 extern int tty_prep_fds(void);
 extern void tty_fini_fds(void);
 
+extern int tty_restore_ctl_terminal(struct file_desc *d, int fd);
+
 #define OPT_SHELL_JOB	"shell-job"
 
 #endif /* __CR_TTY_H__ */
diff --git a/criu/sk-inet.c b/criu/sk-inet.c
index 552f8bba3..8e0050282 100644
--- a/criu/sk-inet.c
+++ b/criu/sk-inet.c
@@ -490,7 +490,6 @@ static int post_open_inet_sk(struct file_desc *d, int sk);
 static struct file_desc_ops inet_desc_ops = {
 	.type = FD_TYPES__INETSK,
 	.open = open_inet_sk,
-	.post_open = post_open_inet_sk,
 };
 
 static inline int tcp_connection(InetSkEntry *ie)
@@ -585,8 +584,8 @@ static int post_open_inet_sk(struct file_desc *d, int sk)
 	if (ii->ie->opts->reuseaddr)
 		return 0;
 
-	while (atomic_read(&ii->port->users))
-		wait_fds_event();
+	if (atomic_read(&ii->port->users))
+		return 1;
 
 	val = ii->ie->opts->reuseaddr;
 	if (restore_opt(sk, SOL_SOCKET, SO_REUSEADDR, &val))
@@ -606,10 +605,14 @@ int restore_ip_opts(int sk, IpOptsEntry *ioe)
 }
 static int open_inet_sk(struct file_desc *d, int *new_fd)
 {
+	struct fdinfo_list_entry *fle = file_master(d);
 	struct inet_sk_info *ii;
 	InetSkEntry *ie;
 	int sk, yes = 1;
 
+	if (fle->stage >= FLE_OPEN)
+		return post_open_inet_sk(d, fle->fe->fd);
+
 	ii = container_of(d, struct inet_sk_info, d);
 	ie = ii->ie;
 
@@ -702,8 +705,7 @@ static int open_inet_sk(struct file_desc *d, int *new_fd)
 		goto err;
 
 	*new_fd = sk;
-	return 0;
-
+	return 1;
 err:
 	close(sk);
 	return -1;
diff --git a/criu/sk-unix.c b/criu/sk-unix.c
index 845d90fba..423655e4f 100644
--- a/criu/sk-unix.c
+++ b/criu/sk-unix.c
@@ -902,8 +902,8 @@ static int post_open_unix_sk(struct file_desc *d, int fd)
 
 	/* Skip external sockets */
 	if (!list_empty(&peer->d.fd_info_head))
-		while (peer_is_not_prepared(peer))
-			wait_fds_event();
+		if (peer_is_not_prepared(peer))
+			return 1;
 
 	if (ui->ue->uflags & USK_INHERIT)
 		return 0;
@@ -1269,14 +1269,19 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd)
 		return -1;
 
 	*new_fd = sk;
-	return 0;
+	return 1;
 }
 
 static int open_unix_sk(struct file_desc *d, int *new_fd)
 {
+	struct fdinfo_list_entry *fle;
 	struct unix_sk_info *ui;
 	int ret;
 
+	fle = file_master(d);
+	if (fle->stage >= FLE_OPEN)
+		return post_open_unix_sk(d, fle->fe->fd);
+
 	ui = container_of(d, struct unix_sk_info, d);
 
 	if (inherited_fd(d, new_fd)) {
@@ -1310,7 +1315,6 @@ static char *socket_d_name(struct file_desc *d, char *buf, size_t s)
 static struct file_desc_ops unix_desc_ops = {
 	.type = FD_TYPES__UNIXSK,
 	.open = open_unix_sk,
-	.post_open = post_open_unix_sk,
 	.name = socket_d_name,
 };
 
diff --git a/criu/tty.c b/criu/tty.c
index d367421dc..7d0c58d1a 100644
--- a/criu/tty.c
+++ b/criu/tty.c
@@ -659,7 +659,7 @@ static int tty_set_prgp(int fd, int group)
 	return 0;
 }
 
-static int tty_restore_ctl_terminal(struct file_desc *d, int fd)
+int tty_restore_ctl_terminal(struct file_desc *d, int fd)
 {
 	struct tty_info *info = container_of(d, struct tty_info, d);
 	struct tty_driver *driver = info->driver;
@@ -667,8 +667,7 @@ static int tty_restore_ctl_terminal(struct file_desc *d, int fd)
 	struct file_desc *slave_d;
 	int slave = -1, ret = -1, index = -1;
 
-	if (!is_service_fd(fd, CTL_TTY_OFF))
-		return 0;
+	BUG_ON(!is_service_fd(fd, CTL_TTY_OFF));
 
 	if (driver->type == TTY_TYPE__EXT_TTY) {
 		slave = -1;
@@ -708,7 +707,7 @@ static int tty_restore_ctl_terminal(struct file_desc *d, int fd)
 err:
 	pty_free_fake_reg(&fake);
 	close(fd);
-	return ret;
+	return ret ? -1 : 0;
 }
 
 static bool tty_is_master(struct tty_info *info)
@@ -1152,7 +1151,6 @@ static char *tty_d_name(struct file_desc *d, char *buf, size_t s)
 static struct file_desc_ops tty_desc_ops = {
 	.type		= FD_TYPES__TTY,
 	.open		= tty_open,
-	.post_open	= tty_restore_ctl_terminal,
 	.collect_fd	= tty_collect_fd,
 	.name		= tty_d_name,
 };



More information about the CRIU mailing list