[CRIU] [PATCH RFC 30/30] files: Kill struct file_desc_ops::post_open
Kirill Tkhai
ktkhai at virtuozzo.com
Tue Nov 1 07:35:09 PDT 2016
This makes opening of fds asynchronous. Now we restore them in a poll-like style.
Every ->open callback operates like a poll: it returns ORV_AGAIN
if some events have not been received yet, or ORV_OK if restoring of the fle
has finished.
We iterate over the list of file descriptors and look at each return value.
If there was no progress in restoring any fle, we sleep on the task_st
futex. As all events are sent together with changing task_st, we will
be woken up when there is some new work for us.
Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
criu/autofs.c | 25 ++++++++
criu/eventpoll.c | 11 +++-
criu/files.c | 153 ++++++++++++++++++++++----------------------------
criu/include/files.h | 5 --
criu/include/tty.h | 2 +
criu/sk-inet.c | 14 +++--
criu/sk-unix.c | 15 ++++-
criu/tty.c | 8 +--
8 files changed, 123 insertions(+), 110 deletions(-)
diff --git a/criu/autofs.c b/criu/autofs.c
index 3abf404..e616294 100644
--- a/criu/autofs.c
+++ b/criu/autofs.c
@@ -890,6 +890,29 @@ static int autofs_create_fle(struct pstree_item *task, FdinfoEntry *fe,
return 0;
}
+static int autofs_open_pipefd(struct file_desc *d, int *new_fd)
+{
+ struct fdinfo_list_entry *fle = file_master(d);
+ int ret;
+
+ if (fle->stage < FLE_OPEN) {
+ ret = open_pipe(d, new_fd);
+ if (ret != ORV_OK)
+ return ret;
+ /*
+ * We may go further and call post_open right now,
+ * but common code wants to set flags for this fd.
+ * Let's allow it to do that, before fd is closed.
+ * Two below lines will be deleted, when ghost files
+ * will be implemented.
+ */
+ set_fds_event(fle->pid);
+ return ORV_AGAIN;
+ }
+
+ return autofs_post_open(d, fle->fe->fd);
+}
+
static int autofs_create_pipe(struct pstree_item *task, autofs_info_t *i,
struct fdinfo_list_entry *ple)
{
@@ -906,7 +929,7 @@ static int autofs_create_pipe(struct pstree_item *task, autofs_info_t *i,
if (!ops)
return -1;
memcpy(ops, pi->d.ops, sizeof(*ops));
- ops->post_open = autofs_post_open;
+ ops->open = autofs_open_pipefd;
pe = shmalloc(sizeof(*pe));
if (!pe)
diff --git a/criu/eventpoll.c b/criu/eventpoll.c
index 3def66c..36f52b9 100644
--- a/criu/eventpoll.c
+++ b/criu/eventpoll.c
@@ -114,6 +114,8 @@ const struct fdtype_ops eventpoll_dump_ops = {
.dump = dump_one_eventpoll,
};
+static int eventpoll_post_open(struct file_desc *d, int fd);
+
static int eventpoll_open(struct file_desc *d, int *new_fd)
{
struct fdinfo_list_entry *fle = file_master(d);
@@ -122,6 +124,9 @@ static int eventpoll_open(struct file_desc *d, int *new_fd)
info = container_of(d, struct eventpoll_file_info, d);
+ if (fle->stage >= FLE_OPEN)
+ goto post_open;
+
pr_info_eventpoll("Restore ", info->efe);
tmp = epoll_create(1);
@@ -140,7 +145,8 @@ static int eventpoll_open(struct file_desc *d, int *new_fd)
fle->stage = FLE_MAY_BE_POLLED;
*new_fd = tmp;
- return ORV_OK;
+post_open:
+ return eventpoll_post_open(d, fle->fe->fd);
err_close:
close(tmp);
return -1;
@@ -204,7 +210,7 @@ static int eventpoll_post_open(struct file_desc *d, int fd)
}
- return 0;
+ return ORV_OK;
}
static void eventpoll_collect_fd(struct file_desc *d,
@@ -216,7 +222,6 @@ static void eventpoll_collect_fd(struct file_desc *d,
static struct file_desc_ops desc_ops = {
.type = FD_TYPES__EVENTPOLL,
.open = eventpoll_open,
- .post_open = eventpoll_post_open,
.collect_fd = eventpoll_collect_fd,
};
diff --git a/criu/files.c b/criu/files.c
index 67d279d..8bc8d70 100644
--- a/criu/files.c
+++ b/criu/files.c
@@ -149,39 +149,6 @@ unsigned int find_unused_fd(struct list_head *head, int hint_fd)
return fd;
}
-/*
- * A file may be shared between several file descriptors. E.g
- * when doing a fork() every fd of a forker and respective fds
- * of the child have such. Another way of getting shared files
- * is by dup()-ing them or sending them via unix sockets in
- * SCM_RIGHTS message.
- *
- * We restore this type of things in 3 steps (states[] below)
- *
- * 1. Prepare step.
- * Select which task will create the file (open() one, or
- * call any other syscall for than (socket, pipe, etc.). All
- * the others, that share one, create unix sockets under the
- * respective file descriptor (transport socket).
- * 2. Open step.
- * The one who creates the file (the 'master') creates one,
- * then creates one more unix socket (transport) and sends the
- * created file over this socket to the other recipients.
- * 3. Receive step.
- * Those, who wait for the file to appear, receive one via
- * the transport socket, then close the socket and dup() the
- * received file descriptor into its place.
- *
- * There's the 4th step in the states[] array -- the post_open
- * one. This one is not about file-sharing resolving, but about
- * doing something with a file using it's 'desired' fd. The
- * thing is that while going the 3-step process above, the file
- * may appear in variuos places in the task's fd table, and if
- * we want to do something with it's _final_ descriptor value,
- * we should wait for it to appear there. So the post_open is
- * called when the file is finally set into its place.
- */
-
struct fdinfo_list_entry *file_master(struct file_desc *d)
{
if (list_empty(&d->fd_info_head)) {
@@ -859,14 +826,7 @@ struct fd_open_state {
int (*cb)(int, struct fdinfo_list_entry *);
};
-static int open_fd(int pid, struct fdinfo_list_entry *fle);
static int receive_fd(int pid, struct fdinfo_list_entry *fle);
-static int post_open_fd(int pid, struct fdinfo_list_entry *fle);
-
-static struct fd_open_state states[] = {
- { "create", open_fd, },
- { "post_create", post_open_fd, },
-};
static void transport_name_gen(struct sockaddr_un *addr, int *len, int pid)
{
@@ -977,29 +937,6 @@ static int send_fd_to_self(int fd, struct fdinfo_list_entry *fle)
return set_fds_event(fle->pid);
}
-static int post_open_fd(int pid, struct fdinfo_list_entry *fle)
-{
- struct file_desc *d = fle->desc;
-
- if (fle != file_master(d)) {
- if (receive_fd(pid, fle) != ORV_OK) {
- pr_err("Can't receive\n");
- return -1;
- }
- if (!is_service_fd(fle->fe->fd, CTL_TTY_OFF))
- goto out;
- }
-
- if (!d->ops->post_open)
- goto out;
- if (d->ops->post_open(d, fle->fe->fd))
- return -1;
-out:
- fle->stage = FLE_RESTORED;
- return 0;
-}
-
-
static int serve_out_fd(int pid, int fd, struct file_desc *d)
{
int ret;
@@ -1024,16 +961,10 @@ static int serve_out_fd(int pid, int fd, struct file_desc *d)
return ret;
}
-static int open_fd(int pid, struct fdinfo_list_entry *fle)
+static int setup_and_serve_out(struct fdinfo_list_entry *fle, int new_fd)
{
struct file_desc *d = fle->desc;
- int new_fd;
-
- if (fle != file_master(d))
- return 0;
-
- if (d->ops->open(d, &new_fd) < 0)
- return -1;
+ pid_t pid = fle->pid;
if (reopen_fd_as(fle->fe->fd, new_fd))
return -1;
@@ -1046,7 +977,37 @@ static int open_fd(int pid, struct fdinfo_list_entry *fle)
if (fle->stage < FLE_OPEN)
fle->stage = FLE_OPEN;
- return serve_out_fd(pid, fle->fe->fd, d);
+ if (serve_out_fd(pid, fle->fe->fd, d))
+ return -1;
+ return 0;
+}
+
+static int open_fd(int pid, struct fdinfo_list_entry *fle)
+{
+ struct file_desc *d = fle->desc;
+ struct fdinfo_list_entry *flem;
+ int new_fd = -1, ret;
+
+ flem = file_master(d);
+ if (fle != flem) {
+ BUG_ON (fle->stage != FLE_INITIALIZED);
+ ret = receive_fd(pid, fle);
+ if (ret != ORV_OK)
+ return ret;
+
+ fle->stage = FLE_RESTORED;
+ return ORV_OK;
+ }
+
+ ret = d->ops->open(d, &new_fd);
+ if (ret != ORV_ERR && new_fd >= 0) {
+ if (setup_and_serve_out(fle, new_fd) < 0)
+ return ORV_ERR;
+ }
+
+ if (ret == ORV_OK)
+ fle->stage = FLE_RESTORED;
+ return ret;
}
static int receive_fd(int pid, struct fdinfo_list_entry *fle)
@@ -1070,25 +1031,43 @@ static int receive_fd(int pid, struct fdinfo_list_entry *fle)
return 0;
}
-static int open_fdinfo(int pid, struct fdinfo_list_entry *fle, int state)
-{
- pr_info("\tRestoring fd %d (state -> %s)\n",
- fle->fe->fd, states[state].name);
- return states[state].cb(pid, fle);
-}
-
static int open_fdinfos(int pid, struct list_head *list)
{
- int state, ret = 0;
- struct fdinfo_list_entry *fle;
+ struct fdinfo_list_entry *fle, *tmp, *service_fle = NULL;
+ LIST_HEAD(completed);
+ bool progress, again;
+ int st, ret = 0;
- for (state = 0; state < ARRAY_SIZE(states); state++) {
- list_for_each_entry(fle, list, ps_list) {
- ret = open_fdinfo(pid, fle, state);
- if (ret)
- break;
+ do {
+ progress = again = false;
+
+ list_for_each_entry_safe(fle, tmp, list, ps_list) {
+ st = fle->stage;
+ BUG_ON(st == FLE_RESTORED);
+ ret = open_fd(pid, fle);
+ if (ret == ORV_ERR)
+ goto splice;
+ if (st != fle->stage || ret == ORV_OK)
+ progress = true;
+ if (ret == ORV_OK) {
+ list_del(&fle->ps_list);
+ list_add(&fle->ps_list, &completed);
+ }
+ if (ret == ORV_AGAIN)
+ again = true;
+ if (fle->fe->fd == get_service_fd(CTL_TTY_OFF))
+ service_fle = fle;
}
- }
+ if (!progress && again)
+ wait_fds_event();
+ } while (again || progress);
+
+ BUG_ON(!list_empty(list));
+splice:
+ list_splice(&completed, list);
+
+ if (ret == ORV_OK && service_fle)
+ ret = tty_restore_ctl_terminal(service_fle->desc, service_fle->fe->fd);
return ret;
}
diff --git a/criu/include/files.h b/criu/include/files.h
index 51dae22..6512a07 100644
--- a/criu/include/files.h
+++ b/criu/include/files.h
@@ -115,11 +115,6 @@ struct file_desc_ops {
*/
int (*open)(struct file_desc *d, int *new_fd);
/*
- * Called on a file when all files of that type are opened
- * and with the fd being the "restored" one.
- */
- int (*post_open)(struct file_desc *d, int fd);
- /*
* Called to collect a new fd before adding it on desc. Clients
* may chose to collect it to some specific rst_info list. See
* prepare_fds() for details.
diff --git a/criu/include/tty.h b/criu/include/tty.h
index 2ff32c0..6fa00db 100644
--- a/criu/include/tty.h
+++ b/criu/include/tty.h
@@ -32,6 +32,8 @@ extern int prepare_shared_tty(void);
extern int tty_prep_fds(void);
extern void tty_fini_fds(void);
+extern int tty_restore_ctl_terminal(struct file_desc *d, int fd);
+
#define OPT_SHELL_JOB "shell-job"
#endif /* __CR_TTY_H__ */
diff --git a/criu/sk-inet.c b/criu/sk-inet.c
index 6352b2b..63084a0 100644
--- a/criu/sk-inet.c
+++ b/criu/sk-inet.c
@@ -453,7 +453,6 @@ static int post_open_inet_sk(struct file_desc *d, int sk);
static struct file_desc_ops inet_desc_ops = {
.type = FD_TYPES__INETSK,
.open = open_inet_sk,
- .post_open = post_open_inet_sk,
};
static inline int tcp_connection(InetSkEntry *ie)
@@ -548,14 +547,14 @@ static int post_open_inet_sk(struct file_desc *d, int sk)
if (ii->ie->opts->reuseaddr)
return 0;
- while (atomic_read(&ii->port->users))
- wait_fds_event();
+ if (atomic_read(&ii->port->users))
+ return ORV_AGAIN;
val = ii->ie->opts->reuseaddr;
if (restore_opt(sk, SOL_SOCKET, SO_REUSEADDR, &val))
return -1;
- return 0;
+ return ORV_OK;
}
int restore_ip_opts(int sk, IpOptsEntry *ioe)
@@ -570,10 +569,14 @@ int restore_ip_opts(int sk, IpOptsEntry *ioe)
static int open_inet_sk(struct file_desc *d, int *new_fd)
{
+ struct fdinfo_list_entry *fle = file_master(d);
struct inet_sk_info *ii;
InetSkEntry *ie;
int sk, yes = 1;
+ if (fle->stage >= FLE_OPEN)
+ goto post_open;
+
ii = container_of(d, struct inet_sk_info, d);
ie = ii->ie;
@@ -667,7 +670,8 @@ static int open_inet_sk(struct file_desc *d, int *new_fd)
goto err;
*new_fd = sk;
- return ORV_OK;
+post_open:
+ return post_open_inet_sk(d, fle->fe->fd);
err:
close(sk);
diff --git a/criu/sk-unix.c b/criu/sk-unix.c
index b60ed54..be22a42 100644
--- a/criu/sk-unix.c
+++ b/criu/sk-unix.c
@@ -902,8 +902,8 @@ static int post_open_unix_sk(struct file_desc *d, int fd)
/* Skip external sockets */
if (!list_empty(&peer->d.fd_info_head))
- while (peer_is_not_prepared(peer))
- wait_fds_event();
+ if (peer_is_not_prepared(peer))
+ return ORV_AGAIN;
if (ui->ue->uflags & USK_INHERIT)
return 0;
@@ -1278,9 +1278,14 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd)
static int open_unix_sk(struct file_desc *d, int *new_fd)
{
+ struct fdinfo_list_entry *fle;
struct unix_sk_info *ui;
int ret;
+ fle = file_master(d);
+ if (fle->stage >= FLE_OPEN)
+ goto post_open;
+
ui = container_of(d, struct unix_sk_info, d);
if (inherited_fd(d, new_fd)) {
@@ -1293,7 +1298,10 @@ static int open_unix_sk(struct file_desc *d, int *new_fd)
else
ret = open_unixsk_standalone(ui, new_fd);
- return ret;
+ if (ret != ORV_OK)
+ return ret;
+post_open:
+ return post_open_unix_sk(d, fle->fe->fd);
}
static char *socket_d_name(struct file_desc *d, char *buf, size_t s)
@@ -1314,7 +1322,6 @@ static char *socket_d_name(struct file_desc *d, char *buf, size_t s)
static struct file_desc_ops unix_desc_ops = {
.type = FD_TYPES__UNIXSK,
.open = open_unix_sk,
- .post_open = post_open_unix_sk,
.name = socket_d_name,
};
diff --git a/criu/tty.c b/criu/tty.c
index 1bf772b..a745e41 100644
--- a/criu/tty.c
+++ b/criu/tty.c
@@ -659,7 +659,7 @@ static int tty_set_prgp(int fd, int group)
return 0;
}
-static int tty_restore_ctl_terminal(struct file_desc *d, int fd)
+int tty_restore_ctl_terminal(struct file_desc *d, int fd)
{
struct tty_info *info = container_of(d, struct tty_info, d);
struct tty_driver *driver = info->driver;
@@ -667,8 +667,7 @@ static int tty_restore_ctl_terminal(struct file_desc *d, int fd)
struct file_desc *slave_d;
int slave = -1, ret = -1, index = -1;
- if (!is_service_fd(fd, CTL_TTY_OFF))
- return 0;
+ BUG_ON(!is_service_fd(fd, CTL_TTY_OFF));
if (driver->type == TTY_TYPE__EXT_TTY) {
slave = -1;
@@ -708,7 +707,7 @@ static int tty_restore_ctl_terminal(struct file_desc *d, int fd)
err:
pty_free_fake_reg(&fake);
close(fd);
- return ret;
+ return ret ? ORV_ERR : ORV_OK;
}
static bool tty_is_master(struct tty_info *info)
@@ -1149,7 +1148,6 @@ static char *tty_d_name(struct file_desc *d, char *buf, size_t s)
static struct file_desc_ops tty_desc_ops = {
.type = FD_TYPES__TTY,
.open = tty_open,
- .post_open = tty_restore_ctl_terminal,
.collect_fd = tty_collect_fd,
.name = tty_d_name,
};
More information about the CRIU
mailing list