[CRIU] [PATCH 07/11] lazy-pages: make handle_requests also poll for listening socket
Mike Rapoport
rppt at linux.vnet.ibm.com
Tue Apr 12 04:27:53 PDT 2016
On Tue, Apr 12, 2016 at 02:06:22PM +0300, Pavel Emelyanov wrote:
> On 04/11/2016 09:19 AM, Mike Rapoport wrote:
> > Signed-off-by: Mike Rapoport <rppt at linux.vnet.ibm.com>
> > ---
> > criu/uffd.c | 119 +++++++++++++++++++++++++++++++++++++++---------------------
> > 1 file changed, 77 insertions(+), 42 deletions(-)
> >
> > diff --git a/criu/uffd.c b/criu/uffd.c
> > index 87c2602..fece56e 100644
> > --- a/criu/uffd.c
> > +++ b/criu/uffd.c
> > @@ -545,32 +545,56 @@ static int handle_user_fault(int fd, struct list_head *uffd_list, void *dest)
> > return 0;
> > }
> >
> > +static int handle_socket_conn(int listen, struct sockaddr_un *saddr,
> > + struct list_head *uffd_list)
> > +{
> > + int uffd;
> > + int uffd_flags;
> > +
> > + uffd = ud_open(listen, saddr);
> > + if (uffd < 0) {
> > + pr_perror("uffd open error");
> > + return -1;
> > + }
> > +
> > + pr_debug("uffd is 0x%d\n", uffd);
> > + uffd_flags = fcntl(uffd, F_GETFD, NULL);
> > + pr_debug("uffd_flags are 0x%x\n", uffd_flags);
> > + if (fcntl(uffd, F_SETFD, uffd_flags | O_NONBLOCK))
> > + return -1;
> > +
> > + /*
> > + * Find the memory pages belonging to the restored process so
> > + * that it is trackable when all pages have been transferred.
> > + */
> > + if ((total_pages = find_vmas(uffd_list)) == -1)
> > + return -1;
> > +
> > + return uffd;
> > +}
> > +
> > #define POLL_TIMEOUT 5000
> >
> > -static int handle_requests(int fd)
> > +static int handle_requests(int listen, struct sockaddr_un *saddr)
> > {
> > struct pollfd *pollfd;
> > - int nr_pollfd;
> > + int nr_pollfd = 1;
> > + int max_pollfd;
> > + int timeout = -1;
> > int ret = -1;
> > unsigned long ps;
> > void *dest;
> > + int i;
> >
> > LIST_HEAD(uffd_list);
> >
> > - /*
> > - * Find the memory pages belonging to the restored process
> > - * so that it is trackable when all pages have been transferred.
> > - */
> > - if ((total_pages = find_vmas(&uffd_list)) == -1)
> > - return -1;
> > -
> > /* All operations will be done on page size */
> > ps = page_size();
> > dest = malloc(ps);
> >
> > /* allocate pollfd per task + pollfd for listen */
> > - nr_pollfd = task_entries->nr_tasks + 1;
> > - pollfd = malloc(sizeof(*pollfd) * nr_pollfd);
> > + max_pollfd = task_entries->nr_tasks + 1;
> > + pollfd = malloc(sizeof(*pollfd) * max_pollfd);
> > if (!pollfd || !dest) {
> > pr_err("memory allocation failed\n");
> > ret = -1;
> > @@ -578,12 +602,10 @@ static int handle_requests(int fd)
> > }
> >
> > /*
> > - * use nr_pollfd to set number of pollfd's being polled
> > - * FIXME: eventually nr_pollfd should be nr_tasks + 1, and the
> > - * first fd to poll on should that of listen
> > + * use nr_pollfd to set number of pollfd's being polled, the
> > + * first pollfd is always for the listening socket
> > */
> > - nr_pollfd = 1;
> > - pollfd[0].fd = fd;
> > + pollfd[0].fd = listen;
> > pollfd[0].events = POLLIN;
> >
> > while (1) {
> > @@ -592,7 +614,7 @@ static int handle_requests(int fd)
> > * no uffd pages are requested the code switches to
> > * copying the remaining pages.
> > */
> > - ret = poll(pollfd, nr_pollfd, POLL_TIMEOUT);
> > + ret = poll(pollfd, nr_pollfd, timeout);
>
> But there should be strict split between the time we expect for new connections
> on this socket versus when pagefaults start arriving. Because first we restore
> the tree and send uffd sockets to this daemon and _then_ we resume the tasks and
> from now on no new connections may appear, but PF-s start popping up from uffds.
>
> Since we know this, why do we need to complicate this and poll for all the
> sockets together?
I had an impression that the uffd is sent to the daemon at the stage when
tasks are already running concurrently. If I was wrong here and the tasks
are resumed after all the uffd's are sent to the daemon, then we indeed
don't need to poll for uffd's and sockets together.
> > pr_debug("poll() rc: 0x%x\n", ret);
> > if (ret < 0) {
> > pr_perror("polling failed");
> > @@ -603,16 +625,42 @@ static int handle_requests(int fd)
> > break;
> > }
> >
> > - ret = handle_user_fault(fd, &uffd_list, dest);
> > - if (ret < 0)
> > - goto out;
> > + if (pollfd[0].revents & POLLIN) {
> > + int fd = handle_socket_conn(listen, saddr, &uffd_list);
> > + if (fd < 0)
> > + goto out;
> > + pollfd[nr_pollfd].fd = fd;
> > + pollfd[nr_pollfd].events = POLLIN;
> > + nr_pollfd++;
> > + BUG_ON(nr_pollfd > max_pollfd);
> > +
> > + /*
> > + * once the first uffd is register wait POLL_TIMEOUT
> > + * before starting bulk page trasfer
> > + */
> > + timeout = POLL_TIMEOUT;
> > +
> > + continue;
> > + }
> > +
> > + for (i = 1; i < nr_pollfd; i++) {
> > + if (pollfd[i].revents & POLLIN) {
> > + ret = handle_user_fault(pollfd[i].fd,
> > + &uffd_list, dest);
> > + if (ret < 0)
> > + goto out;
> > + }
> > + }
> > }
> > - pr_debug("Handle remaining pages\n");
> > - ret = handle_remaining_pages(fd, &uffd_list, dest);
> > - if (ret < 0) {
> > - pr_err("Error during remaining page copy\n");
> > - ret = 1;
> > - goto out;
> > +
> > + for (i = 1; i < nr_pollfd; i++) {
> > + pr_debug("Handle remaining pages\n");
> > + ret = handle_remaining_pages(pollfd[i].fd, &uffd_list, dest);
> > + if (ret < 0) {
> > + pr_err("Error during remaining page copy\n");
> > + ret = 1;
> > + goto out;
> > + }
> > }
> >
> > pr_debug("With UFFD transferred pages: (%ld/%ld)\n", uffd_copied_pages, total_pages);
> > @@ -626,9 +674,10 @@ static int handle_requests(int fd)
> > ret = 0;
> >
> > out:
> > + for (i = nr_pollfd - 1; i > 0; i--)
> > + close(pollfd[i].fd);
> > free(pollfd);
> > free(dest);
> > - close(fd);
> > return ret;
> >
> > }
> > @@ -657,8 +706,6 @@ static int lazy_pages_prepare_pstree(void)
> > int cr_lazy_pages()
> > {
> > int listen;
> > - int uffd;
> > - int uffd_flags;
> > int ret;
> > struct sockaddr_un saddr;
> >
> > @@ -679,19 +726,7 @@ int cr_lazy_pages()
> > return -1;
> > }
> >
> > - uffd = ud_open(listen, &saddr);
> > - if (uffd < 0) {
> > - pr_perror("uffd open error");
> > - return -1;
> > - }
> > -
> > - pr_debug("uffd is 0x%d\n", uffd);
> > - uffd_flags = fcntl(uffd, F_GETFD, NULL);
> > - pr_debug("uffd_flags are 0x%x\n", uffd_flags);
> > - if (fcntl(uffd, F_SETFD, uffd_flags | O_NONBLOCK))
> > - return -1;
> > -
> > - ret = handle_requests(uffd);
> > + ret = handle_requests(listen, &saddr);
> > close(listen);
> >
> > return ret;
> >
>
More information about the CRIU
mailing list