[CRIU] [PATCH] lazy-pages: interleave #PF handling with transfers of remaining pages
Mike Rapoport
rppt at linux.vnet.ibm.com
Thu Dec 1 00:27:24 PST 2016
On Thu, Dec 01, 2016 at 09:19:27AM +0100, Adrian Reber wrote:
> On Thu, Dec 01, 2016 at 10:09:33AM +0200, Mike Rapoport wrote:
> > Currently we poll userfaultfd for page faults, and if there are no page
> > faults for 5 seconds we stop monitoring the userfaultfd and start
> > copying the remaining pages chunk by chunk.
> > If a page fault occurs during the copy, the faulting process will be stuck
> > until the page it accessed has been copied to its address space.
> > This patch limits the initial "page fault only" stage to 1 second instead
> > of 5, and afterwards interleaves non-blocking polling of userfaultfd with
> > copying of the remaining memory.
>
> I am curious why you do not start the copying immediately and remove the
> "page fault only" stage completely. If I remember correctly, that is also
> what QEMU does. As soon as everything is set up, the pages are copied to
> the destination system and interleaved with requested pages.
We wouldn't want to waste disk/network bandwidth on pages outside the
working set until the working set is restored. So the "page fault only"
mode serves as a way to prioritize page fault handling.
There is still huge room for improvement here :)
> Adrian
>
> > Signed-off-by: Mike Rapoport <rppt at linux.vnet.ibm.com>
> > ---
> > criu/uffd.c | 56 ++++++++++++++++++++++++++++++--------------------------
> > 1 file changed, 30 insertions(+), 26 deletions(-)
> >
> > diff --git a/criu/uffd.c b/criu/uffd.c
> > index bd8eaca..93cb9d1 100644
> > --- a/criu/uffd.c
> > +++ b/criu/uffd.c
> > @@ -70,7 +70,6 @@ struct lazy_pages_info {
> > struct list_head l;
> >
> > void *buf;
> > - bool remaining;
> > };
> >
> > static LIST_HEAD(lpis);
> > @@ -547,9 +546,6 @@ static int complete_page_fault(struct lazy_pages_info *lpi, unsigned long vaddr,
> > if (uffd_copy(lpi, vaddr, nr))
> > return -1;
> >
> > - if (lpi->remaining)
> > - return 0;
> > -
> > return update_lazy_iovecs(lpi, vaddr, nr * PAGE_SIZE);
> > }
> >
> > @@ -631,18 +627,16 @@ static int handle_remaining_pages(struct lazy_pages_info *lpi)
> > struct lazy_iovec *lazy_iov;
> > int nr_pages, err;
> >
> > - lpi->remaining = true;
> > -
> > - lpi->pr.reset(&lpi->pr);
> > + if (list_empty(&lpi->iovs))
> > + return 0;
> >
> > - list_for_each_entry(lazy_iov, &lpi->iovs, l) {
> > - nr_pages = lazy_iov->len / PAGE_SIZE;
> > + lazy_iov = list_first_entry(&lpi->iovs, struct lazy_iovec, l);
> > + nr_pages = lazy_iov->len / PAGE_SIZE;
> >
> > - err = uffd_handle_pages(lpi, lazy_iov->base, nr_pages, 0);
> > - if (err < 0) {
> > - pr_err("Error during UFFD copy\n");
> > - return -1;
> > - }
> > + err = uffd_handle_pages(lpi, lazy_iov->base, nr_pages, 0);
> > + if (err < 0) {
> > + pr_err("Error during UFFD copy\n");
> > + return -1;
> > }
> >
> > return 0;
> > @@ -717,27 +711,37 @@ static int lazy_pages_summary(struct lazy_pages_info *lpi)
> > return 0;
> > }
> >
> > -#define POLL_TIMEOUT 5000
> > +#define POLL_TIMEOUT 1000
> >
> > static int handle_requests(int epollfd, struct epoll_event *events, int nr_fds)
> > {
> > struct lazy_pages_info *lpi;
> > + int poll_timeout = POLL_TIMEOUT;
> > int ret;
> >
> > - ret = epoll_run_rfds(epollfd, events, nr_fds, POLL_TIMEOUT);
> > - if (ret < 0)
> > - goto out;
> > -
> > + for (;;) {
> > + bool remaining = false;
> >
> > - pr_debug("switching from request to copy mode\n");
> > - pr_debug("Handle remaining pages\n");
> > - list_for_each_entry(lpi, &lpis, l) {
> > - ret = handle_remaining_pages(lpi);
> > - if (ret < 0) {
> > - pr_err("Error during remaining page copy\n");
> > - ret = 1;
> > + ret = epoll_run_rfds(epollfd, events, nr_fds, poll_timeout);
> > + if (ret < 0)
> > goto out;
> > +
> > + if (poll_timeout)
> > + pr_debug("Start handling remaining pages\n");
> > +
> > + poll_timeout = 0;
> > + list_for_each_entry(lpi, &lpis, l) {
> > + if (lpi->copied_pages < lpi->total_pages) {
> > + remaining = true;
> > + ret = handle_remaining_pages(lpi);
> > + if (ret < 0)
> > + goto out;
> > + break;
> > + }
> > }
> > +
> > + if (!remaining)
> > + break;
> > }
> >
> > list_for_each_entry(lpi, &lpis, l)
> > --
> > 1.9.1
> >
>
More information about the CRIU
mailing list