[CRIU] [PATCH] lazy-pages: interleave #PF handling with transfers of remaining pages
Adrian Reber
areber at redhat.com
Thu Dec 1 00:19:27 PST 2016
On Thu, Dec 01, 2016 at 10:09:33AM +0200, Mike Rapoport wrote:
> Currently we poll userfaultfd for page faults, and if there are no page
> faults for 5 seconds we stop monitoring the userfaultfd and start
> copying the remaining pages chunk by chunk.
> If a page fault occurs during the copy, the faulting process will be stuck
> until the page it accessed has been copied to its address space.
> This patch limits the initial "page fault only" stage to 1 second instead
> of 5, and interleaves non-blocking poll of userfaultfd with copying of the
> remaining memory afterwards.
I am curious why you do not start the copying immediately and remove the
"page fault only" stage completely. If I remember correctly, that is also
what QEMU does: as soon as everything is set up, the pages are copied to
the destination system, interleaved with the requested pages.
Adrian
> Signed-off-by: Mike Rapoport <rppt at linux.vnet.ibm.com>
> ---
> criu/uffd.c | 56 ++++++++++++++++++++++++++++++--------------------------
> 1 file changed, 30 insertions(+), 26 deletions(-)
>
> diff --git a/criu/uffd.c b/criu/uffd.c
> index bd8eaca..93cb9d1 100644
> --- a/criu/uffd.c
> +++ b/criu/uffd.c
> @@ -70,7 +70,6 @@ struct lazy_pages_info {
> struct list_head l;
>
> void *buf;
> - bool remaining;
> };
>
> static LIST_HEAD(lpis);
> @@ -547,9 +546,6 @@ static int complete_page_fault(struct lazy_pages_info *lpi, unsigned long vaddr,
> if (uffd_copy(lpi, vaddr, nr))
> return -1;
>
> - if (lpi->remaining)
> - return 0;
> -
> return update_lazy_iovecs(lpi, vaddr, nr * PAGE_SIZE);
> }
>
> @@ -631,18 +627,16 @@ static int handle_remaining_pages(struct lazy_pages_info *lpi)
> struct lazy_iovec *lazy_iov;
> int nr_pages, err;
>
> - lpi->remaining = true;
> -
> - lpi->pr.reset(&lpi->pr);
> + if (list_empty(&lpi->iovs))
> + return 0;
>
> - list_for_each_entry(lazy_iov, &lpi->iovs, l) {
> - nr_pages = lazy_iov->len / PAGE_SIZE;
> + lazy_iov = list_first_entry(&lpi->iovs, struct lazy_iovec, l);
> + nr_pages = lazy_iov->len / PAGE_SIZE;
>
> - err = uffd_handle_pages(lpi, lazy_iov->base, nr_pages, 0);
> - if (err < 0) {
> - pr_err("Error during UFFD copy\n");
> - return -1;
> - }
> + err = uffd_handle_pages(lpi, lazy_iov->base, nr_pages, 0);
> + if (err < 0) {
> + pr_err("Error during UFFD copy\n");
> + return -1;
> }
>
> return 0;
> @@ -717,27 +711,37 @@ static int lazy_pages_summary(struct lazy_pages_info *lpi)
> return 0;
> }
>
> -#define POLL_TIMEOUT 5000
> +#define POLL_TIMEOUT 1000
>
> static int handle_requests(int epollfd, struct epoll_event *events, int nr_fds)
> {
> struct lazy_pages_info *lpi;
> + int poll_timeout = POLL_TIMEOUT;
> int ret;
>
> - ret = epoll_run_rfds(epollfd, events, nr_fds, POLL_TIMEOUT);
> - if (ret < 0)
> - goto out;
> -
> + for (;;) {
> + bool remaining = false;
>
> - pr_debug("switching from request to copy mode\n");
> - pr_debug("Handle remaining pages\n");
> - list_for_each_entry(lpi, &lpis, l) {
> - ret = handle_remaining_pages(lpi);
> - if (ret < 0) {
> - pr_err("Error during remaining page copy\n");
> - ret = 1;
> + ret = epoll_run_rfds(epollfd, events, nr_fds, poll_timeout);
> + if (ret < 0)
> goto out;
> +
> + if (poll_timeout)
> + pr_debug("Start handling remaining pages\n");
> +
> + poll_timeout = 0;
> + list_for_each_entry(lpi, &lpis, l) {
> + if (lpi->copied_pages < lpi->total_pages) {
> + remaining = true;
> + ret = handle_remaining_pages(lpi);
> + if (ret < 0)
> + goto out;
> + break;
> + }
> }
> +
> + if (!remaining)
> + break;
> }
>
> list_for_each_entry(lpi, &lpis, l)
> --
> 1.9.1
>
More information about the CRIU
mailing list