[CRIU] [PATCH] lazy-pages: interleave #PF handling with transfers of remaining pages

Adrian Reber areber at redhat.com
Thu Dec 1 00:19:27 PST 2016


On Thu, Dec 01, 2016 at 10:09:33AM +0200, Mike Rapoport wrote:
> Currently we poll userfaultfd for page faults, and if no page faults
> occur within 5 seconds we stop monitoring the userfaultfd and start
> copying the remaining pages chunk by chunk.
> If a page fault occurs during the copy, the faulting process will be stuck
> until the page it accessed is copied to its address space.
> This patch limits the initial "page fault only" stage to 1 second instead
> of 5, and interleaves non-blocking poll of userfaultfd with copying of the
> remaining memory afterwards.

I am curious why you do not start the copying immediately and remove the
"page fault only" stage completely. If I remember correctly, that is also
what QEMU does. As soon as everything is set up, the pages are copied to
the destination system and interleaved with requested pages.

		Adrian

> Signed-off-by: Mike Rapoport <rppt at linux.vnet.ibm.com>
> ---
>  criu/uffd.c | 56 ++++++++++++++++++++++++++++++--------------------------
>  1 file changed, 30 insertions(+), 26 deletions(-)
> 
> diff --git a/criu/uffd.c b/criu/uffd.c
> index bd8eaca..93cb9d1 100644
> --- a/criu/uffd.c
> +++ b/criu/uffd.c
> @@ -70,7 +70,6 @@ struct lazy_pages_info {
>  	struct list_head l;
>  
>  	void *buf;
> -	bool remaining;
>  };
>  
>  static LIST_HEAD(lpis);
> @@ -547,9 +546,6 @@ static int complete_page_fault(struct lazy_pages_info *lpi, unsigned long vaddr,
>  	if (uffd_copy(lpi, vaddr, nr))
>  		return -1;
>  
> -	if (lpi->remaining)
> -		return 0;
> -
>  	return update_lazy_iovecs(lpi, vaddr, nr * PAGE_SIZE);
>  }
>  
> @@ -631,18 +627,16 @@ static int handle_remaining_pages(struct lazy_pages_info *lpi)
>  	struct lazy_iovec *lazy_iov;
>  	int nr_pages, err;
>  
> -	lpi->remaining = true;
> -
> -	lpi->pr.reset(&lpi->pr);
> +	if (list_empty(&lpi->iovs))
> +		return 0;
>  
> -	list_for_each_entry(lazy_iov, &lpi->iovs, l) {
> -		nr_pages = lazy_iov->len / PAGE_SIZE;
> +	lazy_iov = list_first_entry(&lpi->iovs, struct lazy_iovec, l);
> +	nr_pages = lazy_iov->len / PAGE_SIZE;
>  
> -		err = uffd_handle_pages(lpi, lazy_iov->base, nr_pages, 0);
> -		if (err < 0) {
> -			pr_err("Error during UFFD copy\n");
> -			return -1;
> -		}
> +	err = uffd_handle_pages(lpi, lazy_iov->base, nr_pages, 0);
> +	if (err < 0) {
> +		pr_err("Error during UFFD copy\n");
> +		return -1;
>  	}
>  
>  	return 0;
> @@ -717,27 +711,37 @@ static int lazy_pages_summary(struct lazy_pages_info *lpi)
>  	return 0;
>  }
>  
> -#define POLL_TIMEOUT 5000
> +#define POLL_TIMEOUT 1000
>  
>  static int handle_requests(int epollfd, struct epoll_event *events, int nr_fds)
>  {
>  	struct lazy_pages_info *lpi;
> +	int poll_timeout = POLL_TIMEOUT;
>  	int ret;
>  
> -	ret = epoll_run_rfds(epollfd, events, nr_fds, POLL_TIMEOUT);
> -	if (ret < 0)
> -		goto out;
> -
> +	for (;;) {
> +		bool remaining = false;
>  
> -	pr_debug("switching from request to copy mode\n");
> -	pr_debug("Handle remaining pages\n");
> -	list_for_each_entry(lpi, &lpis, l) {
> -		ret = handle_remaining_pages(lpi);
> -		if (ret < 0) {
> -			pr_err("Error during remaining page copy\n");
> -			ret = 1;
> +		ret = epoll_run_rfds(epollfd, events, nr_fds, poll_timeout);
> +		if (ret < 0)
>  			goto out;
> +
> +		if (poll_timeout)
> +			pr_debug("Start handling remaining pages\n");
> +
> +		poll_timeout = 0;
> +		list_for_each_entry(lpi, &lpis, l) {
> +			if (lpi->copied_pages < lpi->total_pages) {
> +				remaining = true;
> +				ret = handle_remaining_pages(lpi);
> +				if (ret < 0)
> +					goto out;
> +				break;
> +			}
>  		}
> +
> +		if (!remaining)
> +			break;
>  	}
>  
>  	list_for_each_entry(lpi, &lpis, l)
> -- 
> 1.9.1
> 


More information about the CRIU mailing list