[CRIU] [PATCH] lazy-pages: interleave #PF handling with transfers of remaining pages

Pavel Emelyanov xemul at virtuozzo.com
Mon Dec 5 01:09:42 PST 2016


On 12/01/2016 11:27 AM, Mike Rapoport wrote:
> On Thu, Dec 01, 2016 at 09:19:27AM +0100, Adrian Reber wrote:
>> On Thu, Dec 01, 2016 at 10:09:33AM +0200, Mike Rapoport wrote:
>>> Currently we poll userfaultfd for page faults and if there were no page
>>> faults during 5 seconds we stop monitoring the userfaultfd and start
>>> copying remaining pages chunk by chunk.
>>> If a page fault occurs during the copy, the faulting process will be stuck
>>> until the page it accessed has been copied to its address space.
>>> This patch limits the initial "page fault only" stage to 1 second instead
>>> of 5, and interleaves non-blocking poll of userfaultfd with copying of the
>>> remaining memory afterwards.
>>
>> I am curious why you do not start the copying immediately and remove the
>> "page fault only" stage completely. If I remember correctly that is also
>> what QEMU does. As soon as everything is set up, the pages are copied to
>> the destination system and interleaved with requested pages.

Yup, ideally this is how it should look, but there are two concerns:

1. we need to know which pages are required ASAP, so #PF mode is still
   needed (this is what Mike said)

2. sending requests for non-PF-ed pages would make the network channel
   transmit needed pages slower, thus resulting in latencies, so some
   priority for #PF-ed pages is still needed, I think.

> We wouldn't want to waste disk/network bandwidth for pages outside the
> working set until the working set is restored. So the "page fault only"
> mode is used to somehow prioritize the page fault handling.
> 
> There is still huge room for improvement here :)

Yes, so for now I'll apply the set to reduce the 5-second per-test timeout
we have in Jenkins, and let's look for a better policy :)

-- Pavel

>> 		Adrian
>>
>>> Signed-off-by: Mike Rapoport <rppt at linux.vnet.ibm.com>
>>> ---
>>>  criu/uffd.c | 56 ++++++++++++++++++++++++++++++--------------------------
>>>  1 file changed, 30 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/criu/uffd.c b/criu/uffd.c
>>> index bd8eaca..93cb9d1 100644
>>> --- a/criu/uffd.c
>>> +++ b/criu/uffd.c
>>> @@ -70,7 +70,6 @@ struct lazy_pages_info {
>>>  	struct list_head l;
>>>  
>>>  	void *buf;
>>> -	bool remaining;
>>>  };
>>>  
>>>  static LIST_HEAD(lpis);
>>> @@ -547,9 +546,6 @@ static int complete_page_fault(struct lazy_pages_info *lpi, unsigned long vaddr,
>>>  	if (uffd_copy(lpi, vaddr, nr))
>>>  		return -1;
>>>  
>>> -	if (lpi->remaining)
>>> -		return 0;
>>> -
>>>  	return update_lazy_iovecs(lpi, vaddr, nr * PAGE_SIZE);
>>>  }
>>>  
>>> @@ -631,18 +627,16 @@ static int handle_remaining_pages(struct lazy_pages_info *lpi)
>>>  	struct lazy_iovec *lazy_iov;
>>>  	int nr_pages, err;
>>>  
>>> -	lpi->remaining = true;
>>> -
>>> -	lpi->pr.reset(&lpi->pr);
>>> +	if (list_empty(&lpi->iovs))
>>> +		return 0;
>>>  
>>> -	list_for_each_entry(lazy_iov, &lpi->iovs, l) {
>>> -		nr_pages = lazy_iov->len / PAGE_SIZE;
>>> +	lazy_iov = list_first_entry(&lpi->iovs, struct lazy_iovec, l);
>>> +	nr_pages = lazy_iov->len / PAGE_SIZE;
>>>  
>>> -		err = uffd_handle_pages(lpi, lazy_iov->base, nr_pages, 0);
>>> -		if (err < 0) {
>>> -			pr_err("Error during UFFD copy\n");
>>> -			return -1;
>>> -		}
>>> +	err = uffd_handle_pages(lpi, lazy_iov->base, nr_pages, 0);
>>> +	if (err < 0) {
>>> +		pr_err("Error during UFFD copy\n");
>>> +		return -1;
>>>  	}
>>>  
>>>  	return 0;
>>> @@ -717,27 +711,37 @@ static int lazy_pages_summary(struct lazy_pages_info *lpi)
>>>  	return 0;
>>>  }
>>>  
>>> -#define POLL_TIMEOUT 5000
>>> +#define POLL_TIMEOUT 1000
>>>  
>>>  static int handle_requests(int epollfd, struct epoll_event *events, int nr_fds)
>>>  {
>>>  	struct lazy_pages_info *lpi;
>>> +	int poll_timeout = POLL_TIMEOUT;
>>>  	int ret;
>>>  
>>> -	ret = epoll_run_rfds(epollfd, events, nr_fds, POLL_TIMEOUT);
>>> -	if (ret < 0)
>>> -		goto out;
>>> -
>>> +	for (;;) {
>>> +		bool remaining = false;
>>>  
>>> -	pr_debug("switching from request to copy mode\n");
>>> -	pr_debug("Handle remaining pages\n");
>>> -	list_for_each_entry(lpi, &lpis, l) {
>>> -		ret = handle_remaining_pages(lpi);
>>> -		if (ret < 0) {
>>> -			pr_err("Error during remaining page copy\n");
>>> -			ret = 1;
>>> +		ret = epoll_run_rfds(epollfd, events, nr_fds, poll_timeout);
>>> +		if (ret < 0)
>>>  			goto out;
>>> +
>>> +		if (poll_timeout)
>>> +			pr_debug("Start handling remaining pages\n");
>>> +
>>> +		poll_timeout = 0;
>>> +		list_for_each_entry(lpi, &lpis, l) {
>>> +			if (lpi->copied_pages < lpi->total_pages) {
>>> +				remaining = true;
>>> +				ret = handle_remaining_pages(lpi);
>>> +				if (ret < 0)
>>> +					goto out;
>>> +				break;
>>> +			}
>>>  		}
>>> +
>>> +		if (!remaining)
>>> +			break;
>>>  	}
>>>  
>>>  	list_for_each_entry(lpi, &lpis, l)
>>> -- 
>>> 1.9.1
>>>
>>
> 
> .
> 



More information about the CRIU mailing list