[CRIU] [PATCH 2/3] page-read: Add async arg to ->read_pages callback

Pavel Emelyanov xemul at virtuozzo.com
Thu Nov 10 00:46:21 PST 2016


On 11/10/2016 09:50 AM, Mike Rapoport wrote:
> On Thu, Nov 10, 2016 at 01:02:01AM +0300, Pavel Emelyanov wrote:
>> On 11/09/2016 09:44 PM, Mike Rapoport wrote:
>>> On Mon, Nov 07, 2016 at 07:03:07PM +0300, Pavel Emelyanov wrote:
>>>> Flag PR_ASYNC means, that the caller is OK if the routine
>>>> returns w/o data read into the buffer provided.
>>>>
>>>> Flag PR_UNPLUG means, that the caller nonetheless wants the
>>>> data to be available ASAP.
>>>
>>> This one does not seem to be used neither in this patch nor in the next
>>> one.
>>
>> Yup. I created one when thought how this would work in UFFD case.
>>
>>> Can you elaborate on how it should/will be used? From the description
>>> above, the name should be PR_ASAP if we'd ever use it ;-)
>>
>> Sure, so the PR_ASYNC flag means, that the ->read_pages callback may
>> return w/o actually reading the data into the buffer. But with such a
>> notation there's a radical difference between regular restore and lazy
>> restore.
>>
>> In the former case PR_ASYNC may defer start reading the data from image
>> until the very end stage of restore. In the latter case the read can
>> (and should) be async, but the reading shouldn't be delayed even for a 
>> fraction of second, since it's called from the page-fault handler. Thus
>> the 2nd flag -- PR_UNPLUG (or PR_ASAP) which means -- the caller is OK
>> with not having the data in the buffer when the callback returns, but
>> nobody wants to wait for the data for too long, so the IO should be
>> started ASAP (i.e. right now).
>>
>> So regular restore should have only PR_ASYNC flag, and it really has
>> one in restore_priv_vma_contents(), but the uffd restore should have
>> PR_ASYNC | PR_UNPLUG combination.
> 
> And this combination would make read_pagemap_page grow from 5 screens to 7? ;-)

Nope :) Current (with this patch) read_pagemap_page already handles both
flags.

>> It's not there since there's no code that uses page-reads in uffd remote
>> case :)
> 
> For the uffd remote pages read I was planning adding open_remote_page_read
> rather than adding if-clauses to read_pagemap_pages. If I may suggest,
> let's move forward with PR_ASYNC for the local restore and we'll take care
> of lazy remote pages later on.
> BTW, local lazy restore can use PR_ASYNC in handle_remaining_pages, I
> believe.

PR_ASYNC | PR_ASAP 

>> -- Pavel
> 
> --
> Sincerely yours,
> Mike.
> 
>>>> Signed-off-by: Pavel Emelyanov <xemul at virtuozzo.com>
>>>> ---
>>>>  criu/include/pagemap.h | 6 +++++-
>>>>  criu/mem.c             | 4 ++--
>>>>  criu/pagemap.c         | 5 +++--
>>>>  criu/shmem.c           | 2 +-
>>>>  criu/uffd.c            | 2 +-
>>>>  5 files changed, 12 insertions(+), 7 deletions(-)
>>>>
>>>> diff --git a/criu/include/pagemap.h b/criu/include/pagemap.h
>>>> index 705a9af..b445d82 100644
>>>> --- a/criu/include/pagemap.h
>>>> +++ b/criu/include/pagemap.h
>>>> @@ -47,7 +47,7 @@ struct page_read {
>>>>  	 */
>>>>  	int (*get_pagemap)(struct page_read *, struct iovec *iov);
>>>>  	/* reads page from current pagemap */
>>>> -	int (*read_pages)(struct page_read *, unsigned long vaddr, int nr, void *);
>>>> +	int (*read_pages)(struct page_read *, unsigned long vaddr, int nr, void *, unsigned flags);
>>>>  	void (*close)(struct page_read *);
>>>>  	void (*skip_pages)(struct page_read *, unsigned long len);
>>>>  	int (*seek_page)(struct page_read *pr, unsigned long vaddr, bool warn);
>>>> @@ -74,6 +74,10 @@ struct page_read {
>>>>  	int curr_pme;
>>>>  };
>>>>
>>>> +/* flags for ->read_pages */
>>>> +#define PR_ASYNC	0x1 /* may exit w/o data in the buffer */
>>>> +#define PR_UNPLUG	0x2 /* do async, but start the IO asap */
>>>> +
>>>>  #define PR_SHMEM	0x1
>>>>  #define PR_TASK		0x2
>>>>
>>>> diff --git a/criu/mem.c b/criu/mem.c
>>>> index 809b637..7da64f5 100644
>>>> --- a/criu/mem.c
>>>> +++ b/criu/mem.c
>>>> @@ -790,7 +790,7 @@ static int restore_priv_vma_content(struct pstree_item *t)
>>>>  			if (vma->ppage_bitmap) { /* inherited vma */
>>>>  				clear_bit(off, vma->ppage_bitmap);
>>>>
>>>> -				ret = pr.read_pages(&pr, va, 1, buf);
>>>> +				ret = pr.read_pages(&pr, va, 1, buf, 0);
>>>>  				if (ret < 0)
>>>>  					goto err_read;
>>>>
>>>> @@ -818,7 +818,7 @@ static int restore_priv_vma_content(struct pstree_item *t)
>>>>
>>>>  				nr = min_t(int, nr_pages - i, (vma->e->end - va) / PAGE_SIZE);
>>>>
>>>> -				ret = pr.read_pages(&pr, va, nr, p);
>>>> +				ret = pr.read_pages(&pr, va, nr, p, PR_ASYNC);
>>>>  				if (ret < 0)
>>>>  					goto err_read;
>>>>
>>>> diff --git a/criu/pagemap.c b/criu/pagemap.c
>>>> index cfc9659..0a072c8 100644
>>>> --- a/criu/pagemap.c
>>>> +++ b/criu/pagemap.c
>>>> @@ -205,7 +205,8 @@ static inline void pagemap_bound_check(PagemapEntry *pe, unsigned long vaddr, in
>>>>  	}
>>>>  }
>>>>
>>>> -static int read_pagemap_page(struct page_read *pr, unsigned long vaddr, int nr, void *buf)
>>>> +static int read_pagemap_page(struct page_read *pr, unsigned long vaddr, int nr,
>>>> +		void *buf, unsigned flags)
>>>>  {
>>>>  	int ret;
>>>>  	unsigned long len = nr * PAGE_SIZE;
>>>> @@ -242,7 +243,7 @@ static int read_pagemap_page(struct page_read *pr, unsigned long vaddr, int nr,
>>>>  			if (p_nr > nr)
>>>>  				p_nr = nr;
>>>>
>>>> -			ret = read_pagemap_page(ppr, vaddr, p_nr, buf);
>>>> +			ret = read_pagemap_page(ppr, vaddr, p_nr, buf, flags);
>>>>  			if (ret == -1)
>>>>  				return ret;
>>>>
>>>> diff --git a/criu/shmem.c b/criu/shmem.c
>>>> index 5065abd..05028a3 100644
>>>> --- a/criu/shmem.c
>>>> +++ b/criu/shmem.c
>>>> @@ -485,7 +485,7 @@ static int restore_shmem_content(void *addr, struct shmem_info *si)
>>>>  		if (vaddr + nr_pages * PAGE_SIZE > si->size)
>>>>  			break;
>>>>
>>>> -		pr.read_pages(&pr, vaddr, nr_pages, addr + vaddr);
>>>> +		pr.read_pages(&pr, vaddr, nr_pages, addr + vaddr, 0);
>>>>  	}
>>>>
>>>>  	pr.close(&pr);
>>>> diff --git a/criu/uffd.c b/criu/uffd.c
>>>> index e6d4069..3336651 100644
>>>> --- a/criu/uffd.c
>>>> +++ b/criu/uffd.c
>>>> @@ -398,7 +398,7 @@ static int get_page(struct lazy_pages_info *lpi, unsigned long addr, void *dest)
>>>>  	if (pagemap_zero(lpi->pr.pe))
>>>>  		return 0;
>>>>
>>>> -	ret = lpi->pr.read_pages(&lpi->pr, addr, 1, buf);
>>>> +	ret = lpi->pr.read_pages(&lpi->pr, addr, 1, buf, 0);
>>>>  	pr_debug("read_pages ret %d\n", ret);
>>>>  	if (ret <= 0)
>>>>  		return ret;
>>>> -- 
>>>> 2.1.4
>>>>
>>>> _______________________________________________
>>>> CRIU mailing list
>>>> CRIU at openvz.org
>>>> https://lists.openvz.org/mailman/listinfo/criu
>>>>
>>>
>>> .
>>>
>>
> 
> .
> 



More information about the CRIU mailing list