[CRIU] [PATCH 2/3] page-read: Add async arg to ->read_pages callback

Pavel Emelyanov xemul at virtuozzo.com
Wed Nov 9 14:02:01 PST 2016


On 11/09/2016 09:44 PM, Mike Rapoport wrote:
> On Mon, Nov 07, 2016 at 07:03:07PM +0300, Pavel Emelyanov wrote:
>> Flag PR_ASYNC means, that the caller is OK if the routine
>> returns w/o data read into the buffer provided.
>>
>> Flag PR_UNPLUG means, that the caller nonetheless wants the
>> data to be available ASAP.
> 
> This one does not seem to be used neither in this patch nor in the next
> one.

Yup. I created one when thought how this would work in UFFD case.

> Can you elaborate on how it should/will be used? From the description
> above, the name should be PR_ASAP if we'd ever use it ;-)

Sure, so the PR_ASYNC flag means, that the ->read_pages callback may
return w/o actually reading the data into the buffer. But with such a
notation there's a radical difference between regular restore and lazy
restore.

In the former case PR_ASYNC may defer start reading the data from image
until the very end stage of restore. In the latter case the read can
(and should) be async, but the reading shouldn't be delayed even for a 
fraction of second, since it's called from the page-fault handler. Thus
the 2nd flag -- PR_UNPLUG (or PR_ASAP) which means -- the caller is OK
with not having the data in the buffer when the callback returns, but
nobody wants to wait for the data for too long, so the IO should be
started ASAP (i.e. right now).

So regular restore should have only PR_ASYNC flag, and it really has
one in restore_priv_vma_contents(), but the uffd restore should have
PR_ASYNC | PR_UNPLUG combination. It's not there since there's no code
that uses page-reads in uffd remote case :)

-- Pavel

>> Signed-off-by: Pavel Emelyanov <xemul at virtuozzo.com>
>> ---
>>  criu/include/pagemap.h | 6 +++++-
>>  criu/mem.c             | 4 ++--
>>  criu/pagemap.c         | 5 +++--
>>  criu/shmem.c           | 2 +-
>>  criu/uffd.c            | 2 +-
>>  5 files changed, 12 insertions(+), 7 deletions(-)
>>
>> diff --git a/criu/include/pagemap.h b/criu/include/pagemap.h
>> index 705a9af..b445d82 100644
>> --- a/criu/include/pagemap.h
>> +++ b/criu/include/pagemap.h
>> @@ -47,7 +47,7 @@ struct page_read {
>>  	 */
>>  	int (*get_pagemap)(struct page_read *, struct iovec *iov);
>>  	/* reads page from current pagemap */
>> -	int (*read_pages)(struct page_read *, unsigned long vaddr, int nr, void *);
>> +	int (*read_pages)(struct page_read *, unsigned long vaddr, int nr, void *, unsigned flags);
>>  	void (*close)(struct page_read *);
>>  	void (*skip_pages)(struct page_read *, unsigned long len);
>>  	int (*seek_page)(struct page_read *pr, unsigned long vaddr, bool warn);
>> @@ -74,6 +74,10 @@ struct page_read {
>>  	int curr_pme;
>>  };
>>
>> +/* flags for ->read_pages */
>> +#define PR_ASYNC	0x1 /* may exit w/o data in the buffer */
>> +#define PR_UNPLUG	0x2 /* do async, but start the IO asap */
>> +
>>  #define PR_SHMEM	0x1
>>  #define PR_TASK		0x2
>>
>> diff --git a/criu/mem.c b/criu/mem.c
>> index 809b637..7da64f5 100644
>> --- a/criu/mem.c
>> +++ b/criu/mem.c
>> @@ -790,7 +790,7 @@ static int restore_priv_vma_content(struct pstree_item *t)
>>  			if (vma->ppage_bitmap) { /* inherited vma */
>>  				clear_bit(off, vma->ppage_bitmap);
>>
>> -				ret = pr.read_pages(&pr, va, 1, buf);
>> +				ret = pr.read_pages(&pr, va, 1, buf, 0);
>>  				if (ret < 0)
>>  					goto err_read;
>>
>> @@ -818,7 +818,7 @@ static int restore_priv_vma_content(struct pstree_item *t)
>>
>>  				nr = min_t(int, nr_pages - i, (vma->e->end - va) / PAGE_SIZE);
>>
>> -				ret = pr.read_pages(&pr, va, nr, p);
>> +				ret = pr.read_pages(&pr, va, nr, p, PR_ASYNC);
>>  				if (ret < 0)
>>  					goto err_read;
>>
>> diff --git a/criu/pagemap.c b/criu/pagemap.c
>> index cfc9659..0a072c8 100644
>> --- a/criu/pagemap.c
>> +++ b/criu/pagemap.c
>> @@ -205,7 +205,8 @@ static inline void pagemap_bound_check(PagemapEntry *pe, unsigned long vaddr, in
>>  	}
>>  }
>>
>> -static int read_pagemap_page(struct page_read *pr, unsigned long vaddr, int nr, void *buf)
>> +static int read_pagemap_page(struct page_read *pr, unsigned long vaddr, int nr,
>> +		void *buf, unsigned flags)
>>  {
>>  	int ret;
>>  	unsigned long len = nr * PAGE_SIZE;
>> @@ -242,7 +243,7 @@ static int read_pagemap_page(struct page_read *pr, unsigned long vaddr, int nr,
>>  			if (p_nr > nr)
>>  				p_nr = nr;
>>
>> -			ret = read_pagemap_page(ppr, vaddr, p_nr, buf);
>> +			ret = read_pagemap_page(ppr, vaddr, p_nr, buf, flags);
>>  			if (ret == -1)
>>  				return ret;
>>
>> diff --git a/criu/shmem.c b/criu/shmem.c
>> index 5065abd..05028a3 100644
>> --- a/criu/shmem.c
>> +++ b/criu/shmem.c
>> @@ -485,7 +485,7 @@ static int restore_shmem_content(void *addr, struct shmem_info *si)
>>  		if (vaddr + nr_pages * PAGE_SIZE > si->size)
>>  			break;
>>
>> -		pr.read_pages(&pr, vaddr, nr_pages, addr + vaddr);
>> +		pr.read_pages(&pr, vaddr, nr_pages, addr + vaddr, 0);
>>  	}
>>
>>  	pr.close(&pr);
>> diff --git a/criu/uffd.c b/criu/uffd.c
>> index e6d4069..3336651 100644
>> --- a/criu/uffd.c
>> +++ b/criu/uffd.c
>> @@ -398,7 +398,7 @@ static int get_page(struct lazy_pages_info *lpi, unsigned long addr, void *dest)
>>  	if (pagemap_zero(lpi->pr.pe))
>>  		return 0;
>>
>> -	ret = lpi->pr.read_pages(&lpi->pr, addr, 1, buf);
>> +	ret = lpi->pr.read_pages(&lpi->pr, addr, 1, buf, 0);
>>  	pr_debug("read_pages ret %d\n", ret);
>>  	if (ret <= 0)
>>  		return ret;
>> -- 
>> 2.1.4
>>
>> _______________________________________________
>> CRIU mailing list
>> CRIU at openvz.org
>> https://lists.openvz.org/mailman/listinfo/criu
>>
> 
> .
> 



More information about the CRIU mailing list