[CRIU] [PATCH 2/3] page-read: Add async arg to ->read_pages callback
Pavel Emelyanov
xemul at virtuozzo.com
Thu Nov 10 00:46:21 PST 2016
On 11/10/2016 09:50 AM, Mike Rapoport wrote:
> On Thu, Nov 10, 2016 at 01:02:01AM +0300, Pavel Emelyanov wrote:
>> On 11/09/2016 09:44 PM, Mike Rapoport wrote:
>>> On Mon, Nov 07, 2016 at 07:03:07PM +0300, Pavel Emelyanov wrote:
>>>> Flag PR_ASYNC means, that the caller is OK if the routine
>>>> returns w/o data read into the buffer provided.
>>>>
>>>> Flag PR_UNPLUG means, that the caller nonetheless wants the
>>>> data to be available ASAP.
>>>
>>> This one does not seem to be used neither in this patch nor in the next
>>> one.
>>
>> Yup. I created one when thought how this would work in UFFD case.
>>
>>> Can you elaborate on how it should/will be used? From the description
>>> above, the name should be PR_ASAP if we'd ever use it ;-)
>>
>> Sure, so the PR_ASYNC flag means, that the ->read_pages callback may
>> return w/o actually reading the data into the buffer. But with such a
>> notation there's a radical difference between regular restore and lazy
>> restore.
>>
>> In the former case PR_ASYNC may defer start reading the data from image
>> until the very end stage of restore. In the latter case the read can
>> (and should) be async, but the reading shouldn't be delayed even for a
>> fraction of second, since it's called from the page-fault handler. Thus
>> the 2nd flag -- PR_UNPLUG (or PR_ASAP) which means -- the caller is OK
>> with not having the data in the buffer when the callback returns, but
>> nobody wants to wait for the data for too long, so the IO should be
>> started ASAP (i.e. right now).
>>
>> So regular restore should have only PR_ASYNC flag, and it really has
>> one in restore_priv_vma_contents(), but the uffd restore should have
>> PR_ASYNC | PR_UNPLUG combination.
>
> And this combination would make read_pagemap_page grow from 5 screens to 7? ;-)
Nope :) Current (with this patch) read_pagemap_page already handles both
flags.
>> It's not there since there's no code that uses page-reads in uffd remote
>> case :)
>
> For the uffd remote pages read I was planning adding open_remote_page_read
> rather than adding if-clauses to read_pagemap_pages. If I may suggest,
> let's move forward with PR_ASYNC for the local restore and we'll take care
> of lazy remote pages later on.
> BTW, local lazy restore can use PR_ASYNC in handle_remaining_pages, I
> believe.
PR_ASYNC | PR_ASAP
>> -- Pavel
>
> --
> Sincerely yours,
> Mike.
>
>>>> Signed-off-by: Pavel Emelyanov <xemul at virtuozzo.com>
>>>> ---
>>>> criu/include/pagemap.h | 6 +++++-
>>>> criu/mem.c | 4 ++--
>>>> criu/pagemap.c | 5 +++--
>>>> criu/shmem.c | 2 +-
>>>> criu/uffd.c | 2 +-
>>>> 5 files changed, 12 insertions(+), 7 deletions(-)
>>>>
>>>> diff --git a/criu/include/pagemap.h b/criu/include/pagemap.h
>>>> index 705a9af..b445d82 100644
>>>> --- a/criu/include/pagemap.h
>>>> +++ b/criu/include/pagemap.h
>>>> @@ -47,7 +47,7 @@ struct page_read {
>>>> */
>>>> int (*get_pagemap)(struct page_read *, struct iovec *iov);
>>>> /* reads page from current pagemap */
>>>> - int (*read_pages)(struct page_read *, unsigned long vaddr, int nr, void *);
>>>> + int (*read_pages)(struct page_read *, unsigned long vaddr, int nr, void *, unsigned flags);
>>>> void (*close)(struct page_read *);
>>>> void (*skip_pages)(struct page_read *, unsigned long len);
>>>> int (*seek_page)(struct page_read *pr, unsigned long vaddr, bool warn);
>>>> @@ -74,6 +74,10 @@ struct page_read {
>>>> int curr_pme;
>>>> };
>>>>
>>>> +/* flags for ->read_pages */
>>>> +#define PR_ASYNC 0x1 /* may exit w/o data in the buffer */
>>>> +#define PR_UNPLUG 0x2 /* do async, but start the IO asap */
>>>> +
>>>> #define PR_SHMEM 0x1
>>>> #define PR_TASK 0x2
>>>>
>>>> diff --git a/criu/mem.c b/criu/mem.c
>>>> index 809b637..7da64f5 100644
>>>> --- a/criu/mem.c
>>>> +++ b/criu/mem.c
>>>> @@ -790,7 +790,7 @@ static int restore_priv_vma_content(struct pstree_item *t)
>>>> if (vma->ppage_bitmap) { /* inherited vma */
>>>> clear_bit(off, vma->ppage_bitmap);
>>>>
>>>> - ret = pr.read_pages(&pr, va, 1, buf);
>>>> + ret = pr.read_pages(&pr, va, 1, buf, 0);
>>>> if (ret < 0)
>>>> goto err_read;
>>>>
>>>> @@ -818,7 +818,7 @@ static int restore_priv_vma_content(struct pstree_item *t)
>>>>
>>>> nr = min_t(int, nr_pages - i, (vma->e->end - va) / PAGE_SIZE);
>>>>
>>>> - ret = pr.read_pages(&pr, va, nr, p);
>>>> + ret = pr.read_pages(&pr, va, nr, p, PR_ASYNC);
>>>> if (ret < 0)
>>>> goto err_read;
>>>>
>>>> diff --git a/criu/pagemap.c b/criu/pagemap.c
>>>> index cfc9659..0a072c8 100644
>>>> --- a/criu/pagemap.c
>>>> +++ b/criu/pagemap.c
>>>> @@ -205,7 +205,8 @@ static inline void pagemap_bound_check(PagemapEntry *pe, unsigned long vaddr, in
>>>> }
>>>> }
>>>>
>>>> -static int read_pagemap_page(struct page_read *pr, unsigned long vaddr, int nr, void *buf)
>>>> +static int read_pagemap_page(struct page_read *pr, unsigned long vaddr, int nr,
>>>> + void *buf, unsigned flags)
>>>> {
>>>> int ret;
>>>> unsigned long len = nr * PAGE_SIZE;
>>>> @@ -242,7 +243,7 @@ static int read_pagemap_page(struct page_read *pr, unsigned long vaddr, int nr,
>>>> if (p_nr > nr)
>>>> p_nr = nr;
>>>>
>>>> - ret = read_pagemap_page(ppr, vaddr, p_nr, buf);
>>>> + ret = read_pagemap_page(ppr, vaddr, p_nr, buf, flags);
>>>> if (ret == -1)
>>>> return ret;
>>>>
>>>> diff --git a/criu/shmem.c b/criu/shmem.c
>>>> index 5065abd..05028a3 100644
>>>> --- a/criu/shmem.c
>>>> +++ b/criu/shmem.c
>>>> @@ -485,7 +485,7 @@ static int restore_shmem_content(void *addr, struct shmem_info *si)
>>>> if (vaddr + nr_pages * PAGE_SIZE > si->size)
>>>> break;
>>>>
>>>> - pr.read_pages(&pr, vaddr, nr_pages, addr + vaddr);
>>>> + pr.read_pages(&pr, vaddr, nr_pages, addr + vaddr, 0);
>>>> }
>>>>
>>>> pr.close(&pr);
>>>> diff --git a/criu/uffd.c b/criu/uffd.c
>>>> index e6d4069..3336651 100644
>>>> --- a/criu/uffd.c
>>>> +++ b/criu/uffd.c
>>>> @@ -398,7 +398,7 @@ static int get_page(struct lazy_pages_info *lpi, unsigned long addr, void *dest)
>>>> if (pagemap_zero(lpi->pr.pe))
>>>> return 0;
>>>>
>>>> - ret = lpi->pr.read_pages(&lpi->pr, addr, 1, buf);
>>>> + ret = lpi->pr.read_pages(&lpi->pr, addr, 1, buf, 0);
>>>> pr_debug("read_pages ret %d\n", ret);
>>>> if (ret <= 0)
>>>> return ret;
>>>> --
>>>> 2.1.4
>>>>
>>>> _______________________________________________
>>>> CRIU mailing list
>>>> CRIU at openvz.org
>>>> https://lists.openvz.org/mailman/listinfo/criu
>>>>
>>>
>>> .
>>>
>>
>
> .
>
More information about the CRIU
mailing list