[CRIU] [PATCH RFC 6/8] criu: page-xfer: add PS_IOV_GET interface
Pavel Emelyanov
xemul at virtuozzo.com
Mon May 30 04:00:41 PDT 2016
On 05/29/2016 09:52 AM, Mike Rapoport wrote:
> On Fri, May 27, 2016 at 10:31:59PM +0300, Pavel Emelyanov wrote:
>> On 05/21/2016 01:49 PM, Mike Rapoport wrote:
>>> When dump side is acting as lazy pages server it should be able to respond
>>> to random page access requests
>>> The protocol is quite simple:
>>> - the restore sends PS_IOV_GET command with PID, address and number
>>> of pages it wishes to get
>>
>> Ack
>>
>>> - the dump side replies with PS_IOV_GET command.
>>
>> Oops. Why PS_IOV_GET? We have PS_IOV_ADD for sending pages.
>
> PS_IOV_ADD is for pushing pages. PS_IOV_GET is for pulling them :)
Yes, _GET is what the restore side sends to the dump side, but then the dump side
should just reply with a regular PS_IOV_ADD, no? (One more comment below.)
>>> The nr_pages field is
>>> updated to reflect actual amount of pages that the dump side is going to
>>> send. If the pages in question are mapped to zero pfn, the entire
>>> PS_IOV_GET reply is zeroed.
>>> - After the PS_IOV_GET command the dump side sends actual page data
>>>
>>> Signed-off-by: Mike Rapoport <rppt at linux.vnet.ibm.com>
>>> ---
>>> criu/include/page-xfer.h | 2 +
>>> criu/page-xfer.c | 99 ++++++++++++++++++++++++++++++++++++++++++++----
>>> 2 files changed, 93 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/criu/include/page-xfer.h b/criu/include/page-xfer.h
>>> index fb222c3..25a9da7 100644
>>> --- a/criu/include/page-xfer.h
>>> +++ b/criu/include/page-xfer.h
>>> @@ -44,4 +44,6 @@ extern int disconnect_from_page_server(void);
>>>
>>> extern int check_parent_page_xfer(int fd_type, long id);
>>>
>>> +extern int get_remote_pages(int pid, unsigned long addr, int nr_pages, void *dest);
>>> +
>>> #endif /* __CR_PAGE_XFER__H__ */
>>> diff --git a/criu/page-xfer.c b/criu/page-xfer.c
>>> index c1716a5..bc5472e 100644
>>> --- a/criu/page-xfer.c
>>> +++ b/criu/page-xfer.c
>>> @@ -16,6 +16,8 @@
>>> #include "util.h"
>>> #include "protobuf.h"
>>> #include "images/pagemap.pb-c.h"
>>> +#include "pstree.h"
>>> +#include "parasite-syscall.h"
>>>
>>> struct page_server_iov {
>>> u32 cmd;
>>> @@ -43,6 +45,7 @@ static int open_page_local_xfer(struct page_xfer *xfer, int fd_type, long id);
>>> #define PS_IOV_OPEN 3
>>> #define PS_IOV_OPEN2 4
>>> #define PS_IOV_PARENT 5
>>> +#define PS_IOV_GET 6
>>>
>>> #define PS_IOV_FLUSH 0x1023
>>> #define PS_IOV_FLUSH_N_CLOSE 0x1024
>>> @@ -176,6 +179,46 @@ static int page_server_hole(int sk, struct page_server_iov *pi)
>>> return 0;
>>> }
>>>
>>> +static int page_server_get_pages(int sk, struct page_server_iov *pi)
>>> +{
>>> + struct pstree_item *item;
>>> + struct page_pipe *pp;
>>> + struct page_pipe_buf *ppb;
>>> + struct iovec *iov;
>>> + int ret;
>>> +
>>> + item = pstree_item_by_virt(pi->dst_id);
>>> + pp = item->parasite_ctl->mem_pp;
>>> +
>>> + ret = page_pipe_split(pp, pi->vaddr, &pi->nr_pages);
>>> + if (ret)
>>> + return ret;
>>> +
>>> + if (pi->nr_pages == 0) {
>>> + /* no iovs found means we've hit a zero page */
>>> + pr_debug("no iovs found, zero pages\n");
>>> + memset(pi, 0, sizeof(*pi));
This looks like PS_IOV_HOLE. But even if it isn't, let's add a special
PS_IOV_..._ZERO_PAGE(?) command for this instead of a zeroed-out pi.
>>> +
>>> + return write(sk, pi, sizeof(*pi)) != sizeof(*pi);
>>> + }
>>> +
>>> + ppb = list_first_entry(&pp->bufs, struct page_pipe_buf, l);
>>> + iov = &ppb->iov[0];
>>> +
>>> + BUG_ON(!(ppb->flags & PPB_LAZY));
>>> + BUG_ON(iov->iov_len != pi->nr_pages * PAGE_SIZE);
>>> + BUG_ON(pi->vaddr != encode_pointer(iov->iov_base));
>>> +
>>> + if (write(sk, pi, sizeof(*pi)) != sizeof(*pi))
>>> + return -1;
>>> +
>>> + ret = splice(ppb->p[0], NULL, sk, NULL, iov->iov_len, SPLICE_F_MOVE);
>>> + if (ret != iov->iov_len)
>>> + return -1;
>>> +
>>> + return 0;
>>> +}
>>> +
>>> static int page_server_check_parent(int sk, struct page_server_iov *pi);
>>>
>>> static int page_server_serve(int sk)
>>> @@ -190,14 +233,16 @@ static int page_server_serve(int sk)
>>> */
>>> tcp_nodelay(sk, true);
>>>
>>> - if (pipe(cxfer.p)) {
>>> - pr_perror("Can't make pipe for xfer");
>>> - close(sk);
>>> - return -1;
>>> - }
>>> + if (!opts.lazy_pages) {
>>> + if (pipe(cxfer.p)) {
>>> + pr_perror("Can't make pipe for xfer");
>>> + close(sk);
>>> + return -1;
>>> + }
>>>
>>> - cxfer.pipe_size = fcntl(cxfer.p[0], F_GETPIPE_SZ, 0);
>>> - pr_debug("Created xfer pipe size %u\n", cxfer.pipe_size);
>>> + cxfer.pipe_size = fcntl(cxfer.p[0], F_GETPIPE_SZ, 0);
>>> + pr_debug("Created xfer pipe size %u\n", cxfer.pipe_size);
>>> + }
>>>
>>> while (1) {
>>> struct page_server_iov pi;
>>> @@ -249,6 +294,10 @@ static int page_server_serve(int sk)
>>> flushed = true;
>>> break;
>>> }
>>> + case PS_IOV_GET:
>>> + flushed = true;
>>> + ret = page_server_get_pages(sk, &pi);
>>> + break;
>>> default:
>>> pr_err("Unknown command %u\n", pi.cmd);
>>> ret = -1;
>>> @@ -291,7 +340,8 @@ int cr_page_server(bool daemon_mode, int cfd)
>>> int sk = -1;
>>> int ret;
>>>
>>> - up_page_ids_base();
>>> + if (!opts.lazy_pages)
>>> + up_page_ids_base();
>>>
>>> if (opts.ps_socket != -1) {
>>> ret = 0;
>>> @@ -787,3 +837,36 @@ int check_parent_page_xfer(int fd_type, long id)
>>> else
>>> return check_parent_local_xfer(fd_type, id);
>>> }
>>> +
>>> +int get_remote_pages(int pid, unsigned long addr, int nr_pages, void *dest)
>>> +{
>>> + int ret;
>>> +
>>> + struct page_server_iov pi = {
>>> + .cmd = PS_IOV_GET,
>>> + .nr_pages = nr_pages,
>>> + .vaddr = addr,
>>> + .dst_id = pid,
>>> + };
>>> +
>>> + ret = write(page_server_sk, &pi, sizeof(pi));
>>> + if (ret != sizeof(pi))
>>> + return -1;
>>> +
>>> + ret = recv(page_server_sk, &pi, sizeof(pi), MSG_WAITALL);
>>> + if (ret != sizeof(pi))
>>> + return -1;
>>> +
>>> + /* zero page */
>>> + if (pi.cmd == 0 && pi.vaddr == 0 && pi.nr_pages == 0 && pi.dst_id == 0)
>>> + return 0;
>>> +
>>> + if (pi.nr_pages > nr_pages)
>>> + return -1;
>>> +
>>> + ret = recv(page_server_sk, dest, PAGE_SIZE, MSG_WAITALL);
>>> + if (ret != PAGE_SIZE)
>>> + return -1;
>>> +
>>> + return 1;
>>> +}
>>>
>>
>
> .
>
More information about the CRIU
mailing list