[CRIU] [PATCH RFC 6/8] criu: page-xfer: add PS_IOV_GET interface

Mike Rapoport rppt at linux.vnet.ibm.com
Sat May 28 23:52:46 PDT 2016


On Fri, May 27, 2016 at 10:31:59PM +0300, Pavel Emelyanov wrote:
> On 05/21/2016 01:49 PM, Mike Rapoport wrote:
> > When the dump side is acting as a lazy pages server, it should be able to
> > respond to random page access requests.
> > The protocol is quite simple:
> > - the restore side sends a PS_IOV_GET command with the PID, address and
> >   number of pages it wishes to get
> 
> Ack
> 
> > - the dump side replies with a PS_IOV_GET command.
> 
> Oops. Why PS_IOV_GET? We have PS_IOV_ADD for sending pages.
 
PS_IOV_ADD is for pushing pages. PS_IOV_GET is for pulling them :)

> >   The nr_pages field is
> >   updated to reflect the actual number of pages that the dump side is going to
> >   send. If the pages in question are mapped to the zero pfn, the entire
> >   PS_IOV_GET reply is zeroed.
> > - After the PS_IOV_GET command, the dump side sends the actual page data
> > 
> > Signed-off-by: Mike Rapoport <rppt at linux.vnet.ibm.com>
> > ---
> >  criu/include/page-xfer.h |  2 +
> >  criu/page-xfer.c         | 99 ++++++++++++++++++++++++++++++++++++++++++++----
> >  2 files changed, 93 insertions(+), 8 deletions(-)
> > 
> > diff --git a/criu/include/page-xfer.h b/criu/include/page-xfer.h
> > index fb222c3..25a9da7 100644
> > --- a/criu/include/page-xfer.h
> > +++ b/criu/include/page-xfer.h
> > @@ -44,4 +44,6 @@ extern int disconnect_from_page_server(void);
> >  
> >  extern int check_parent_page_xfer(int fd_type, long id);
> >  
> > +extern int get_remote_pages(int pid, unsigned long addr, int nr_pages, void *dest);
> > +
> >  #endif /* __CR_PAGE_XFER__H__ */
> > diff --git a/criu/page-xfer.c b/criu/page-xfer.c
> > index c1716a5..bc5472e 100644
> > --- a/criu/page-xfer.c
> > +++ b/criu/page-xfer.c
> > @@ -16,6 +16,8 @@
> >  #include "util.h"
> >  #include "protobuf.h"
> >  #include "images/pagemap.pb-c.h"
> > +#include "pstree.h"
> > +#include "parasite-syscall.h"
> >  
> >  struct page_server_iov {
> >  	u32	cmd;
> > @@ -43,6 +45,7 @@ static int open_page_local_xfer(struct page_xfer *xfer, int fd_type, long id);
> >  #define PS_IOV_OPEN	3
> >  #define PS_IOV_OPEN2	4
> >  #define PS_IOV_PARENT	5
> > +#define PS_IOV_GET	6
> >  
> >  #define PS_IOV_FLUSH		0x1023
> >  #define PS_IOV_FLUSH_N_CLOSE	0x1024
> > @@ -176,6 +179,46 @@ static int page_server_hole(int sk, struct page_server_iov *pi)
> >  	return 0;
> >  }
> >  
> > +static int page_server_get_pages(int sk, struct page_server_iov *pi)
> > +{
> > +	struct pstree_item *item;
> > +	struct page_pipe *pp;
> > +	struct page_pipe_buf *ppb;
> > +	struct iovec *iov;
> > +	int ret;
> > +
> > +	item = pstree_item_by_virt(pi->dst_id);
> > +	pp = item->parasite_ctl->mem_pp;
> > +
> > +	ret = page_pipe_split(pp, pi->vaddr, &pi->nr_pages);
> > +	if (ret)
> > +		return ret;
> > +
> > +	if (pi->nr_pages == 0) {
> > +		/* no iovs found means we've hit a zero page */
> > +		pr_debug("no iovs found, zero pages\n");
> > +		memset(pi, 0, sizeof(*pi));
> > +
> > +		return write(sk, pi, sizeof(*pi)) != sizeof(*pi);
> > +	}
> > +
> > +	ppb = list_first_entry(&pp->bufs, struct page_pipe_buf, l);
> > +	iov = &ppb->iov[0];
> > +
> > +	BUG_ON(!(ppb->flags & PPB_LAZY));
> > +	BUG_ON(iov->iov_len != pi->nr_pages * PAGE_SIZE);
> > +	BUG_ON(pi->vaddr != encode_pointer(iov->iov_base));
> > +
> > +	if (write(sk, pi, sizeof(*pi)) != sizeof(*pi))
> > +		return -1;
> > +
> > +	ret = splice(ppb->p[0], NULL, sk, NULL, iov->iov_len, SPLICE_F_MOVE);
> > +	if (ret != iov->iov_len)
> > +		return -1;
> > +
> > +	return 0;
> > +}
> > +
> >  static int page_server_check_parent(int sk, struct page_server_iov *pi);
> >  
> >  static int page_server_serve(int sk)
> > @@ -190,14 +233,16 @@ static int page_server_serve(int sk)
> >  	 */
> >  	tcp_nodelay(sk, true);
> >  
> > -	if (pipe(cxfer.p)) {
> > -		pr_perror("Can't make pipe for xfer");
> > -		close(sk);
> > -		return -1;
> > -	}
> > +	if (!opts.lazy_pages) {
> > +		if (pipe(cxfer.p)) {
> > +			pr_perror("Can't make pipe for xfer");
> > +			close(sk);
> > +			return -1;
> > +		}
> >  
> > -	cxfer.pipe_size = fcntl(cxfer.p[0], F_GETPIPE_SZ, 0);
> > -	pr_debug("Created xfer pipe size %u\n", cxfer.pipe_size);
> > +		cxfer.pipe_size = fcntl(cxfer.p[0], F_GETPIPE_SZ, 0);
> > +		pr_debug("Created xfer pipe size %u\n", cxfer.pipe_size);
> > +	}
> >  
> >  	while (1) {
> >  		struct page_server_iov pi;
> > @@ -249,6 +294,10 @@ static int page_server_serve(int sk)
> >  			flushed = true;
> >  			break;
> >  		}
> > +		case PS_IOV_GET:
> > +			flushed = true;
> > +			ret = page_server_get_pages(sk, &pi);
> > +			break;
> >  		default:
> >  			pr_err("Unknown command %u\n", pi.cmd);
> >  			ret = -1;
> > @@ -291,7 +340,8 @@ int cr_page_server(bool daemon_mode, int cfd)
> >  	int sk = -1;
> >  	int ret;
> >  
> > -	up_page_ids_base();
> > +	if (!opts.lazy_pages)
> > +		up_page_ids_base();
> >  
> >  	if (opts.ps_socket != -1) {
> >  		ret = 0;
> > @@ -787,3 +837,36 @@ int check_parent_page_xfer(int fd_type, long id)
> >  	else
> >  		return check_parent_local_xfer(fd_type, id);
> >  }
> > +
> > +int get_remote_pages(int pid, unsigned long addr, int nr_pages, void *dest)
> > +{
> > +	int ret;
> > +
> > +	struct page_server_iov pi = {
> > +		.cmd = PS_IOV_GET,
> > +		.nr_pages = nr_pages,
> > +		.vaddr = addr,
> > +		.dst_id = pid,
> > +	};
> > +
> > +	ret = write(page_server_sk, &pi, sizeof(pi));
> > +	if (ret != sizeof(pi))
> > +		return -1;
> > +
> > +	ret = recv(page_server_sk, &pi, sizeof(pi), MSG_WAITALL);
> > +	if (ret != sizeof(pi))
> > +		return -1;
> > +
> > +	/* zero page */
> > +	if (pi.cmd == 0 && pi.vaddr == 0 && pi.nr_pages == 0 && pi.dst_id == 0)
> > +		return 0;
> > +
> > +	if (pi.nr_pages > nr_pages)
> > +		return -1;
> > +
> > +	ret = recv(page_server_sk, dest, PAGE_SIZE, MSG_WAITALL);
> > +	if (ret != PAGE_SIZE)
> > +		return -1;
> > +
> > +	return 1;
> > +}
> > 
> 



More information about the CRIU mailing list