[CRIU] [RFC PATCH v2 02/23] lazy-pages: track outstanding page faults

Pavel Emelyanov xemul at virtuozzo.com
Wed Feb 8 01:15:01 PST 2017


Applied, but there's a question inside.

On 02/06/2017 02:43 PM, Mike Rapoport wrote:
> Multithreaded applications may concurrently generate page faults at the
> same address, which will cause creation of multiple requests for remote
> page, and, consequently, confuse the page server on the dump side.
> We can keep track on page fault requests in flight and ensure this way that
> we request a page from the remote side only once.
> 
> Signed-off-by: Mike Rapoport <rppt at linux.vnet.ibm.com>
> ---
>  criu/uffd.c | 31 +++++++++++++++++++++++++++++++
>  1 file changed, 31 insertions(+)
> 
> diff --git a/criu/uffd.c b/criu/uffd.c
> index 557f112..050637c 100644
> --- a/criu/uffd.c
> +++ b/criu/uffd.c
> @@ -55,10 +55,16 @@ struct lazy_iovec {
>  
>  struct lazy_pages_info;
>  
> +struct lp_req {
> +	unsigned long addr;
> +	struct list_head l;
> +};
> +
>  struct lazy_pages_info {
>  	int pid;
>  
>  	struct list_head iovs;
> +	struct list_head reqs;
>  
>  	struct page_read pr;
>  
> @@ -85,6 +91,7 @@ static struct lazy_pages_info *lpi_init(void)
>  
>  	memset(lpi, 0, sizeof(*lpi));
>  	INIT_LIST_HEAD(&lpi->iovs);
> +	INIT_LIST_HEAD(&lpi->reqs);
>  	INIT_LIST_HEAD(&lpi->l);
>  	lpi->lpfd.revent = handle_uffd_event;
>  
> @@ -537,9 +544,19 @@ static int uffd_copy(struct lazy_pages_info *lpi, __u64 address, int nr_pages)
>  
>  static int complete_page_fault(struct lazy_pages_info *lpi, unsigned long vaddr, int nr)
>  {
> +	struct lp_req *req;
> +
>  	if (uffd_copy(lpi, vaddr, nr))
>  		return -1;
>  
> +	list_for_each_entry(req, &lpi->reqs, l) {
> +		if (req->addr == vaddr) {
> +			list_del(&req->l);
> +			xfree(req);
> +			break;

Shouldn't we better keep this forever and ... (below)

> +		}
> +	}
> +
>  	return update_lazy_iovecs(lpi, vaddr, nr * PAGE_SIZE);
>  }
>  
> @@ -635,6 +652,7 @@ static int handle_remaining_pages(struct lazy_pages_info *lpi)
>  
>  static int handle_page_fault(struct lazy_pages_info *lpi, struct uffd_msg *msg)
>  {
> +	struct lp_req *req;
>  	__u64 address;
>  	int ret;
>  
> @@ -642,6 +660,16 @@ static int handle_page_fault(struct lazy_pages_info *lpi, struct uffd_msg *msg)
>  	address = msg->arg.pagefault.address & ~(page_size() - 1);
>  	pr_debug("%d: #PF at 0x%llx\n", lpi->pid, address);
>  
> +	list_for_each_entry(req, &lpi->reqs, l)
> +		if (req->addr == address)
> +			return 0;

... punch a hole in lpi->iovs so that we have iovs containing a list
of not yet transfered memory and reqs -- already transfered?

> +
> +	req = xzalloc(sizeof(*req));
> +	if (!req)
> +		return -1;
> +	req->addr = address;
> +	list_add(&req->l, &lpi->reqs);
> +
>  	ret = uffd_handle_pages(lpi, address, 1, PR_ASYNC | PR_ASAP);
>  	if (ret < 0) {
>  		pr_err("Error during regular page copy\n");
> @@ -664,6 +692,9 @@ static int handle_uffd_event(struct epoll_rfd *lpfd)
>  		return 1;
>  
>  	if (ret != sizeof(msg)) {
> +		/* we've already handled the page fault for another thread */
> +		if (errno == EAGAIN)
> +			return 0;
>  		if (ret < 0)
>  			pr_perror("Can't read userfaultfd message");
>  		else
> 



More information about the CRIU mailing list